OSX/libsecurity_cryptkit/lib/CurveParamDocs/schoof.c

   1 /* schoof.c
   2
   3    Elliptic curve order calculator, for
   4
   5    y^2 = x^3 + a x + b (mod p)
   6
   7    Compile with:
   8
   9    % cc -O schoof.c giants.c tools.c ellproj.c -lm -o schoof
  10
  11    and run, entering p and the a,b parameters.
  12    Eventually the curve order o is reported, together with
  13    the twist order o'.  The sum of these two orders is always
  14    2p + 2.  A final check is performed to verify that a
  15    random point (x,y,z) enjoys the annihilation
  16    o * (x,y,z) = point-at-infinity.  This is not absolutely
  17    definitive, rather it is a necessary condition on the oder o
  18    (i.e. it is a sanity check of sorts).
  19
  20  * Change history:
  21
  22      18 Nov 99   REC  fixed M. Scott bug (MAX_DIGS bumped by 2)
  23       5 Jul 99   REC  Installed improved (base-4) power ladder
  24       5 Jul 99   REC  Made use of binary segmentation square (per se)
  25       5 Jul 99   REC  Improved memory usage
  26       2 May 99   REC  Added initial primality check
  27      30 Apr 99   REC  Added actual order-annihilation test
  28      29 Apr 99   REC  Improvements due to A. Powell
  29       2 Feb 99   REC  Added explicit CRT result
  30      12 Jan 99   REC  Removed (hopefully) last of memory crashes
  31      20 Jan 98   REC  Creation
  32
  33  *      c. 1998 Perfectly Scientific, Inc.
  34  *      All Rights Reserved.
  35  *
  36  *
  37  *************************************************************/
  38
  39 #include <stdio.h>
  40 #include <math.h>
  41 #include <stdlib.h>
  42 #include "giants.h"
  43 #include "tools.h"
  44 #include "ellproj.h"
  45
  46 #define P_BREAK 32
  47
  48 #ifdef _WIN32
  49 #include <string.h>
  50 #define bzero(D, n) memset(D, 0x00, n)
  51 #define bcopy(S, D, n) memcpy(D, S, n)
  52 #endif
  53
  54 #define Q_MAX 256       /* Bits in largest primes handled. */
  55 #define L_CEIL 100       /* Bound on Schoof L values (not all needed in general). */
  56
  57
  58 typedef struct
  59          {
  60          int deg;
  61          giant *coe;
  62          } polystruct;
  63 typedef polystruct *poly;
  64
  65 extern int *pr;
  66
  67 static int Q, L_MAX;
  68 static int MAX_DIGS, MAX_COEFFS;
  69
  70 static giant *mcand, coe, tmp, err, aux, aux2, globx, globy, t1, t2,
  71         t3, t4, t5;
  72 static poly qscratch, rscratch, sscratch, tscratch, pbuff,
  73      pbuff2, precip, cubic, powx, powy, kxn, kyn, kxd, kyd,
  74      txn, txd, tyn, tyd, txn1, txd1, tyn1, tyd1,
  75      nx, dx, ny, dy, mn, md, tmp1, tmp2, tmp3, tmp4;
  76 static poly s[L_CEIL+2], smonic;
  77 static giant p, a, b;
  78 static int L;
  79
  80 void quickmodg(giant g, giant x)
  81 {       int sgn = x->sign;
  82
  83         if(sgn == 0) return;
  84         if(sgn > 0) {
  85                 if(gcompg(x, g) >= 0) subg(g, x);
  86                 return;
  87         }
  88         addg(g,x);
  89         return;
  90 }
  91
  92 int
  93 log_2(int n) {
  94         int c = 1, d = -1;
  95         while(c <= n) {
  96                 c <<= 1;
  97                 ++d;
  98         }
  99         return(d);
 100 }
 101
 102 void
 103 justifyp(poly x) {
 104         int j;
 105         for(j = x->deg; j >= 0; j--) {
 106                 if(!is0(x->coe[j])) break;
 107         }
 108         x->deg = (j>0)? j : 0;
 109 }
 110
 111 void
 112 polyrem(poly x) {
 113         int j;
 114    for(j=0; j <= x->deg; j++) {
 115       modg(p, x->coe[j]);
 116    }
 117    justifyp(x);
 118 }
 119
 120
 121 giant *
 122 newa(int n) {
 123         giant *p = (giant *)malloc(n*sizeof(giant));
 124         int j;
 125         for(j=0; j<n; j++) {
 126                 p[j] = newgiant(MAX_DIGS);
 127         }
 128         return(p);
 129 }
 130
 131 poly
 132 newpoly(int coeffs) {
 133         poly pol;
 134         pol = (poly) malloc(sizeof(polystruct));
 135         pol->coe = (giant *)newa(coeffs);
 136         return(pol);
 137 }
 138
 139 void
 140 init_all() {
 141    int j;
 142
 143    j = (2*Q + log_2(MAX_COEFFS) + 32 + 15)/16;
 144    j = j * MAX_COEFFS;
 145    globx = newgiant(j);
 146    globy = newgiant(j);
 147    s[0] = newpoly(MAX_COEFFS);
 148
 149    for(j=1; j<=L_MAX+1; j++) {
 150                 s[j] = newpoly(j*(j+1));
 151    }
 152    smonic = newpoly(MAX_COEFFS);
 153    powx = newpoly(MAX_COEFFS);
 154    powy = newpoly(MAX_COEFFS);
 155    kxn = newpoly(MAX_COEFFS);
 156    kxd = newpoly(MAX_COEFFS);
 157    kyn = newpoly(MAX_COEFFS);
 158    kyd = newpoly(MAX_COEFFS);
 159    txn = newpoly(MAX_COEFFS);
 160    txd = newpoly(MAX_COEFFS);
 161    tyn = newpoly(MAX_COEFFS);
 162    tyd = newpoly(MAX_COEFFS);
 163    txn1 = newpoly(MAX_COEFFS);
 164    txd1 = newpoly(MAX_COEFFS);
 165    tyn1 = newpoly(MAX_COEFFS);
 166    tyd1 = newpoly(MAX_COEFFS);
 167    nx = newpoly(MAX_COEFFS);
 168    ny = newpoly(MAX_COEFFS);
 169    dx = newpoly(MAX_COEFFS);
 170    dy = newpoly(MAX_COEFFS);
 171    mn = newpoly(MAX_COEFFS);
 172    md = newpoly(MAX_COEFFS);
 173    tmp1 = newpoly(MAX_COEFFS);
 174    tmp2 = newpoly(MAX_COEFFS);
 175    tmp3 = newpoly(MAX_COEFFS);
 176    tmp4 = newpoly(MAX_COEFFS);
 177    mcand = (giant *)newa(MAX_COEFFS);
 178 /* The next three need extra space for remaindering routines. */
 179         qscratch = newpoly(2*MAX_COEFFS);
 180         rscratch = newpoly(2*MAX_COEFFS);
 181         pbuff = newpoly(2*MAX_COEFFS);
 182         pbuff2 = newpoly(MAX_COEFFS);
 183         sscratch = newpoly(MAX_COEFFS);
 184         tscratch = newpoly(MAX_COEFFS);
 185         tmp = newgiant(MAX_DIGS);
 186         err = newgiant(MAX_DIGS);
 187         coe = newgiant(MAX_DIGS);
 188         aux = newgiant(MAX_DIGS);
 189         aux2 = newgiant(MAX_DIGS);
 190         t3 = newgiant(MAX_DIGS);
 191         t4 = newgiant(MAX_DIGS);
 192         t5 = newgiant(MAX_DIGS);
 193                 cubic = newpoly(4);
 194         precip = newpoly(MAX_COEFFS);
 195 }
 196
 197 void
 198 atoa(giant *a, giant *b, int n) {
 199         int j;
 200         for(j=0; j<n; j++) gtog(a[j], b[j]);
 201 }
 202
 203 void
 204 ptop(poly x, poly y)
 205 /* y := x. */
 206 {
 207         y->deg = x->deg;
 208         atoa(x->coe, y->coe, y->deg+1);
 209 }
 210
 211 void
 212 negp(poly y)
 213 /* y := -y. */
 214 {       int j;
 215         for(j=0; j <= y->deg; j++) {
 216                 negg(y->coe[j]);
 217                 quickmodg(p, y->coe[j]);
 218         }
 219 }
 220
 221 int
 222 iszer(giant a) {
 223
 224   if(a->sign == 0) return(1);
 225   return(0);
 226
 227 }
 228
 229 int
 230 iszerop(poly x) {
 231    if(x->deg == 0 && iszer(x->coe[0])) return 1;
 232    return 0;
 233 }
 234
 235 int
 236 is0(giant a) {
 237         return(iszer(a));
 238 }
 239
 240 int
 241 is1(giant a) {
 242         return(isone(a));
 243 }
 244
 245
 246 void
 247 addp(poly x, poly y)
 248 /* y += x. */
 249 {
 250         int d = x->deg, j;
 251
 252         if(y->deg > d) d = y->deg;
 253         for(j = 0; j <= d; j++) {
 254                 if((j <= x->deg) && (j <= y->deg)) {
 255                         addg(x->coe[j], y->coe[j]);
 256                         quickmodg(p, y->coe[j]);
 257                         continue;
 258                 }
 259                 if((j <= x->deg) && (j > y->deg)) {
 260                         gtog(x->coe[j], y->coe[j]);
 261                         quickmodg(p, y->coe[j]);
 262                         continue;
 263                 }
 264         }
 265         y->deg = d;
 266         justifyp(y);
 267 }
 268
 269 void
 270 subp(poly x, poly y)
 271 /* y -= x. */
 272 {
 273         int d = x->deg, j;
 274
 275         if(y->deg > d) d = y->deg;
 276         for(j = 0; j <= d; j++) {
 277                 if((j <= x->deg) && (j <= y->deg)) {
 278                         subg(x->coe[j], y->coe[j]);
 279                         quickmodg(p, y->coe[j]);
 280                         continue;
 281                 }
 282                 if((j <= x->deg) && (j > y->deg)) {
 283                         gtog(x->coe[j], y->coe[j]);
 284                         negg(y->coe[j]);
 285                         quickmodg(p, y->coe[j]);
 286                         continue;
 287                 }
 288         }
 289         y->deg = d;
 290         justifyp(y);
 291 }
 292
 293
 294 void
 295 grammarmulp(poly a, poly b)
 296 /* b *= a. */
 297 {
 298         int dega = a->deg, degb = b->deg, deg = dega + degb;
 299         register int d, kk, first, diffa;
 300
 301         for(d=deg; d>=0; d--) {
 302                 diffa = d-dega;
 303                 itog(0, coe);
 304                 for(kk=0; kk<=d; kk++) {
 305                         if((kk>degb)||(kk<diffa)) continue;
 306                         gtog(b->coe[kk], tmp);
 307                         mulg(a->coe[d-kk], tmp);
 308                         modg(p, tmp);
 309                         addg(tmp, coe);
 310                         quickmodg(p, coe);
 311                 }
 312                 gtog(coe, mcand[d]);
 313         }
 314         atoa(mcand, b->coe, deg+1);
 315         b->deg = deg;
 316         justifyp(b);
 317 }
 318
 319 void
 320 atop(giant *a, poly x, int deg)
 321 /* Copy array to poly, with monic option. */
 322 {
 323         int adeg = abs(deg);
 324         x->deg = adeg;
 325         atoa(a, x->coe, adeg);
 326         if(deg < 0) {
 327            itog(1, x->coe[adeg]);
 328         } else {
 329            gtog(a[adeg], x->coe[adeg]);
 330         }
 331 }
 332
 333 void
 334 just(giant g) {
 335    while((g->n[g->sign-1] == 0) && (g->sign > 0)) --g->sign;
 336 }
 337
 338 void
 339 unstuff_partial(giant g, poly y, int words){
 340         int j;
 341         for(j=0; j < y->deg; j++) {
 342                 bcopy((g->n) + j*words, y->coe[j]->n, words*sizeof(short));
 343       y->coe[j]->sign = words;
 344       just(y->coe[j]);
 345    }
 346 }
 347
 348 void
 349 stuff(poly x, giant g, int words) {
 350         int deg = x->deg, j, coedigs;
 351
 352    g->sign = words*(1 + deg);
 353    for(j=0; j <= deg; j++) {
 354                 coedigs = (x->coe[j])->sign;
 355                 bcopy(x->coe[j]->n, (g->n) + j*words, coedigs*sizeof(short));
 356       bzero((g->n) + (j*words+coedigs),
 357                                 sizeof(short)*(words-coedigs));
 358         }
 359    just(g);
 360 }
 361
 362 int maxwords = 0;
 363 void
 364
 365 binarysegmul(poly x, poly y) {
 366         int bits = bitlen(p), xwords, ywords, words;
 367
 368         xwords = (2*bits + log_2(x->deg+1) + 32 + 15)/16;
 369    ywords = (2*bits + log_2(y->deg+1) + 32 + 15)/16;
 370    if(xwords > ywords) words = xwords; else words = ywords;
 371    stuff(x, globx, words);
 372    stuff(y, globy, words);
 373    mulg(globx, globy);
 374    gtog(y->coe[y->deg], globx);  /* Save high coeff. */
 375    y->deg += x->deg;
 376    gtog(globx, y->coe[y->deg]);  /* Move high coeff. */
 377    unstuff_partial(globy, y, words);
 378    mulg(x->coe[x->deg], y->coe[y->deg]); /* resolve high coeff. */
 379    justifyp(y);
 380 }
 381
 382 binarysegsquare(poly y) {
 383         int bits = bitlen(p), words;
 384       words = (2*bits + log_2(y->deg+1) + 32 + 15)/16;
 385    stuff(y, globy, words);
 386    squareg(globy);
 387    gtog(y->coe[y->deg], globx);  /* Save high coeff. */
 388    y->deg += y->deg;
 389    gtog(globx, y->coe[y->deg]);  /* Move high coeff. */
 390    unstuff_partial(globy, y, words);
 391    mulg(y->coe[y->deg], y->coe[y->deg]); /* resolve high coeff. */
 392    justifyp(y);
 393 }
 394
 395 void
 396 assess(poly x, poly y){
 397         int max = 0, j;
 398    for(j=0; j <= x->deg; j++) {
 399                 if(bitlen(x->coe[j]) > max) max = bitlen(x->coe[j]);
 400    }
 401    max = 0;
 402    for(j=0; j <= y->deg; j++) {
 403                 if(bitlen(y->coe[j]) > max) max = bitlen(y->coe[j]);
 404    }
 405 }
 406
 407 int
 408 pcompp(poly x, poly y) {
 409     int j;
 410     if(x->deg != y->deg) return 1;
 411     for(j=0; j <= x->deg; j++) {
 412                         if(gcompg(x->coe[j], y->coe[j])) return 1;
 413     }
 414     return 0;
 415 }
 416
 417 /*
 418 int max_deg = 0;
 419 */
 420
 421 void
 422 mulp(poly x, poly y)
 423 /*  y *= x. */
 424 {
 425         int n, degx = x->deg, degy = y->deg;
 426
 427 /*
 428 if(degx > max_deg) {
 429         max_deg = degx; printf("xdeg: %d\n", degx);
 430 }
 431
 432 if(degy > max_deg) {
 433         max_deg = degy; printf("ydeg: %d\n", degy);
 434 }
 435 */
 436         if((degx < P_BREAK) || (degy < P_BREAK)) {
 437                 grammarmulp(x,y);
 438                 justifyp(y);
 439                 return;
 440         }
 441    if(x==y) binarysegsquare(y);
 442         else binarysegmul(x, y);
 443 }
 444
 445 void
 446 revp(poly x)
 447 /* Reverse the coefficients of x. */
 448 {       int j, deg = x->deg;
 449
 450         for(j=0; j <= deg/2; j++) {
 451                 gtog(x->coe[deg-j], tmp);
 452                 gtog(x->coe[j], x->coe[deg-j]);
 453                 gtog(tmp, x->coe[j]);
 454         }
 455    justifyp(x);
 456 }
 457
 458 void
 459 recipp(poly f, int deg)
 460 /* f := 1/f, via newton method.  */
 461 {
 462         int lim = deg + 1, prec = 1;
 463
 464         sscratch->deg = 0; itog(1, sscratch->coe[0]);
 465         itog(1, aux);
 466         while(prec < lim) {
 467                 prec <<= 1;
 468                 if(prec > lim) prec = lim;
 469                 f->deg = prec-1;
 470                 ptop(sscratch, tscratch);
 471                 mulp(f, tscratch);
 472                                 tscratch->deg = prec-1;
 473                                 polyrem(tscratch);
 474                 subg(aux, tscratch->coe[0]);
 475                 quickmodg(p, tscratch->coe[0]);
 476                 mulp(sscratch, tscratch);
 477                                 tscratch->deg = prec-1;
 478                                 polyrem(tscratch);
 479                 subp(tscratch, sscratch);
 480                 sscratch->deg = prec-1;
 481                                 polyrem(sscratch);
 482         }
 483         justifyp(sscratch);
 484         ptop(sscratch, f);
 485 }
 486
 487 int
 488 left_justifyp(poly x, int start)
 489 /* Left-justify the polynomial, checking from position "start." */
 490 {
 491         int j, shift = 0;
 492
 493         for(j = start; j <= x->deg; j++) {
 494                 if(!is0(x->coe[j])) {
 495                      shift = start;
 496                      break;
 497                 }
 498         }
 499         x->deg -= shift;
 500         for(j=0; j<= x->deg; j++) {
 501                 gtog(x->coe[j+shift], x->coe[j]);
 502         }
 503         return(shift);
 504 }
 505
 506 void
 507 remp(poly x, poly y, int mode)
 508 /* y := x (mod y).
 509   mode = 0 is normal operation,
 510        = 1 jams a fixed reciprocal,
 511        = 2 uses the fixed reciprocal.
 512  */
 513 {
 514         int degx = x->deg, degy = y->deg, d, shift;
 515
 516         if(degy == 0) {
 517                 y->deg = 0;
 518                 itog(0, y->coe[0]);
 519                 return;
 520         }
 521         d = degx - degy;
 522         if(d < 0) {
 523                 ptop(x, y);
 524                 return;
 525         }
 526         revp(x); revp(y);
 527         ptop(y, rscratch);
 528         switch(mode) {
 529            case 0: recipp(rscratch, d);
 530                    break;
 531       case 1: recipp(rscratch, degy); /* degy -1. */
 532                    ptop(rscratch, precip);
 533                    rscratch->deg = d; justifyp(rscratch);
 534                    break;
 535            case 2: ptop(precip, rscratch);
 536                    rscratch->deg = d; justifyp(rscratch);
 537                    break;
 538         }
 539 /* Next, a limited-precision multiply. */
 540         if(d < degx) { x->deg = d; justifyp(x);}
 541         mulp(x, rscratch);
 542         rscratch->deg = d;
 543                 polyrem(rscratch);
 544                 justifyp(rscratch);
 545         x->deg = degx; justifyp(x);
 546         mulp(rscratch, y);
 547         subp(x, y);
 548         negp(y); polyrem(y);
 549         shift = left_justifyp(y, d+1);
 550    for(d = y->deg+1; d <= degx-shift; d++) itog(0, y->coe[d]);
 551         y->deg = degx - shift;
 552         revp(y);
 553         revp(x);
 554 }
 555
 556 fullmod(poly x) {
 557    polyrem(x);
 558    ptop(smonic, s[0]);
 559    remp(x, s[0], 2);
 560    ptop(s[0], x);
 561    polyrem(x);
 562 }
 563
 564 scalarmul(giant s, poly x) {
 565         int j;
 566    for(j=0; j <= x->deg; j++) {
 567                 mulg(s, x->coe[j]);
 568       modg(p, x->coe[j]);
 569    }
 570 }
 571
 572
 573 schain(int el) {
 574    int j;
 575
 576         s[0]->deg = 0;
 577    itog(0, s[0]->coe[0]);
 578
 579         s[1]->deg = 0;
 580    itog(1, s[1]->coe[0]);
 581         s[2]->deg = 0;
 582    itog(2, s[2]->coe[0]);
 583
 584    s[3]->deg = 4;
 585    gtog(a, aux); mulg(a, aux); negg(aux);
 586    gtog(aux, s[3]->coe[0]);
 587    gtog(b, aux); smulg(12, aux);
 588    gtog(aux, s[3]->coe[1]);
 589    gtog(a, aux); smulg(6, aux);
 590    gtog(aux, s[3]->coe[2]);
 591    itog(0, s[3]->coe[3]);
 592    itog(3, s[3]->coe[4]);
 593
 594    s[4]->deg = 6;
 595    gtog(a, aux); mulg(a, aux); mulg(a, aux);
 596    gtog(b, tmp); mulg(b, tmp); smulg(8, tmp); addg(tmp, aux);
 597    negg(aux);
 598    gtog(aux, s[4]->coe[0]);
 599    gtog(b, aux); mulg(a, aux); smulg(4, aux); negg(aux);
 600    gtog(aux, s[4]->coe[1]);
 601    gtog(a, aux); mulg(a, aux); smulg(5, aux); negg(aux);
 602    gtog(aux, s[4]->coe[2]);
 603    gtog(b, aux); smulg(20, aux);
 604    gtog(aux, s[4]->coe[3]);
 605    gtog(a, aux); smulg(5, aux);
 606    gtog(aux, s[4]->coe[4]);
 607    itog(0, s[4]->coe[5]);
 608    itog(1, s[4]->coe[6]);
 609    itog(4, aux);
 610    scalarmul(aux, s[4]);
 611    cubic->deg = 3;
 612    itog(1, cubic->coe[3]);
 613    itog(0, cubic->coe[2]);
 614    gtog(a, cubic->coe[1]);
 615    gtog(b, cubic->coe[0]);
 616    for(j=5; j <= el; j++) {
 617         if(j % 2 == 0) {
 618                                 ptop(s[j/2 + 2], s[j]); mulp(s[j/2-1], s[j]);
 619             polyrem(s[j]); mulp(s[j/2-1], s[j]); polyrem(s[j]);
 620             ptop(s[j/2-2], s[0]); mulp(s[j/2+1], s[0]); polyrem(s[0]);
 621             mulp(s[j/2+1], s[0]); polyrem(s[0]);
 622             subp(s[0], s[j]); mulp(s[j/2], s[j]); polyrem(s[j]);
 623                            gtog(p, aux); iaddg(1, aux); gshiftright(1, aux);
 624                                 scalarmul(aux, s[j]);
 625         } else {
 626             ptop(s[(j-1)/2+2], s[j]);
 627             mulp(s[(j-1)/2], s[j]);
 628 polyrem(s[j]);
 629             mulp(s[(j-1)/2], s[j]);
 630 polyrem(s[j]);
 631             mulp(s[(j-1)/2], s[j]);
 632 polyrem(s[j]);
 633             ptop(s[(j-1)/2-1], s[0]);
 634             mulp(s[(j-1)/2 + 1], s[0]); polyrem(s[0]);
 635             mulp(s[(j-1)/2 + 1], s[0]); polyrem(s[0]);
 636             mulp(s[(j-1)/2 + 1], s[0]); polyrem(s[0]);
 637             if(((j-1)/2) % 2 == 1) {
 638                                         mulp(cubic, s[0]); polyrem(s[0]);
 639                                         mulp(cubic, s[0]); polyrem(s[0]);
 640             } else {
 641                                         mulp(cubic, s[j]); polyrem(s[j]);
 642                                         mulp(cubic, s[j]); polyrem(s[j]);
 643             }
 644 // polyout(s[1]); polyout(s[3]); polyout(s[0]); polyout(s[j]);
 645             subp(s[0], s[j]);
 646             polyrem(s[j]);
 647         }
 648    }
 649 }
 650
 651 init_recip(int el) {
 652    int j;
 653    ptop(s[el], smonic);
 654    if(el == 2) {
 655                 mulp(cubic, smonic); polyrem(smonic);
 656    }
 657    gtog(smonic->coe[smonic->deg], aux); /* High coeff. */
 658    binvg(p, aux);
 659    scalarmul(aux, smonic);  /* s is now monic. */
 660    s[0]->deg = smonic->deg + 1;
 661    for(j=0; j <= s[0]->deg; j++) itog(1, s[0]->coe[j]);
 662    ptop(smonic, pbuff);
 663    remp(s[0], pbuff, 1);  /* Initialize reciprocal of s as precip. */
 664 }
 665
 666 /* void powerpoly(poly x, giant n)
 667 {       int len, pos;
 668         ptop(x, pbuff);
 669         x->deg = 0; itog(1, x->coe[0]);
 670         len = bitlen(n);
 671         pos = 0;
 672         while(1) {
 673                 if(bitval(n, pos++)) {
 674                         mulp(pbuff, x);
 675                         fullmod(x);
 676                 }
 677                 if(pos>=len) break;
 678                 mulp(pbuff, pbuff);
 679                 fullmod(pbuff);
 680         }
 681 }
 682 */
 683
 684 void powerpoly(poly x, giant n)
 685 /* Base-4 window. */
 686 {       int pos, code;
 687         ptop(x, pbuff);  /* x. */
 688         ptop(pbuff, pbuff2);
 689         mulmod(pbuff2, pbuff2); mulmod(pbuff, pbuff2); /* x^3. */
 690         pos = bitlen(n)-2;
 691         while(pos >= 0) {
 692                 mulmod(x, x);
 693                 if(pos==0) {
 694                         if(bitval(n, pos) != 0) {
 695                                 mulmod(pbuff, x);
 696                         }
 697                         break;
 698                 }
 699                 code = (bitval(n, pos) != 0) * 2 + (bitval(n, pos-1) != 0);
 700                 switch(code) {
 701                         case 0: mulmod(x,x); break;
 702                         case 1: mulmod(x,x);
 703                                 mulmod(pbuff, x);
 704                                 break;
 705                         case 2: mulmod(pbuff, x);
 706                                 mulmod(x,x); break;
 707                         case 3: mulmod(x,x); mulmod(pbuff2, x); break;
 708                 }
 709                 pos -= 2;
 710         }
 711 }
 712
 713 mulmod(poly x, poly y) {
 714         mulp(x, y); fullmod(y);
 715 }
 716
 717 elldoublep(poly n1, poly d1, poly m1, poly c1, poly n0, poly d0,
 718                 poly m0, poly c0) {
 719
 720      ptop(n1, mn); mulmod(n1, mn);
 721           ptop(mn, pbuff); addp(mn, mn); addp(pbuff, mn);
 722      fullmod(mn);
 723           ptop(d1, pbuff); mulmod(d1, pbuff);
 724           scalarmul(a, pbuff); addp(pbuff, mn);
 725           fullmod(mn);
 726           mulmod(c1, mn);
 727           ptop(m1, md); addp(md, md);
 728           mulmod(d1, md); mulmod(d1, md); mulmod(cubic, md);
 729
 730      ptop(d1, n0); mulmod(mn, n0); mulmod(mn, n0);
 731           mulmod(cubic, n0);
 732           ptop(n1, pbuff); addp(pbuff, pbuff); fullmod(pbuff);
 733           mulmod(md, pbuff); mulmod(md, pbuff);
 734           subp(pbuff, n0); fullmod(n0);
 735      ptop(md, d0); mulmod(md, d0); mulmod(d1, d0);
 736
 737      ptop(mn, m0); mulmod(c1, m0);
 738           ptop(d0, pbuff); mulmod(n1, pbuff);
 739           ptop(n0, c0); mulmod(d1, c0); subp(c0, pbuff);
 740           fullmod(pbuff);
 741           mulmod(pbuff, m0);
 742           ptop(m1, pbuff); mulmod(md, pbuff);
 743           mulmod(d1, pbuff); mulmod(d0, pbuff);
 744           subp(pbuff, m0); fullmod(m0);
 745
 746      ptop(c1, c0); mulmod(md, c0); mulmod(d1, c0); mulmod(d0, c0);
 747 }
 748
 749 elladdp(poly n1, poly d1, poly m1, poly c1, poly n2, poly d2, poly m2, poly c2, poly n0, poly d0, poly m0, poly c0) {
 750         ptop(m2, mn); mulmod(c1, mn);
 751    ptop(m1, pbuff); mulmod(c2, pbuff);
 752    subp(pbuff, mn); fullmod(mn);
 753    mulmod(d1, mn); mulmod(d2, mn);
 754
 755         ptop(n2, md); mulmod(d1, md);
 756    ptop(n1, pbuff); mulmod(d2, pbuff);
 757    subp(pbuff, md); fullmod(md);
 758    mulmod(c1, md); mulmod(c2, md);
 759
 760    ptop(cubic, n0); mulmod(mn, n0); mulmod(mn, n0);
 761    mulmod(d1, n0); mulmod(d2, n0);
 762    ptop(n1, pbuff); mulmod(d2, pbuff);
 763    ptop(n2, d0); mulmod(d1, d0);
 764    addp(d0, pbuff); mulmod(md, pbuff); mulmod(md, pbuff);
 765    subp(pbuff, n0); fullmod(n0);
 766
 767    ptop(md, d0); mulmod(md, d0); mulmod(d1, d0); mulmod(d2, d0);
 768
 769    ptop(mn, m0); mulmod(c1, m0);
 770    ptop(d0, pbuff); mulmod(n1, pbuff);
 771    ptop(d1, c0); mulmod(n0, c0);
 772    subp(c0, pbuff); fullmod(pbuff);
 773    mulmod(pbuff, m0);
 774    ptop(m1, pbuff); mulmod(md, pbuff);
 775    mulmod(d0, pbuff); mulmod(d1, pbuff);
 776    subp(pbuff, m0); fullmod(m0);
 777
 778    ptop(md, c0); mulmod(c1, c0); mulmod(d0, c0); mulmod(d1, c0);
 779
 780 }
 781
 782 polyout(poly x) {
 783    int j;
 784    for(j=0; j <= x->deg; j++) {printf("%d: ",j); gout(x->coe[j]);}
 785 }
 786
 787 main(int argc, char **argv) {
 788       int j, ct = 0, el, xmatch, ymatch;
 789       int k, t;
 790       int T[L_CEIL], P[L_CEIL], LL[L_CEIL];
 791       giant ss[L_CEIL];
 792       unsigned int ord, ordminus;
 793       point_proj pt, pt2;
 794
 795       p = newgiant(INFINITY);  /* Also sets up internal giants' stacks. */
 796       j = ((Q_MAX+15)/16);
 797       init_tools(2*j);
 798           a = newgiant(j);
 799           b = newgiant(j);
 800
 801 entry:
 802       printf("Give p > 3, a, b on separate lines:\n"); fflush(stdout);
 803       gin(p);  /* Field prime. */
 804       if((Q = bitlen(p)) > Q_MAX) {
 805                 fprintf(stderr, "p too large, larger than %d bits.\n", Q);
 806                 goto entry;
 807           }
 808       if(!prime_probable(p)) {
 809                 fprintf(stderr, "p is not but must be prime.\n", Q);
 810                 goto entry;
 811           }
 812       gin(a); gin(b); /* Curve parameters. */
 813
 814           t1 = newgiant(2*j);
 815           t2 = newgiant(2*j);
 816 /* Next, discriminant test for legitimacy of curve. */
 817         gtog(a, t1); squareg(t1); modg(p, t1); mulg(a, t1); modg(p, t1);
 818     gshiftleft(2, t1);  /* 4 a^3 mod p. */
 819     gtog(b, t2); squareg(t2); modg(p, t2); smulg(27, t2);
 820     addg(t2, t1); modg(p, t1);
 821     if(isZero(t1)) {
 822                 fprintf(stderr, "Discriminant FAILED\n");
 823                 goto entry;
 824     }
 825     printf("Discriminant PASSED\n"); fflush(stdout);
 826
 827 /* Next, find an efficient prime power array such that
 828    Prod[powers] >= 16 p. */
 829
 830  /* Create minimal prime power array such that Prod[powers]^2 > 16p. */
 831
 832     gtog(p, t2); gshiftleft(4, t2);   /* t2 := 16p. */
 833
 834     L_MAX = 3;
 835     while(L_MAX <= L_CEIL-1) {
 836                 for(j=0; j <= L_MAX; j++) LL[j] = 0;
 837                 for(j=2; j <= L_MAX; j++) {
 838                         if(primeq(j)) {
 839                                 LL[j] = 1;
 840                         k = j;
 841                                 while(1) {
 842                                 k *= j;
 843                                         if(k <= L_MAX) {
 844                                                 LL[k] = 1;
 845                                                 LL[k/j] = 0;
 846                                         }
 847                                         else break;
 848                                 }
 849                         }
 850                 }
 851         itog(1, t1);
 852         for(j=2; j <= L_MAX; j++) {
 853                         if(LL[j]) { smulg(j, t1); smulg(j, t1); } /* Building up t1^2. */
 854                 }
 855                 if(gcompg(t1, t2) > 0) break;
 856         ++L_MAX;
 857         }
 858
 859    printf("Initializing for prime powers:\n");
 860    for(j=2; j <= L_MAX; j++) {
 861                 if(LL[j]) printf("%d ", j);
 862    }
 863    printf("\n");
 864    fflush(stdout);
 865
 866
 867    MAX_DIGS = (2 + (Q+15)/8);  /* Size of (squared) coefficients. */
 868    MAX_COEFFS = ((L_MAX+1)*(L_MAX+2));
 869
 870    init_all();
 871    schain(L_MAX+1);
 872
 873 for(L = 2; L <= L_MAX; L++) {
 874       if(!LL[L]) continue;
 875 printf("Resolving Schoof L = %d...\n", L);
 876       P[ct] = L;  /* Stuff another prime power. */
 877       init_recip(L);
 878 // printf("s: "); polyout(s[L]);
 879       gtog(p, aux2);
 880       k = idivg(L, aux2);  /* p (mod L). */
 881
 882 printf("power...\n");
 883       txd->deg = 0; itog(1, txd->coe[0]);
 884       tyd->deg = 0; itog(1, tyd->coe[0]);
 885       txn->deg = 1; itog(0, txn->coe[0]); itog(1, txn->coe[1]);
 886       ptop(txn, kxn);
 887
 888       gtog(p, aux2);
 889       powerpoly(txn, aux2); /* x^p. */
 890 printf("x^p done...\n");
 891       ptop(txn, powx);
 892       powerpoly(powx, aux2);
 893 printf("x^p^2 done...\n");
 894       ptop(cubic, tyn);
 895       gtog(p, aux2); itog(1, aux); subg(aux, aux2);
 896       gshiftright(1, aux2); /* aux2 := (p-1)/2. */
 897       powerpoly(tyn, aux2); /* y^p. */
 898 printf("y^p done...\n");
 899       ptop(tyn, powy); mulp(tyn, powy); fullmod(powy);
 900       mulp(cubic, powy); fullmod(powy);
 901       powerpoly(powy, aux2);
 902       mulp(tyn, powy); fullmod(powy);
 903 printf("Powers done...\n");
 904
 905 // printf("pow" ); polyout(powx); polyout(powy);
 906       ptop(txn, txn1); ptop(txd, txd1);  /* Save t = 1 case. */
 907       ptop(tyn, tyn1); ptop(tyd, tyd1);
 908 /* We now shall test
 909      (powx, y powy) + k(kxn/kxd, y kyn/kyd) = t(txn/txd, y tyn/tyd)
 910  */
 911
 912     if(k==1) { ptop(txd, kxd); ptop(txd, kyd);
 913                               ptop(txd, kyn);
 914     } else {
 915                    ptop(s[k], kxd); mulp(s[k], kxd); fullmod(kxd);
 916      if(k%2==0) { mulp(cubic, kxd); fullmod(kxd); }
 917      mulp(kxd, kxn); fullmod(kxn);
 918      ptop(s[k-1], pbuff); mulp(s[k+1], pbuff); fullmod(pbuff);
 919      if(k%2==1) {mulp(cubic, pbuff); fullmod(pbuff); }
 920      subp(pbuff, kxn); fullmod(kxn);
 921
 922      ptop(s[k+2], kyn); mulp(s[k-1], kyn); fullmod(kyn);
 923           mulp(s[k-1], kyn); fullmod(kyn);
 924      if(k > 2) {
 925                 ptop(s[k-2], pbuff); mulp(s[k+1], pbuff); fullmod(pbuff);
 926           mulp(s[k+1], pbuff); fullmod(pbuff);
 927      subp(pbuff, kyn); fullmod(kyn);
 928      }
 929      ptop(s[k], kyd); mulp(s[k], kyd); fullmod(kyd);
 930           mulp(s[k], kyd); fullmod(kyd);
 931      if(k%2==0) { mulp(cubic, kyd); fullmod(kyd);
 932                         mulp(cubic, kyd); fullmod(kyd);}
 933      itog(4, aux2); scalarmul(aux2, kyd);
 934     }
 935 //printf("kP: "); polyout(kxn); polyout(kxd); polyout(kyn); polyout(kyd);
 936 /* Commence t = 0 check. */
 937 printf("Checking t = %d ...\n", 0);
 938 fflush(stdout);
 939
 940      ptop(powx, pbuff); mulp(kxd, pbuff);
 941      subp(kxn, pbuff);
 942      fullmod(pbuff);
 943
 944      xmatch = ymatch = 0;
 945      if(iszerop(pbuff)) {
 946                  xmatch = 1;
 947          /* Now check y coords. */
 948                  if(L == 2) goto resolve;
 949          ptop(powy, pbuff); mulp(kyd, pbuff);
 950          addp(kyn, pbuff);
 951          fullmod(pbuff);
 952          if(iszerop(pbuff)) {
 953                resolve:
 954                                         printf("%d %d\n", L, 0);
 955                         T[ct++] = 0;
 956                     continue;
 957          } else ymatch = 1;
 958      }
 959 /* Combine pt1 and pt2. */
 960    if((xmatch == 1) && (ymatch == 1))
 961        elldoublep(powx, txd, powy, txd, nx, dx, ny, dy);
 962        else
 963        elladdp(powx, txd, powy, txd, kxn, kxd, kyn, kyd, nx, dx, ny, dy);
 964 /* Now {nx/dx, ny/dy} is (fixed) LHS. */
 965 // printf("add12: "); polyout(nx); polyout(dx); polyout(ny); polyout(dy);
 966 /* Commence t > 0 check. */
 967     for(t=1; t <= L/2; t++) {
 968 printf("Checking t = %d ...\n", t);
 969          if(t > 1) { /* Add (tx1, ty1) to (txn, tyn). */
 970                                 ptop(txn1, pbuff); mulmod(txd, pbuff);
 971             ptop(txn, powx); mulmod(txd1, powx);
 972                                 subp(powx, pbuff); fullmod(pbuff);
 973             if(!iszerop(pbuff))
 974                                  elladdp(txn1, txd1, tyn1, tyd1, txn, txd, tyn, tyd,
 975                                         tmp1, tmp2, tmp3, tmp4);
 976                            else elldoublep(txn, txd, tyn, tyd,
 977                                         tmp1, tmp2, tmp3, tmp4);
 978             ptop(tmp1, txn); ptop(tmp2, txd);
 979                            ptop(tmp3, tyn); ptop(tmp4, tyd);
 980          }
 981 // printf("tQ: "); polyout(txn); polyout(txd); polyout(tyn); polyout(tyd);
 982          /* Next, check {nx/dx, ny/dy} =? {txn/txd, tyn/tyd}. */
 983                         ptop(nx, pbuff); mulmod(txd, pbuff);
 984                    ptop(dx, powx); mulmod(txn, powx);
 985                         subp(powx, pbuff); fullmod(pbuff);
 986                    if(!iszerop(pbuff)) continue;
 987          /* Next, check y. */
 988                 //      printf("y check!\n");
 989                         ptop(ny, pbuff); mulmod(tyd, pbuff);
 990                    ptop(dy, powx); mulmod(tyn, powx);
 991                         subp(powx, pbuff); fullmod(pbuff);
 992                    if(iszerop(pbuff)) {
 993                         printf("%d %d\n", L, t);
 994                                 T[ct++] = t;
 995                    }  else {
 996                         printf("%d %d\n", L, L-t);
 997                                 T[ct++] = L-t;
 998                                 }
 999                                         fflush(stdout);
1000                                 break;
1001    }
1002 }
1003
1004 /* Now, prime powers P[] and CRT residues T[] are intact. */
1005         printf("Prime powers L:\n");
1006         printf("{");
1007     for(j=0; j < ct-1; j++) {
1008                 printf("%d, ", P[j]);
1009     }
1010     printf("%d }\n", P[ct-1]);
1011
1012         printf("Residues t (mod L):\n");
1013         printf("{");
1014     for(j=0; j < ct-1; j++) {
1015                 printf("%d, ", T[j]);
1016     }
1017     printf("%d }\n", T[ct-1]);
1018
1019 /* Mathematica algorithm for order:
1020 plis = {2^5, 3^3, 5^2, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47};
1021 tlis = {1,    26,   4, 2,  4, 11,  6,  5, 19, 22, 10, 16,  7, 22, 11};
1022 prod = Apply[Times, plis];
1023 prlis = prod/plis;
1024 invlis = Table[PowerMod[prlis[[q]], -1, plis[[q]]],{q,1,Length[plis]}];
1025 p = 2^127 - 1;
1026 t = Mod[tlis . (prlis * invlis), prod];
1027 ord = p + 1 - If[t^2 > 4p, t - prod, t]
1028 */
1029
1030   itog(1, t1);
1031   for(j=0; j < ct; j++) {
1032     free(s[j]);  /* Just to clear memory. */
1033         smulg(P[j], t1);
1034   }
1035
1036   for(j=0; j < 2*ct; j++) {
1037            ss[j] = newgiant(MAX_DIGS);
1038   }
1039
1040   for(j=0; j < ct; j++) {
1041         gtog(t1, ss[j]);
1042     itog(P[j], t2);
1043     divg(t2, ss[j]);
1044   }
1045
1046   for(j=0; j < ct; j++) {
1047        gtog(ss[j], ss[j+ct]);
1048        itog(P[j], t2);
1049            invg(t2, ss[j+ct]);
1050   }
1051
1052   itog(0, t4);
1053   for(j=0; j < ct; j++) {
1054         itog(T[j], t5);
1055         mulg(ss[j], t5);
1056         mulg(ss[j+ct], t5);
1057         addg(t5, t4);
1058   }
1059   modg(t1, t4);
1060   gtog(p, t5);
1061   iaddg(1, t5);
1062   gtog(t4, t2);
1063   squareg(t4);
1064   gtog(p, t3); gshiftleft(2, t3);
1065   if(gcompg(t4, t3) > 0) subg(t1, t2);
1066   subg(t2, t5);
1067   printf("Parameters:\n");
1068   printf("p = "); gout(p);
1069   printf("a = "); gout(a);
1070   printf("b = "); gout(b);
1071   printf("Curve order:\n");
1072   printf("o = "); gout(t5); gtog(t5, t3); /* Save order as t3. */
1073   printf("Twist order:\n");
1074   printf("o' = ");
1075   addg(t2, t5);
1076   addg(t2, t5);
1077   gout(t5);
1078 /* Next, verify the order. */
1079   printf("Verifying order o:...\n");
1080   init_ell_proj(MAX_DIGS);
1081   pt = new_point_proj(MAX_DIGS);
1082   pt2 = new_point_proj(MAX_DIGS);
1083   itog(1,t2);
1084   find_point_proj(pt, t2, a, b, p);
1085   printf("A point on the curve y^2 = x^3 + a x + b (mod p) is:\n");
1086   printf("(x,y,z) = {\n"); gout(pt->x); printf(",");
1087   gout(pt->y); printf(","); gout(pt->z);
1088   printf("}\n");
1089   ell_mul_proj(pt, pt2, t3, a, p);
1090   printf("A multiple is:\n");
1091   printf("o * (x,y,z) = {\n");
1092   gout(pt2->x); printf(",");gout(pt2->y); printf(",");gout(pt2->z);
1093   printf("}\n");
1094   if(!isZero(pt2->z)) {
1095         printf("TILT: multiple should be point-at-infinity.\n");
1096         exit(1);
1097   }
1098   printf("VERIFIED: multiple is indeed point-at-infinity.\n");
1099 }