OSX/libsecurity_cryptkit/lib/CurveParamDocs/schoofs.c

   1 /* schoofs.c
   2
   3    Elliptic curve order calculator, for
   4
   5    y^2 = x^3 + a x + b (mod p)
   6
   7    (NOTE:
   8     This version has order sieving, triggering on the
   9     initial b parameter and incrementing same.
  10     Parameter details are described in schoof.c)
  11
  12    Compile with:
  13
  14    % cc -O schoofs.c giants.c tools.c -lm -o schoofs
  15
  16    and run, entering the a,b parameters.
  17
  18  * Change history:
  19
  20      20 Mar 01 (REC) Added binarysegsquare() and base-4 ladder
  21      20 Mar 01 (REC) Bumped MAX_DIGS as in schoof.c
  22       4 Feb 99 (REC) Sieving invoked.
  23       2 Feb 99 (REC) Added explicit CRT result
  24      12 Jan 99 (REC) Removed (hopefully) last of memory crashes
  25      20 Jan 98 (REC) Creation
  26
  27  *      c. 1998 Perfectly Scientific, Inc.
  28  *      All Rights Reserved.
  29  *
  30  *
  31  *************************************************************/
  32
  33 #include <stdio.h>
  34 #include<assert.h>
  35 #include <math.h>
  36 #include"giants.h"
  37 #include "tools.h"
  38
  39 #define P_BREAK 32
  40
  41
  42 #define Q 256 /* See schoof.c for explanation. */
  43 #define L_MAX 100
  44 #define MAX_DIGS (2 + (Q+15)/8)  /* Size of (squared) coefficients. */
  45 #define MAX_COEFFS ((L_MAX+1)*(L_MAX+2))
  46
  47 typedef struct
  48          {
  49          int deg;
  50          giant *coe;
  51          } polystruct;
  52 typedef polystruct *poly;
  53
  54
  55 static giant *mcand, coe, tmp, err, aux, aux2, globx, globy, t1, t2,
  56         t3, t4, t5;
  57 static poly qscratch, rscratch, sscratch, tscratch, pbuff, pbuff2,
  58      precip, cubic, powx, powy, kxn, kyn, kxd, kyd,
  59      txn, txd, tyn, tyd, txn1, txd1, tyn1, tyd1,
  60      nx, dx, ny, dy, mn, md, tmp1, tmp2, tmp3, tmp4;
  61 static poly s[L_MAX+2], smonic;
  62 static giant p, a, b, magcheck;
  63 int L;
  64
  65 void quickmodg(giant g, giant x)
  66 {       int sgn = x->sign;
  67
  68         if(sgn == 0) return;
  69         if(sgn > 0) {
  70                 if(gcompg(x, g) >= 0) subg(g, x);
  71                 return;
  72         }
  73         addg(g,x);
  74         return;
  75 }
  76
  77 int
  78 log_2(int n) {
  79         int c = 1, d = -1;
  80         while(c <= n) {
  81                 c <<= 1;
  82                 ++d;
  83         }
  84         return(d);
  85 }
  86
  87 void
  88 justifyp(poly x) {
  89         int j;
  90         for(j = x->deg; j >= 0; j--) {
  91                 if(!is0(x->coe[j])) break;
  92         }
  93         x->deg = (j>0)? j : 0;
  94 }
  95
  96 void
  97 polyrem(poly x) {
  98         int j;
  99    for(j=0; j <= x->deg; j++) {
 100       modg(p, x->coe[j]);
 101    }
 102    justifyp(x);
 103 }
 104
 105
 106 giant *
 107 newa(int n) {
 108         giant *p = (giant *)malloc(n*sizeof(giant));
 109         int j;
 110         for(j=0; j<n; j++) {
 111                 p[j] = newgiant(MAX_DIGS);
 112         }
 113         return(p);
 114 }
 115
 116 poly
 117 newpoly(int coeffs) {
 118         poly pol;
 119         pol = (poly) malloc(sizeof(polystruct));
 120         pol->coe = (giant *)newa(coeffs);
 121         return(pol);
 122 }
 123
 124 void
 125 init_all() {
 126    int j;
 127
 128    j = (2*Q + log_2(MAX_COEFFS) + 32 + 15)/16;
 129    globx = newgiant(MAX_COEFFS * j);
 130    globy = newgiant(MAX_COEFFS * j);
 131
 132    init_tools(MAX_DIGS);
 133    powx = newpoly(MAX_COEFFS);
 134    powy = newpoly(MAX_COEFFS);
 135    kxn = newpoly(MAX_COEFFS);
 136    kxd = newpoly(MAX_COEFFS);
 137    kyn = newpoly(MAX_COEFFS);
 138    kyd = newpoly(MAX_COEFFS);
 139    txn = newpoly(MAX_COEFFS);
 140    txd = newpoly(MAX_COEFFS);
 141    tyn = newpoly(MAX_COEFFS);
 142    tyd = newpoly(MAX_COEFFS);
 143    txn1 = newpoly(MAX_COEFFS);
 144    txd1 = newpoly(MAX_COEFFS);
 145    tyn1 = newpoly(MAX_COEFFS);
 146    tyd1 = newpoly(MAX_COEFFS);
 147    nx = newpoly(MAX_COEFFS);
 148    ny = newpoly(MAX_COEFFS);
 149    dx = newpoly(MAX_COEFFS);
 150    dy = newpoly(MAX_COEFFS);
 151    mn = newpoly(MAX_COEFFS);
 152    md = newpoly(MAX_COEFFS);
 153    tmp1 = newpoly(MAX_COEFFS);
 154    tmp2 = newpoly(MAX_COEFFS);
 155    tmp3 = newpoly(MAX_COEFFS);
 156    tmp4 = newpoly(MAX_COEFFS);
 157    mcand = (giant *)newa(MAX_COEFFS);
 158
 159    s[0] = newpoly(MAX_COEFFS);
 160
 161    for(j=1; j<=L_MAX+1; j++) {
 162                 s[j] = newpoly(j*(j+1));
 163    }
 164    smonic = newpoly(MAX_COEFFS);
 165 /* The next three need extra space for remaindering routine. */
 166         qscratch = newpoly(2*MAX_COEFFS);
 167         rscratch = newpoly(2*MAX_COEFFS);
 168         pbuff = newpoly(2*MAX_COEFFS);
 169         pbuff2 = newpoly(MAX_COEFFS);
 170         sscratch = newpoly(MAX_COEFFS);
 171         tscratch = newpoly(MAX_COEFFS);
 172         tmp = newgiant(MAX_DIGS);
 173         err = newgiant(MAX_DIGS);
 174         coe = newgiant(MAX_DIGS);
 175         aux = newgiant(MAX_DIGS);
 176         aux2 = newgiant(MAX_DIGS);
 177         t1 = newgiant(MAX_DIGS);
 178         t2 = newgiant(MAX_DIGS);
 179         t3 = newgiant(MAX_DIGS);
 180         t4 = newgiant(MAX_DIGS);
 181         t5 = newgiant(MAX_DIGS);
 182                 cubic = newpoly(4);
 183         p = newgiant(MAX_DIGS);
 184         a = newgiant(MAX_DIGS);
 185         b = newgiant(MAX_DIGS);
 186         magcheck = newgiant(MAX_DIGS);
 187         precip = newpoly(MAX_COEFFS);
 188 }
 189
 190 void
 191 atoa(giant *a, giant *b, int n) {
 192         int j;
 193         for(j=0; j<n; j++) gtog(a[j], b[j]);
 194 }
 195
 196 void
 197 ptop(poly x, poly y)
 198 /* y := x. */
 199 {
 200         y->deg = x->deg;
 201         atoa(x->coe, y->coe, y->deg+1);
 202 }
 203
 204 void
 205 negp(poly y)
 206 /* y := -y. */
 207 {       int j;
 208         for(j=0; j <= y->deg; j++) {
 209                 negg(y->coe[j]);
 210                 quickmodg(p, y->coe[j]);
 211         }
 212 }
 213
 214 int
 215 iszer(giant a) {
 216
 217   if(a->sign == 0) return(1);
 218   return(0);
 219
 220 }
 221
 222 int
 223 iszerop(poly x) {
 224    if(x->deg == 0 && iszer(x->coe[0])) return 1;
 225    return 0;
 226 }
 227
 228 int
 229 is0(giant a) {
 230         return(iszer(a));
 231 }
 232
 233 int
 234 is1(giant a) {
 235         return(isone(a));
 236 }
 237
 238
 239 void
 240 addp(poly x, poly y)
 241 /* y += x. */
 242 {
 243         int d = x->deg, j;
 244
 245         if(y->deg > d) d = y->deg;
 246         for(j = 0; j <= d; j++) {
 247                 if((j <= x->deg) && (j <= y->deg)) {
 248                         addg(x->coe[j], y->coe[j]);
 249                         quickmodg(p, y->coe[j]);
 250                         continue;
 251                 }
 252                 if((j <= x->deg) && (j > y->deg)) {
 253                         gtog(x->coe[j], y->coe[j]);
 254                         quickmodg(p, y->coe[j]);
 255                         continue;
 256                 }
 257         }
 258         y->deg = d;
 259         justifyp(y);
 260 }
 261
 262 void
 263 subp(poly x, poly y)
 264 /* y -= x. */
 265 {
 266         int d = x->deg, j;
 267
 268         if(y->deg > d) d = y->deg;
 269         for(j = 0; j <= d; j++) {
 270                 if((j <= x->deg) && (j <= y->deg)) {
 271                         subg(x->coe[j], y->coe[j]);
 272                         quickmodg(p, y->coe[j]);
 273                         continue;
 274                 }
 275                 if((j <= x->deg) && (j > y->deg)) {
 276                         gtog(x->coe[j], y->coe[j]);
 277                         negg(y->coe[j]);
 278                         quickmodg(p, y->coe[j]);
 279                         continue;
 280                 }
 281         }
 282         y->deg = d;
 283         justifyp(y);
 284 }
 285
 286
 287 void
 288 grammarmulp(poly a, poly b)
 289 /* b *= a. */
 290 {
 291         int dega = a->deg, degb = b->deg, deg = dega + degb;
 292         register int d, kk, first, diffa;
 293
 294         for(d=deg; d>=0; d--) {
 295                 diffa = d-dega;
 296                 itog(0, coe);
 297                 for(kk=0; kk<=d; kk++) {
 298                         if((kk>degb)||(kk<diffa)) continue;
 299                         gtog(b->coe[kk], tmp);
 300                         mulg(a->coe[d-kk], tmp);
 301                         modg(p, tmp);
 302                         addg(tmp, coe);
 303                         quickmodg(p, coe);
 304                 }
 305                 gtog(coe, mcand[d]);
 306         }
 307         atoa(mcand, b->coe, deg+1);
 308         b->deg = deg;
 309         justifyp(b);
 310 }
 311
 312 void
 313 atop(giant *a, poly x, int deg)
 314 /* Copy array to poly, with monic option. */
 315 {
 316         int adeg = abs(deg);
 317         x->deg = adeg;
 318         atoa(a, x->coe, adeg);
 319         if(deg < 0) {
 320            itog(1, x->coe[adeg]);
 321         } else {
 322            gtog(a[adeg], x->coe[adeg]);
 323         }
 324 }
 325
 326 void
 327 just(giant g) {
 328    while((g->n[g->sign-1] == 0) && (g->sign > 0)) --g->sign;
 329 }
 330
 331 void
 332 unstuff_partial(giant g, poly y, int words){
 333         int j;
 334         for(j=0; j < y->deg; j++) {
 335                 bcopy((g->n) + j*words, y->coe[j]->n, words*sizeof(short));
 336       y->coe[j]->sign = words;
 337       just(y->coe[j]);
 338    }
 339 }
 340
 341 void
 342 stuff(poly x, giant g, int words) {
 343         int deg = x->deg, j, coedigs;
 344
 345    g->sign = words*(1 + deg);
 346    for(j=0; j <= deg; j++) {
 347                 coedigs = (x->coe[j])->sign;
 348                 bcopy(x->coe[j]->n, (g->n) + j*words, coedigs*sizeof(short));
 349       bzero((g->n) + (j*words+coedigs),
 350                                 sizeof(short)*(words-coedigs));
 351         }
 352    just(g);
 353 }
 354
 355 int maxwords = 0;
 356 void
 357
 358 binarysegmul(poly x, poly y) {
 359         int bits = bitlen(p), xwords, ywords, words;
 360
 361         xwords = (2*bits + log_2(x->deg+1) + 32 + 15)/16;
 362    ywords = (2*bits + log_2(y->deg+1) + 32 + 15)/16;
 363    if(xwords > ywords) words = xwords; else words = ywords;
 364 if(words > maxwords) {
 365         maxwords = words;
 366 //      printf("m: %d\n", words);
 367         fflush(stdout);
 368 }
 369    stuff(x, globx, words);
 370    stuff(y, globy, words);
 371    mulg(globx, globy);
 372    gtog(y->coe[y->deg], globx);  /* Save high coeff. */
 373    y->deg += x->deg;
 374    gtog(globx, y->coe[y->deg]);  /* Move high coeff. */
 375    unstuff_partial(globy, y, words);
 376    mulg(x->coe[x->deg], y->coe[y->deg]); /* resolve high coeff. */
 377    justifyp(y);
 378 }
 379
 380 binarysegsquare(poly y) {
 381         int bits = bitlen(p), words;
 382       words = (2*bits + log_2(y->deg+1) + 32 + 15)/16;
 383    stuff(y, globy, words);
 384    squareg(globy);
 385    gtog(y->coe[y->deg], globx);  /* Save high coeff. */
 386    y->deg += y->deg;
 387    gtog(globx, y->coe[y->deg]);  /* Move high coeff. */
 388    unstuff_partial(globy, y, words);
 389    mulg(y->coe[y->deg], y->coe[y->deg]); /* resolve high coeff. */
 390    justifyp(y);
 391 }
 392
 393
 394 void
 395 assess(poly x, poly y){
 396         int max = 0, j;
 397    for(j=0; j <= x->deg; j++) {
 398                 if(bitlen(x->coe[j]) > max) max = bitlen(x->coe[j]);
 399    }
 400 //   printf("max: %d ", max);
 401    max = 0;
 402    for(j=0; j <= y->deg; j++) {
 403                 if(bitlen(y->coe[j]) > max) max = bitlen(y->coe[j]);
 404    }
 405 //   printf("%d\n", max);
 406
 407
 408 }
 409
 410 int
 411 pcompp(poly x, poly y) {
 412     int j;
 413     if(x->deg != y->deg) return 1;
 414     for(j=0; j <= x->deg; j++) {
 415                         if(gcompg(x->coe[j], y->coe[j])) return 1;
 416     }
 417     return 0;
 418 }
 419
 420 int maxdeg = 0;
 421
 422 void
 423 mulp(poly x, poly y)
 424 /*  y *= x. */
 425 {
 426         int n, degx = x->deg, degy = y->deg;
 427
 428 /*
 429 if(degx > max_deg) {
 430         max_deg = degx; printf("xdeg: %d\n", degx);
 431 }
 432
 433 if(degy > max_deg) {
 434         max_deg = degy; printf("ydeg: %d\n", degy);
 435 }
 436 */
 437         if((degx < P_BREAK) || (degy < P_BREAK)) {
 438                 grammarmulp(x,y);
 439                 justifyp(y);
 440                 return;
 441         }
 442    if(x==y) binarysegsquare(y);
 443         else binarysegmul(x, y);
 444 }
 445
 446 void
 447 revp(poly x)
 448 /* Reverse the coefficients of x. */
 449 {       int j, deg = x->deg;
 450
 451         for(j=0; j <= deg/2; j++) {
 452                 gtog(x->coe[deg-j], tmp);
 453                 gtog(x->coe[j], x->coe[deg-j]);
 454                 gtog(tmp, x->coe[j]);
 455         }
 456    justifyp(x);
 457 }
 458
 459 void
 460 recipp(poly f, int deg)
 461 /* f := 1/f, via newton method.  */
 462 {
 463         int lim = deg + 1, prec = 1;
 464
 465         sscratch->deg = 0; itog(1, sscratch->coe[0]);
 466         itog(1, aux);
 467         while(prec < lim) {
 468                 prec <<= 1;
 469                 if(prec > lim) prec = lim;
 470                 f->deg = prec-1;
 471                 ptop(sscratch, tscratch);
 472                 mulp(f, tscratch);
 473                                 tscratch->deg = prec-1;
 474                                 polyrem(tscratch);
 475                 subg(aux, tscratch->coe[0]);
 476                 quickmodg(p, tscratch->coe[0]);
 477                 mulp(sscratch, tscratch);
 478                                 tscratch->deg = prec-1;
 479                                 polyrem(tscratch);
 480                 subp(tscratch, sscratch);
 481                 sscratch->deg = prec-1;
 482                                 polyrem(sscratch);
 483         }
 484         justifyp(sscratch);
 485         ptop(sscratch, f);
 486 }
 487
 488 int
 489 left_justifyp(poly x, int start)
 490 /* Left-justify the polynomial, checking from position "start." */
 491 {
 492         int j, shift = 0;
 493
 494         for(j = start; j <= x->deg; j++) {
 495                 if(!is0(x->coe[j])) {
 496                      shift = start;
 497                      break;
 498                 }
 499         }
 500         x->deg -= shift;
 501         for(j=0; j<= x->deg; j++) {
 502                 gtog(x->coe[j+shift], x->coe[j]);
 503         }
 504         return(shift);
 505 }
 506
 507 void
 508 remp(poly x, poly y, int mode)
 509 /* y := x (mod y).
 510   mode = 0 is normal operation,
 511        = 1 jams a fixed reciprocal,
 512        = 2 uses the fixed reciprocal.
 513  */
 514 {
 515         int degx = x->deg, degy = y->deg, d, shift;
 516
 517         if(degy == 0) {
 518                 y->deg = 0;
 519                 itog(0, y->coe[0]);
 520                 return;
 521         }
 522         d = degx - degy;
 523         if(d < 0) {
 524                 ptop(x, y);
 525                 return;
 526         }
 527         revp(x); revp(y);
 528         ptop(y, rscratch);
 529         switch(mode) {
 530            case 0: recipp(rscratch, d);
 531                    break;
 532       case 1: recipp(rscratch, degy); /* degy -1. */
 533                    ptop(rscratch, precip);
 534                    rscratch->deg = d; justifyp(rscratch);
 535                    break;
 536            case 2: ptop(precip, rscratch);
 537                    rscratch->deg = d; justifyp(rscratch);
 538                    break;
 539         }
 540 /* Next, a limited-precision multiply. */
 541         if(d < degx) { x->deg = d; justifyp(x);}
 542         mulp(x, rscratch);
 543         rscratch->deg = d;
 544                 polyrem(rscratch);
 545                 justifyp(rscratch);
 546         x->deg = degx; justifyp(x);
 547         mulp(rscratch, y);
 548         subp(x, y);
 549         negp(y); polyrem(y);
 550         shift = left_justifyp(y, d+1);
 551    for(d = y->deg+1; d <= degx-shift; d++) itog(0, y->coe[d]);
 552         y->deg = degx - shift;
 553         revp(y);
 554         revp(x);
 555 }
 556
 557 fullmod(poly x) {
 558    polyrem(x);
 559    ptop(smonic, s[0]);
 560    remp(x, s[0], 2);
 561    ptop(s[0], x);
 562    polyrem(x);
 563 }
 564
 565 scalarmul(giant s, poly x) {
 566         int j;
 567    for(j=0; j <= x->deg; j++) {
 568                 mulg(s, x->coe[j]);
 569       modg(p, x->coe[j]);
 570    }
 571 }
 572
 573
 574 schain(int el) {
 575    int j;
 576
 577         s[0]->deg = 0;
 578    itog(0, s[0]->coe[0]);
 579
 580         s[1]->deg = 0;
 581    itog(1, s[1]->coe[0]);
 582         s[2]->deg = 0;
 583    itog(2, s[2]->coe[0]);
 584
 585    s[3]->deg = 4;
 586    gtog(a, aux); mulg(a, aux); negg(aux);
 587    gtog(aux, s[3]->coe[0]);
 588    gtog(b, aux); smulg(12, aux);
 589    gtog(aux, s[3]->coe[1]);
 590    gtog(a, aux); smulg(6, aux);
 591    gtog(aux, s[3]->coe[2]);
 592    itog(0, s[3]->coe[3]);
 593    itog(3, s[3]->coe[4]);
 594
 595    s[4]->deg = 6;
 596    gtog(a, aux); mulg(a, aux); mulg(a, aux);
 597    gtog(b, tmp); mulg(b, tmp); smulg(8, tmp); addg(tmp, aux);
 598    negg(aux);
 599    gtog(aux, s[4]->coe[0]);
 600    gtog(b, aux); mulg(a, aux); smulg(4, aux); negg(aux);
 601    gtog(aux, s[4]->coe[1]);
 602    gtog(a, aux); mulg(a, aux); smulg(5, aux); negg(aux);
 603    gtog(aux, s[4]->coe[2]);
 604    gtog(b, aux); smulg(20, aux);
 605    gtog(aux, s[4]->coe[3]);
 606    gtog(a, aux); smulg(5, aux);
 607    gtog(aux, s[4]->coe[4]);
 608    itog(0, s[4]->coe[5]);
 609    itog(1, s[4]->coe[6]);
 610    itog(4, aux);
 611    scalarmul(aux, s[4]);
 612    cubic->deg = 3;
 613    itog(1, cubic->coe[3]);
 614    itog(0, cubic->coe[2]);
 615    gtog(a, cubic->coe[1]);
 616    gtog(b, cubic->coe[0]);
 617    for(j=5; j <= el; j++) {
 618         if(j % 2 == 0) {
 619                                 ptop(s[j/2 + 2], s[j]); mulp(s[j/2-1], s[j]);
 620             polyrem(s[j]); mulp(s[j/2-1], s[j]); polyrem(s[j]);
 621             ptop(s[j/2-2], s[0]); mulp(s[j/2+1], s[0]); polyrem(s[0]);
 622             mulp(s[j/2+1], s[0]); polyrem(s[0]);
 623             subp(s[0], s[j]); mulp(s[j/2], s[j]); polyrem(s[j]);
 624                            gtog(p, aux); iaddg(1, aux); gshiftright(1, aux);
 625                                 scalarmul(aux, s[j]);
 626         } else {
 627             ptop(s[(j-1)/2+2], s[j]);
 628             mulp(s[(j-1)/2], s[j]); polyrem(s[j]);
 629             mulp(s[(j-1)/2], s[j]); polyrem(s[j]);
 630             mulp(s[(j-1)/2], s[j]); polyrem(s[j]);
 631             ptop(s[(j-1)/2-1], s[0]);
 632             mulp(s[(j-1)/2 + 1], s[0]); polyrem(s[0]);
 633             mulp(s[(j-1)/2 + 1], s[0]); polyrem(s[0]);
 634             mulp(s[(j-1)/2 + 1], s[0]); polyrem(s[0]);
 635             if(((j-1)/2) % 2 == 1) {
 636                                         mulp(cubic, s[0]); polyrem(s[0]);
 637                                         mulp(cubic, s[0]); polyrem(s[0]);
 638             } else {
 639                                         mulp(cubic, s[j]); polyrem(s[j]);
 640                                         mulp(cubic, s[j]); polyrem(s[j]);
 641             }
 642 // polyout(s[1]); polyout(s[3]); polyout(s[0]); polyout(s[j]);
 643             subp(s[0], s[j]);
 644             polyrem(s[j]);
 645         }
 646    }
 647 }
 648
 649 init_recip(int el) {
 650    int j;
 651    ptop(s[el], smonic);
 652    if(el == 2) {
 653                 mulp(cubic, smonic); polyrem(smonic);
 654    }
 655    gtog(smonic->coe[smonic->deg], aux); /* High coeff. */
 656    binvg(p, aux);
 657    scalarmul(aux, smonic);  /* s is now monic. */
 658    s[0]->deg = smonic->deg + 1;
 659    for(j=0; j <= s[0]->deg; j++) itog(1, s[0]->coe[j]);
 660    ptop(smonic, pbuff);
 661    remp(s[0], pbuff, 1);  /* Initialize reciprocal of s as precip. */
 662 }
 663
 664 void powerpoly(poly x, giant n)
 665 /* Base-4 window. */
 666 {       int pos, code;
 667         ptop(x, pbuff);  /* x. */
 668         ptop(pbuff, pbuff2);
 669         mulmod(pbuff2, pbuff2); mulmod(pbuff, pbuff2); /* x^3. */
 670         pos = bitlen(n)-2;
 671         while(pos >= 0) {
 672                 mulmod(x, x);
 673                 if(pos==0) {
 674                         if(bitval(n, pos) != 0) {
 675                                 mulmod(pbuff, x);
 676                         }
 677                         break;
 678                 }
 679                 code = (bitval(n, pos) != 0) * 2 + (bitval(n, pos-1) != 0);
 680                 switch(code) {
 681                         case 0: mulmod(x,x); break;
 682                         case 1: mulmod(x,x);
 683                                 mulmod(pbuff, x);
 684                                 break;
 685                         case 2: mulmod(pbuff, x);
 686                                 mulmod(x,x); break;
 687                         case 3: mulmod(x,x); mulmod(pbuff2, x); break;
 688                 }
 689                 pos -= 2;
 690         }
 691 }
 692
 693 mulmod(poly x, poly y) {
 694         mulp(x, y); fullmod(y);
 695 }
 696
 697 elldoublep(poly n1, poly d1, poly m1, poly c1, poly n0, poly d0,
 698                 poly m0, poly c0) {
 699
 700      ptop(n1, mn); mulmod(n1, mn);
 701           ptop(mn, pbuff); addp(mn, mn); addp(pbuff, mn);
 702      fullmod(mn);
 703           ptop(d1, pbuff); mulmod(d1, pbuff);
 704           scalarmul(a, pbuff); addp(pbuff, mn);
 705           fullmod(mn);
 706           mulmod(c1, mn);
 707           ptop(m1, md); addp(md, md);
 708           mulmod(d1, md); mulmod(d1, md); mulmod(cubic, md);
 709
 710      ptop(d1, n0); mulmod(mn, n0); mulmod(mn, n0);
 711           mulmod(cubic, n0);
 712           ptop(n1, pbuff); addp(pbuff, pbuff); fullmod(pbuff);
 713           mulmod(md, pbuff); mulmod(md, pbuff);
 714           subp(pbuff, n0); fullmod(n0);
 715      ptop(md, d0); mulmod(md, d0); mulmod(d1, d0);
 716
 717      ptop(mn, m0); mulmod(c1, m0);
 718           ptop(d0, pbuff); mulmod(n1, pbuff);
 719           ptop(n0, c0); mulmod(d1, c0); subp(c0, pbuff);
 720           fullmod(pbuff);
 721           mulmod(pbuff, m0);
 722           ptop(m1, pbuff); mulmod(md, pbuff);
 723           mulmod(d1, pbuff); mulmod(d0, pbuff);
 724           subp(pbuff, m0); fullmod(m0);
 725
 726      ptop(c1, c0); mulmod(md, c0); mulmod(d1, c0); mulmod(d0, c0);
 727 }
 728
 729 elladdp(poly n1, poly d1, poly m1, poly c1, poly n2, poly d2, poly m2, poly c2, poly n0, poly d0, poly m0, poly c0) {
 730         ptop(m2, mn); mulmod(c1, mn);
 731    ptop(m1, pbuff); mulmod(c2, pbuff);
 732    subp(pbuff, mn); fullmod(mn);
 733    mulmod(d1, mn); mulmod(d2, mn);
 734
 735         ptop(n2, md); mulmod(d1, md);
 736    ptop(n1, pbuff); mulmod(d2, pbuff);
 737    subp(pbuff, md); fullmod(md);
 738    mulmod(c1, md); mulmod(c2, md);
 739
 740    ptop(cubic, n0); mulmod(mn, n0); mulmod(mn, n0);
 741    mulmod(d1, n0); mulmod(d2, n0);
 742    ptop(n1, pbuff); mulmod(d2, pbuff);
 743    ptop(n2, d0); mulmod(d1, d0);
 744    addp(d0, pbuff); mulmod(md, pbuff); mulmod(md, pbuff);
 745    subp(pbuff, n0); fullmod(n0);
 746
 747    ptop(md, d0); mulmod(md, d0); mulmod(d1, d0); mulmod(d2, d0);
 748
 749    ptop(mn, m0); mulmod(c1, m0);
 750    ptop(d0, pbuff); mulmod(n1, pbuff);
 751    ptop(d1, c0); mulmod(n0, c0);
 752    subp(c0, pbuff); fullmod(pbuff);
 753    mulmod(pbuff, m0);
 754    ptop(m1, pbuff); mulmod(md, pbuff);
 755    mulmod(d0, pbuff); mulmod(d1, pbuff);
 756    subp(pbuff, m0); fullmod(m0);
 757
 758    ptop(md, c0); mulmod(c1, c0); mulmod(d0, c0); mulmod(d1, c0);
 759
 760 }
 761
 762 polyout(poly x) {
 763    int j;
 764    for(j=0; j <= x->deg; j++) {printf("%d: ",j); gout(x->coe[j]);}
 765 }
 766
 767 #define SIEVE_CUT 7
 768
 769 main(int argc, char **argv) {
 770       int j, ct, el, xmatch, ymatch;
 771       int k, k2, t, linit;
 772       int T[L_MAX], P[L_MAX];
 773       giant ss[L_MAX];
 774       unsigned int ord, ordminus;
 775
 776       printf("Initializing...\n"); fflush(stdout);
 777       init_all();
 778
 779   for(j=0; j < L_MAX; j++) {  /* Array to be used for CRT reconstruction. */
 780            ss[j] = NULL;
 781   }
 782
 783       printf("Give p, a, b on separate lines:\n"); fflush(stdout);
 784       gin(p);  /* Field prime. */
 785       if(bitlen(p) > Q) {
 786                 fprintf(stderr, "p too large, larger than %d bits.\n", Q);
 787                 exit(0);
 788           }
 789
 790       gin(a); gin(b); /* Curve parameters. */
 791
 792 newb:
 793         printf("Checking discriminant for:\nb = ");
 794         gout(b);
 795         gtog(a, t1); squareg(t1); modg(p, t1); mulg(a, t1); modg(p, t1);
 796     gshiftleft(2, t1);  /* 4 a^3. */
 797     gtog(b, t2); squareg(t2); modg(p, t2); smulg(27, t2);
 798     addg(t2, t1); modg(p, t1);
 799     if(isZero(t1)) {
 800                 printf("Discriminant FAILED\n");
 801                 iaddg(1, b);
 802                 goto newb;
 803     }
 804     printf("Discriminant PASSED\n");
 805     printf("Starting sieve process:\n");
 806
 807
 808       schain(SIEVE_CUT + 1);  /* Do first piece of chain, for small-sieving. */
 809       linit = 0;
 810       ct = 0;
 811       itog(1, magcheck);
 812 for(el = 2; el <= L_MAX; el += 1 + (el%2)) {
 813       if(!primeq(el)) continue;
 814       L = el; while(L*el <= 4) L *= el;
 815 printf("Resolving Schoof L = %d...\n", L);
 816       P[ct] = L;  /* Stuff another prime power. */
 817       smulg(L, magcheck);
 818       gtog(magcheck, tmp); squareg(tmp); gshiftright(2, tmp);
 819       if(gcompg(tmp, p) > 0) break;  /* Done...go to CRT phase. */
 820 if((L > SIEVE_CUT) && (linit == 0)) {
 821                 schain(L_MAX+1);
 822                 linit = 1;
 823           }
 824       init_recip(L);
 825 // printf("s: "); polyout(s[L]);
 826       gtog(p, aux2);
 827       k = idivg(L, aux2);  /* p (mod L). */
 828       gtog(p, aux2);
 829       k2 = idivg(el, aux2);
 830 // printf("power...\n");
 831       txd->deg = 0; itog(1, txd->coe[0]);
 832       tyd->deg = 0; itog(1, tyd->coe[0]);
 833       txn->deg = 1; itog(0, txn->coe[0]); itog(1, txn->coe[1]);
 834       ptop(txn, kxn);
 835
 836       gtog(p, aux2);
 837       powerpoly(txn, aux2); /* x^p. */
 838 // printf("x^p done...\n");
 839       ptop(txn, powx);
 840       powerpoly(powx, aux2);
 841 // printf("x^p^2 done...\n");
 842       ptop(cubic, tyn);
 843       gtog(p, aux2); itog(1, aux); subg(aux, aux2);
 844       gshiftright(1, aux2); /* aux2 := (p-1)/2. */
 845       powerpoly(tyn, aux2); /* y^p. */
 846 // printf("y^p done...\n");
 847       ptop(tyn, powy); mulp(tyn, powy); fullmod(powy);
 848       mulp(cubic, powy); fullmod(powy);
 849       powerpoly(powy, aux2);
 850       mulp(tyn, powy); fullmod(powy);
 851 // printf("Powers done...\n");
 852
 853 // printf("pow" ); polyout(powx); polyout(powy);
 854       ptop(txn, txn1); ptop(txd, txd1);  /* Save t = 1 case. */
 855       ptop(tyn, tyn1); ptop(tyd, tyd1);
 856 /* We now shall test
 857      (powx, y powy) + k(kxn/kxd, y kyn/kyd) = t(txn/txd, y tyn/tyd)
 858  */
 859
 860     if(k==1) { ptop(txd, kxd); ptop(txd, kyd);
 861                               ptop(txd, kyn);
 862     } else {
 863                    ptop(s[k], kxd); mulp(s[k], kxd); fullmod(kxd);
 864      if(k%2==0) { mulp(cubic, kxd); fullmod(kxd); }
 865      mulp(kxd, kxn); fullmod(kxn);
 866      ptop(s[k-1], pbuff); mulp(s[k+1], pbuff); fullmod(pbuff);
 867      if(k%2==1) {mulp(cubic, pbuff); fullmod(pbuff); }
 868      subp(pbuff, kxn); fullmod(kxn);
 869
 870      ptop(s[k+2], kyn); mulp(s[k-1], kyn); fullmod(kyn);
 871           mulp(s[k-1], kyn); fullmod(kyn);
 872      if(k > 2) {
 873                 ptop(s[k-2], pbuff); mulp(s[k+1], pbuff); fullmod(pbuff);
 874           mulp(s[k+1], pbuff); fullmod(pbuff);
 875      subp(pbuff, kyn); fullmod(kyn);
 876      }
 877      ptop(s[k], kyd); mulp(s[k], kyd); fullmod(kyd);
 878           mulp(s[k], kyd); fullmod(kyd);
 879      if(k%2==0) { mulp(cubic, kyd); fullmod(kyd);
 880                         mulp(cubic, kyd); fullmod(kyd);}
 881      itog(4, aux2); scalarmul(aux2, kyd);
 882     }
 883 //printf("kP: "); polyout(kxn); polyout(kxd); polyout(kyn); polyout(kyd);
 884 /* Commence t = 0 check. */
 885 // printf("Checking t = %d ...\n", 0);
 886 fflush(stdout);
 887
 888      ptop(powx, pbuff); mulp(kxd, pbuff);
 889      subp(kxn, pbuff);
 890      fullmod(pbuff);
 891
 892      xmatch = ymatch = 0;
 893      if(iszerop(pbuff)) {
 894                  xmatch = 1;
 895          /* Now check y coords. */
 896                  if(L == 2) goto resolve;
 897          ptop(powy, pbuff); mulp(kyd, pbuff);
 898          addp(kyn, pbuff);
 899          fullmod(pbuff);
 900          if(iszerop(pbuff)) {
 901                resolve:
 902                                         printf("%d %d\n", L, 0);
 903                         T[ct++] = 0;
 904                if((k2 + 1 - T[ct-1]) % el == 0) {
 905                                         printf("TILT: %d\n", el);
 906                                         el = 2;
 907                                         iaddg(1, b);
 908                                         goto newb;
 909                                 }
 910                     continue;
 911          } else ymatch = 1;
 912      }
 913 /* Combine pt1 and pt2. */
 914    if((xmatch == 1) && (ymatch == 1))
 915        elldoublep(powx, txd, powy, txd, nx, dx, ny, dy);
 916        else
 917        elladdp(powx, txd, powy, txd, kxn, kxd, kyn, kyd, nx, dx, ny, dy);
 918
 919 /* Now {nx/dx, ny/dy} is (fixed) LHS. */
 920 // printf("add12: "); polyout(nx); polyout(dx); polyout(ny); polyout(dy);
 921 /* Commence t > 0 check. */
 922     for(t=1; t <= L/2; t++) {
 923 // printf("Checking t = %d ...\n", t);
 924          if(t > 1) { /* Add (tx1, ty1) to (txn, tyn). */
 925                                 ptop(txn1, pbuff); mulmod(txd, pbuff);
 926             ptop(txn, powx); mulmod(txd1, powx);
 927                                 subp(powx, pbuff); fullmod(pbuff);
 928             if(!iszerop(pbuff))
 929                                  elladdp(txn1, txd1, tyn1, tyd1, txn, txd, tyn, tyd,
 930                                         tmp1, tmp2, tmp3, tmp4);
 931                            else elldoublep(txn, txd, tyn, tyd,
 932                                         tmp1, tmp2, tmp3, tmp4);
 933             ptop(tmp1, txn); ptop(tmp2, txd);
 934                            ptop(tmp3, tyn); ptop(tmp4, tyd);
 935          }
 936 // printf("tQ: "); polyout(txn); polyout(txd); polyout(tyn); polyout(tyd);
 937          /* Next, check {nx/dx, ny/dy} =? {txn/txd, tyn/tyd}. */
 938                         ptop(nx, pbuff); mulmod(txd, pbuff);
 939                    ptop(dx, powx); mulmod(txn, powx);
 940                         subp(powx, pbuff); fullmod(pbuff);
 941                    if(!iszerop(pbuff)) continue;
 942          /* Next, check y. */
 943                 //      printf("y check!\n");
 944                         ptop(ny, pbuff); mulmod(tyd, pbuff);
 945                    ptop(dy, powx); mulmod(tyn, powx);
 946                         subp(powx, pbuff); fullmod(pbuff);
 947                    if(iszerop(pbuff)) {
 948                         printf("%d %d\n", L, t);
 949                                 T[ct++] = t;
 950                    }  else {
 951                         printf("%d %d\n", L, L-t);
 952                                 T[ct++] = L-t;
 953                                 }
 954                if((k2 + 1 - T[ct-1]) % el == 0) {
 955                                         printf("TILT: %d\n", el);
 956                                         el = 2;
 957                                         iaddg(1, b);
 958                                         goto newb;
 959                                 }
 960
 961                                         fflush(stdout);
 962                                 break;
 963    }
 964 }
 965
 966 /* Now, prime powers P[] and CRT residues T[] are intact. */
 967         printf("Prime powers L:\n");
 968         printf("{");
 969     for(j=0; j < ct-1; j++) {
 970                 printf("%d, ", P[j]);
 971     }
 972     printf("%d }\n", P[ct-1]);
 973
 974         printf("Residues t (mod L):\n");
 975         printf("{");
 976     for(j=0; j < ct-1; j++) {
 977                 printf("%d, ", T[j]);
 978     }
 979     printf("%d }\n", T[ct-1]);
 980
 981 /* Mathematica algorithm for order:
 982 plis = {2^5, 3^3, 5^2, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47};
 983 tlis = {1,    26,   4, 2,  4, 11,  6,  5, 19, 22, 10, 16,  7, 22, 11};
 984 prod = Apply[Times, plis];
 985 prlis = prod/plis;
 986 invlis = Table[PowerMod[prlis[[q]], -1, plis[[q]]],{q,1,Length[plis]}];
 987 p = 2^127 - 1;
 988 t = Mod[tlis . (prlis * invlis), prod];
 989 ord = p + 1 - If[t^2 > 4p, t - prod, t]
 990 */
 991
 992   itog(1, t1);
 993   for(j=0; j < ct; j++) {
 994         smulg(P[j], t1);
 995   }
 996
 997   for(j=0; j < 2*ct; j++) {
 998            if(!ss[j]) ss[j] = newgiant(MAX_DIGS);
 999   }
1000
1001   for(j=0; j < ct; j++) {
1002         gtog(t1, ss[j]);
1003     itog(P[j], t2);
1004     divg(t2, ss[j]);
1005   }
1006
1007   for(j=0; j < ct; j++) {
1008        gtog(ss[j], ss[j+ct]);
1009        itog(P[j], t2);
1010            invg(t2, ss[j+ct]);
1011   }
1012
1013   itog(0, t4);
1014   for(j=0; j < ct; j++) {
1015         itog(T[j], t5);
1016         mulg(ss[j], t5);
1017         mulg(ss[j+ct], t5);
1018         addg(t5, t4);
1019   }
1020   modg(t1, t4);
1021   gtog(p, t5);
1022   iaddg(1, t5);
1023   gtog(t4, t2);
1024   squareg(t4);
1025   gtog(p, t3); gshiftleft(2, t3);
1026   if(gcompg(t4, t3) > 0) subg(t1, t2);
1027   subg(t2, t5);
1028   printf("Parameters:\n");
1029   printf("p = "); gout(p);
1030   printf("a = "); gout(a);
1031   printf("b = "); gout(b);
1032   printf("Curve order:\n");
1033   printf("o = "); gout(t5);
1034   printf("pprob: %d\n", prime_probable(t5));
1035   printf("Twist order:\n");
1036   printf("o' = ");
1037   addg(t2, t5);
1038   addg(t2, t5);
1039   gout(t5);
1040   printf("pprob: %d\n", prime_probable(t5));
1041
1042   iaddg(1,b);
1043   goto newb;
1044 }