2 * atomTime.c - measure performance of mulg, gsquare, feemod,
3 * gshift{left,right}, elliptic, ellMulProj
7 #include "ckutilsPlatform.h"
8 #include "CryptKitSA.h"
9 #include "ckutilities.h" /* needs private headers */
10 #include "curveParams.h" /* ditto */
11 #include "falloc.h" /* ditto */
13 #include "ellipticProj.h"
18 /* default loops for "fast" and "slow" ops respecitively */
19 #define LOOPS_DEF_FAST 10000
20 #define LOOPS_DEF_SLOW 100
22 #define NUM_BORROW 100
24 /* individual enables - normally all on, disable to zero in on one test */
26 #define GSQUARE_ENABLE 1
27 #define FEEMOD_ENABLE 1
28 #define BORROW_ENABLE 1
29 #define SHIFT_ENABLE 1
30 #define BINVAUX_ENABLE 1
31 #define MAKE_RECIP_ENABLE 1
32 #define MODGRECIP_ENABLE 1
33 #define KEYGEN_ENABLE 1
34 #define ELLIPTIC_ENABLE 1
35 #define ELL_SIMPLE_ENABLE 1
37 static void usage(char **argv
)
39 printf("Usage: %s [l=loops_fast] [L=loops_slow] [q(uick)] [D=depth]\n", argv
[0]);
44 * Fill numGiants with random data of length bits.
46 static void genRandGiants(giant
*giants
,
54 unsigned bytes
= (bits
+ 7) / 8;
56 unsigned giantMsd
= 0; // index of MSD
57 unsigned log2BitsPerDigit
;
58 unsigned bitsPerDigit
;
60 /* just to satisfy compiler - make sure it's always called */
65 log2BitsPerDigit
= GIANT_LOG2_BITS_PER_DIGIT
;
66 bitsPerDigit
= GIANT_BITS_PER_DIGIT
;
70 * deserializeGiant() has a resolution of one byte. We
71 * need more resolution - that is, we'll be creating
72 * giants a little larger than we need, and we'll mask off
73 * some bits in the giants' m.s. digit.
74 * This assumes that data fills the giantDigits such
75 * that if bytes mod GIANT_BYTES_PER_DIGIT != 0, the
76 * empty byte(s) in the MSD are in the m.s. byte(s).
78 giantMsd
= bits
>> log2BitsPerDigit
;
79 mask
= (1 << (bits
& (bitsPerDigit
- 1))) - 1;
81 rdata
= fmalloc(bytes
);
82 for(i
=0; i
<numGiants
; i
++) {
84 feeRandBytes(rand
, rdata
, bytes
);
85 deserializeGiant(rdata
, g
, bytes
);
89 g
->n
[giantMsd
] &= mask
;
92 * We've zeroed out some bits; we might have to
93 * adjust the sign of the giant as well. Note that
94 * deserializeGiant always yields positive
97 for(j
=(g
->sign
- 1); j
!=0; j
--) {
111 #if CRYPTKIT_ELL_PROJ_ENABLE
113 * Assumes the presence of numEllPoints items in *points, and that the
114 * x coordinate in each point is init'd to a random giant. Uses the x
115 * coords as seeds to make normalized points of the entire *points array.
117 static void makePoints(pointProjStruct
*points
,
118 unsigned numEllPoints
,
122 giant seed
= newGiant(cp
->maxDigits
);
124 for(i
=0; i
<numEllPoints
; i
++) {
125 gtog(points
[i
].x
, seed
);
126 findPointProj(&points
[i
], seed
, cp
);
131 #endif /* CRYPTKIT_ELL_PROJ_ENABLE */
133 int main(int argc
, char **argv
)
137 unsigned loopsFast
= LOOPS_DEF_FAST
;
138 unsigned loopsSlow
= LOOPS_DEF_SLOW
;
150 #if CRYPTKIT_ELL_PROJ_ENABLE
151 unsigned numEllGiants
; // for elliptic ops
152 unsigned numEllPoints
; // for elliptic ops
156 unsigned minDepth
= 0;
157 unsigned maxDepth
= FEE_DEPTH_MAX
;
158 unsigned basePrimeLen
;
161 #if CRYPTKIT_ELL_PROJ_ENABLE
162 pointProjStruct
*points
;
165 giant modGiant
= NULL
;
167 feePubKey
*keyArray
= NULL
;
168 giant gborrow
[NUM_BORROW
];
170 /* just to satisfy compiler - make sure it's always called */
171 genRandGiants(NULL
, 0, 0, 0);
173 for(arg
=1; arg
<argc
; arg
++) {
177 loopsFast
= atoi(&argp
[2]);
180 loopsSlow
= atoi(&argp
[2]);
186 minDepth
= maxDepth
= atoi(&argp
[2]);
195 * Common random generator
197 time((unsigned long *)&seed
);
198 rand
= feeRandAllocWithSeed(seed
);
200 maxLoops
= loopsFast
;
201 if(loopsSlow
> maxLoops
) {
202 maxLoops
= loopsSlow
;
206 * Alloc array of giants big enough for squaring at the largest
207 * key size, enough of them for 'loops' mulgs
209 cp
= curveParamsForDepth(FEE_DEPTH_LARGEST
);
210 numGiants
= maxLoops
* 2; // 2 giants per loop
211 if(loopsSlow
> (maxLoops
/ 4)) {
212 /* findPointProj needs 4 giants per loop */
215 #if CRYPTKIT_ELL_PROJ_ENABLE
216 numEllGiants
= loopsSlow
* 2;
217 numEllPoints
= loopsSlow
* 2;
219 giants
= fmalloc(numGiants
* sizeof(giant
));
221 printf("malloc failure\n");
224 for(i
=0; i
<numGiants
; i
++) {
225 giants
[i
] = newGiant(cp
->maxDigits
);
226 if(giants
[i
] == NULL
) {
227 printf("malloc failure\n");
233 #if CRYPTKIT_ELL_PROJ_ENABLE
235 * Projective points - two per ellLoop. The giants come from
236 * giants[]. We reserve an extra giant per point.
237 * We're assuming that numEllPoints < (4 * numGiants).
239 points
= fmalloc(numEllPoints
* sizeof(pointProjStruct
));
241 printf("malloc failure\n");
245 for(i
=0; i
<numEllPoints
; i
++) {
246 points
[i
].x
= giants
[j
++];
247 points
[i
].y
= giants
[j
++];
248 points
[i
].z
= giants
[j
++];
253 /* feePubKey array */
254 keyArray
= fmalloc(sizeof(feePubKey
) * loopsSlow
);
255 if(keyArray
== NULL
) {
256 printf("malloc failure\n");
261 * Alloc an array of shiftCnt ints
263 shiftCnt
= fmalloc(maxLoops
* sizeof(int));
264 if(shiftCnt
== NULL
) {
265 printf("malloc failure\n");
269 for(depth
=minDepth
; depth
<=maxDepth
; depth
++) {
272 if((depth
!= FEE_DEPTH_127M
) &&
273 (depth
!= FEE_DEPTH_161W
)) {
279 * Get curve params for this depth
281 cp
= curveParamsForDepth(depth
);
283 printf("malloc failure\n");
286 switch(cp
->curveType
) {
288 curveType
= "FCT_Montgomery";
290 case FCT_Weierstrass
:
291 curveType
= "FCT_Weierstrass";
294 curveType
= "FCT_General";
297 printf("***Unknown curveType!\n");
301 switch(cp
->primeType
) {
303 printf("depth=%d; FPT_General, %s; keysize=%d;\n",
304 depth
, curveType
, bitlen(cp
->basePrime
));
307 printf("depth=%d; FPT_Mersenne, %s; q=%d\n",
308 depth
, curveType
, cp
->q
);
311 printf("depth=%d; FPT_FEE, %s; q=%d k=%d\n",
312 depth
, curveType
, cp
->q
, cp
->k
);
315 basePrimeLen
= bitlen(cp
->basePrime
);
319 * bitlen(giant) <= bitlen(basePrime);
320 * giants[n+1] *= giants[n]
323 genRandGiants(giants
, numGiants
, basePrimeLen
, rand
);
324 PLAT_GET_TIME(startTime
);
325 for(i
=0; i
<numGiants
; i
+=2) {
326 mulg(giants
[i
], giants
[i
+1]);
328 PLAT_GET_TIME(endTime
);
329 elapsed
= PLAT_GET_NS(startTime
, endTime
);
330 printf(" mulg: %12.2f ns per op\n",
331 elapsed
/ (numGiants
/ 2));
332 #endif /* MULG_ENABLE */
336 * bitlen(giant) <= bitlen(basePrime);
340 genRandGiants(giants
, numGiants
, basePrimeLen
, rand
);
341 PLAT_GET_TIME(startTime
);
342 for(i
=0; i
<loopsFast
; i
++) {
345 PLAT_GET_TIME(endTime
);
346 elapsed
= PLAT_GET_NS(startTime
, endTime
);
347 printf(" gsquare: %12.2f ns per op\n", elapsed
/ loopsFast
);
348 #endif /* GSQUARE_ENABLE */
352 * bitlen(giant) <= ((2 * bitlen(basePrime) - 2);
356 genRandGiants(giants
, numGiants
, (basePrimeLen
* 2) - 2,
358 PLAT_GET_TIME(startTime
);
359 for(i
=0; i
<loopsFast
; i
++) {
360 feemod(cp
, giants
[i
]);
362 PLAT_GET_TIME(endTime
);
363 elapsed
= PLAT_GET_NS(startTime
, endTime
);
364 printf(" feemod: %12.2f ns per op\n", elapsed
/ loopsFast
);
365 #endif /* FEEMOD_ENABLE */
372 PLAT_GET_TIME(startTime
);
373 for(i
=0; i
<loopsFast
; i
++) {
374 for(j
=0; j
<NUM_BORROW
; j
++) {
375 gborrow
[j
] = borrowGiant(cp
->maxDigits
);
377 for(j
=0; j
<NUM_BORROW
; j
++) {
378 returnGiant(gborrow
[j
]);
381 PLAT_GET_TIME(endTime
);
382 elapsed
= PLAT_GET_NS(startTime
, endTime
);
383 printf(" borrow/return: %12.2f ns per op\n",
384 elapsed
/ (loopsFast
* NUM_BORROW
));
385 #endif /* BORROW_ENABLE */
389 * bitlen(giant) <= bitlen(basePrime)
390 * 0 <= shiftCnt <= bitlen(basePrime)
391 * gshiftright(giants[i])
394 genRandGiants(giants
, numGiants
, basePrimeLen
, rand
);
395 for(i
=0; i
<loopsFast
; i
++) {
396 shiftCnt
[i
] = feeRandNextNum(rand
) % basePrimeLen
;
398 PLAT_GET_TIME(startTime
);
399 for(i
=0; i
<loopsFast
; i
++) {
400 gshiftright(shiftCnt
[i
], giants
[i
]);
402 PLAT_GET_TIME(endTime
);
403 elapsed
= PLAT_GET_NS(startTime
, endTime
);
404 printf(" gshiftright: %12.2f ns per op\n", elapsed
/ loopsFast
);
408 * bitlen(giant) <= bitlen(basePrime)
409 * 1 <= shiftCnt <= bitlen(basePrime)
410 * gshiftleft(giants[i]
412 genRandGiants(giants
, numGiants
, basePrimeLen
, rand
);
413 PLAT_GET_TIME(startTime
);
414 for(i
=0; i
<loopsFast
; i
++) {
415 gshiftright(shiftCnt
[i
], giants
[i
]);
417 PLAT_GET_TIME(endTime
);
418 elapsed
= PLAT_GET_NS(startTime
, endTime
);
419 printf(" gshiftleft: %12.2f ns per op\n", elapsed
/ loopsFast
);
420 #endif /* SHIFT_ENABLE */
424 * bitlen(giant) <= bitlen(basePrime);
425 * binvaux(basePrime, giants[n+1])
428 genRandGiants(giants
, loopsSlow
, basePrimeLen
, rand
);
429 PLAT_GET_TIME(startTime
);
430 for(i
=0; i
<loopsSlow
; i
++) {
431 binvaux(cp
->basePrime
, giants
[i
]);
433 PLAT_GET_TIME(endTime
);
434 elapsed
= PLAT_GET_US(startTime
, endTime
);
435 printf(" binvaux: %12.2f us per op\n",
436 elapsed
/ loopsSlow
);
437 #endif /* BINVAUX_ENABLE */
441 * bitlen(giant) <= bitlen(basePrime);
442 * make_recip(giants[n], giants[n+1]
444 #if MAKE_RECIP_ENABLE
445 genRandGiants(giants
, numGiants
, basePrimeLen
, rand
);
446 PLAT_GET_TIME(startTime
);
447 for(i
=0; i
<loopsSlow
*2; i
+=2) {
448 make_recip(giants
[i
], giants
[i
+1]);
450 PLAT_GET_TIME(endTime
);
451 elapsed
= PLAT_GET_US(startTime
, endTime
);
452 printf(" make_recip: %12.2f us per op\n",
453 elapsed
/ loopsSlow
);
454 #endif /* MAKE_RECIP_ENABLE */
457 * modg_via_recip test
458 * bitlen(giant) <= ((2 * bitlen(basePrime) - 2);
459 * bitlen(modGiant) <= bitlen(basePrime)
460 * calc recip of modGiant
461 * modg_via_recip(giants[i])
464 genRandGiants(giants
, numGiants
, (basePrimeLen
* 2) - 2,
466 modGiant
= borrowGiant(cp
->maxDigits
);
467 recip
= borrowGiant(cp
->maxDigits
);
468 genRandGiants(&modGiant
, 1, basePrimeLen
, rand
);
469 make_recip(modGiant
, recip
);
471 PLAT_GET_TIME(startTime
);
472 for(i
=0; i
<loopsSlow
; i
++) {
473 modg_via_recip(modGiant
, recip
, giants
[i
]);
475 PLAT_GET_TIME(endTime
);
476 elapsed
= PLAT_GET_NS(startTime
, endTime
);
477 printf(" modg_via_recip: %12.2f ns per op\n",
478 elapsed
/ loopsSlow
);
479 returnGiant(modGiant
);
483 #endif /* MODGRECIP_ENABLE */
487 * keyArray[n] = feePubKeyAlloc();
488 * feePubKeyInitFromPrivData(keyArray[n] );
491 PLAT_GET_TIME(startTime
);
492 for(i
=0; i
<loopsSlow
; i
++) {
493 keyArray
[i
] = feePubKeyAlloc();
494 if(keyArray
[i
] == NULL
) {
495 printf("malloc failure\n");
498 /* fixme how about some better seed data */
499 feePubKeyInitFromPrivDataDepth(keyArray
[i
],
500 (unsigned char *)"somePrivData",
505 PLAT_GET_TIME(endTime
);
506 elapsed
= PLAT_GET_US(startTime
, endTime
);
507 printf(" keygen: %12.2f us per op\n",
508 elapsed
/ loopsSlow
);
509 for(i
=0; i
<loopsSlow
; i
++) {
510 feePubKeyFree(keyArray
[i
]);
512 #endif /* KEYGEN_ENABLE*/
516 * bitlen(giant) <= bitlen(basePrime);
517 * {giants[n], 1} *= giants[n+1] (elliptic mult)
520 genRandGiants(giants
, numGiants
, basePrimeLen
, rand
);
521 z
= borrowGiant(cp
->maxDigits
);
522 PLAT_GET_TIME(startTime
);
523 for(i
=0; i
<loopsSlow
; i
+=2) {
524 /* superoptimized int_to_giant(1) */
527 elliptic(giants
[i
], z
, giants
[i
+1], cp
);
529 PLAT_GET_TIME(endTime
);
530 elapsed
= PLAT_GET_US(startTime
, endTime
);
531 printf(" elliptic: %12.2f us per op\n",
532 elapsed
/ (loopsSlow
/ 2));
533 #endif /* ELLIPTIC_ENABLE*/
536 * elliptic_simple test
537 * bitlen(giant) <= bitlen(basePrime);
538 * giants[n] *= giants[n+1] (elliptic mult)
540 #if ELL_SIMPLE_ENABLE
541 genRandGiants(giants
, numGiants
, basePrimeLen
, rand
);
542 PLAT_GET_TIME(startTime
);
543 for(i
=0; i
<loopsSlow
*2; i
+=2) {
544 elliptic_simple(giants
[i
], giants
[i
+1], cp
);
546 PLAT_GET_TIME(endTime
);
547 elapsed
= PLAT_GET_US(startTime
, endTime
);
548 printf(" elliptic_simple: %12.2f us per op\n",
549 elapsed
/ loopsSlow
);
550 #endif /* ELL_SIMPLE_ENABLE */
552 if(cp
->curveType
!= FCT_Weierstrass
) {
556 #if CRYPTKIT_ELL_PROJ_ENABLE
559 * bitlen(giant) <= bitlen(basePrime);
560 * point[n+1] = point[n] * giants[4n+3]
562 * note we're cooking up way more giants than we have to;
563 * we really only need the x's and k's. But what the heck.
565 genRandGiants(giants
, 4 * numEllPoints
, basePrimeLen
, rand
);
566 makePoints(points
, numEllPoints
, cp
);
567 PLAT_GET_TIME(startTime
);
568 for(i
=0; i
<loopsSlow
; i
+=2) {
569 ellMulProj(&points
[i
], &points
[i
+1],
572 PLAT_GET_TIME(endTime
);
573 elapsed
= PLAT_GET_US(startTime
, endTime
);
574 printf(" ellMulProj: %12.2f us per op\n",
575 elapsed
/ (loopsSlow
/ 2));
578 * ellMulProjSimple test
579 * bitlen(giant) <= bitlen(basePrime);
580 * point[n] *= giants[4n+3] (projective elliptic mult)
582 genRandGiants(giants
, 4 * numEllPoints
, basePrimeLen
, rand
);
583 makePoints(points
, numEllPoints
, cp
);
584 PLAT_GET_TIME(startTime
);
585 for(i
=0; i
<loopsSlow
; i
++) {
586 ellMulProjSimple(&points
[i
], giants
[4*i
+3], cp
);
588 PLAT_GET_TIME(endTime
);
589 elapsed
= PLAT_GET_US(startTime
, endTime
);
590 printf(" ellMulProjSimple: %12.2f us per op\n",
591 elapsed
/ loopsSlow
);
595 * bitlen(giant) <= bitlen(basePrime);
596 * point[n] += point[n+1] (projective elliptic add)
598 genRandGiants(giants
, 4 * numEllPoints
, basePrimeLen
, rand
);
599 makePoints(points
, numEllPoints
, cp
);
600 PLAT_GET_TIME(startTime
);
601 for(i
=0; i
<loopsSlow
; i
+=2) {
602 ellAddProj(&points
[i
], &points
[i
+1], cp
);
604 PLAT_GET_TIME(endTime
);
605 elapsed
= PLAT_GET_NS(startTime
, endTime
);
606 printf(" ellAddProj: %12.2f ns per op\n",
607 elapsed
/ loopsSlow
);
611 * bitlen(giant) <= bitlen(basePrime);
612 * ellDoubleProj(point[n])
614 genRandGiants(giants
, 4 * numEllPoints
, basePrimeLen
, rand
);
615 makePoints(points
, numEllPoints
, cp
);
616 PLAT_GET_TIME(startTime
);
617 for(i
=0; i
<loopsSlow
; i
++) {
618 ellDoubleProj(&points
[i
], cp
);
620 PLAT_GET_TIME(endTime
);
621 elapsed
= PLAT_GET_NS(startTime
, endTime
);
622 printf(" ellDoubleProj: %12.2f ns per op\n",
623 elapsed
/ loopsSlow
);
627 * bitlen(giant) <= bitlen(basePrime);
628 * findPointProj(point[n], giants[4n+3])
630 genRandGiants(giants
, 4 * loopsSlow
, basePrimeLen
, rand
);
631 PLAT_GET_TIME(startTime
);
632 for(i
=0; i
<loopsSlow
; i
++) {
633 findPointProj(&points
[i
], giants
[4*i
+ 3], cp
);
635 PLAT_GET_TIME(endTime
);
636 elapsed
= PLAT_GET_US(startTime
, endTime
);
637 printf(" findPointProj: %12.2f us per op\n",
638 elapsed
/ loopsSlow
);
640 #endif /* CRYPTKIT_ELL_PROJ_ENABLE */
647 for(i
=0; i
<numGiants
; i
++) {
648 freeGiant(giants
[i
]);