]> git.saurik.com Git - apple/security.git/blob - libsecurity_apple_csp/lib/rijndael-alg-ref.c
Security-55163.44.tar.gz
[apple/security.git] / libsecurity_apple_csp / lib / rijndael-alg-ref.c
1 /*
2 * Copyright (c) 2000-2001 Apple Computer, Inc. All Rights Reserved.
3 *
4 * The contents of this file constitute Original Code as defined in and are
5 * subject to the Apple Public Source License Version 1.2 (the 'License').
6 * You may not use this file except in compliance with the License. Please obtain
7 * a copy of the License at http://www.apple.com/publicsource and read it before
8 * using this file.
9 *
10 * This Original Code and all software distributed under the License are
11 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS
12 * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
13 * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
14 * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the
15 * specific language governing rights and limitations under the License.
16 */
17
18
19 /* rijndael-alg-ref.c v2.0 August '99
20 * Reference ANSI C code
21 * authors: Paulo Barreto
22 * Vincent Rijmen
23 *
24 * PPC and 128-bit block optimization by Doug Mitchell May 2001.
25 */
26
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "rijndael-alg-ref.h"
32 #include <cspdebugging.h>
33
34 #define SC ((BC - 4) >> 1)
35
36 #include "boxes-ref.h"
37
38 static const word8 shifts[3][4][2] = {
39 { { 0, 0 },
40 { 1, 3 },
41 { 2, 2 },
42 { 3, 1 }
43 },
44 { { 0, 0 },
45 { 1, 5 },
46 { 2, 4 },
47 { 3, 3 }
48 },
49 { { 0, 0 },
50 { 1, 7 },
51 { 3, 5 },
52 { 4, 4 }
53 }
54 };
55
56 #if !GLADMAN_AES_128_ENABLE
57
58 /* 128 bit key/word shift table in bits */
59 static const word8 shifts128[4][2] = {
60 { 0, 0 },
61 { 8, 24 },
62 { 16, 16 },
63 { 24, 8 }
64 };
65
66 #endif /* GLADMAN_AES_128_ENABLE */
67
68 #if !AES_MUL_BY_LOOKUP
69 /*
70 * Profiling measurements showed that the mul routine is where a large propertion of
71 * the time is spent. Since the first argument to mul is always one of six
72 * constants (2, 3, 0xe, etc.), we implement six 256x256 byte lookup tables to
73 * do the multiplies. This eliminates the need for the log/antilog tables, so
74 * it's only adding one kilobyte of const data. Throughput improvement for this
75 * mod is a factor of 3.3 for encrypt and 4.1 for decrypt in the 128-bit optimized
76 * case. Improvement for the general case (with a 256 bit key) is 1.46 for encrypt
77 * and 1.88 for decrypt. (Decrypt wins more for this enhancement because the
78 * InvMixColumn does four muls, vs. 2 muls for MixColumn). Measurements taken
79 * on a 500 MHz G4 with 1 MB of L2 cache.
80 */
81
82 /*
83 * The mod 255 op in mul is really expensive...
84 *
85 * We know that b <= (254 * 2), so there are only two cases. Either return b,
86 * or return b-255.
87 *
88 * On a G4 this single optimization results in a 24% speedup for encrypt and
89 * a 25% speedup for decrypt.
90 */
91 static inline word8 mod255(word32 b)
92 {
93 if(b >= 255) {
94 b -= 255;
95 }
96 return b;
97 }
98
99 word8 mul(word8 a, word8 b) {
100 /* multiply two elements of GF(2^m)
101 * needed for MixColumn and InvMixColumn
102 */
103 if (a && b) return Alogtable[mod255(Logtable[a] + Logtable[b])];
104 else return 0;
105 }
106 #endif /* !AES_MUL_BY_LOOKUP */
107
108 void KeyAddition(word8 a[4][MAXBC], word8 rk[4][MAXBC], word8 BC) {
109 /* Exor corresponding text input and round key input bytes
110 */
111 int i, j;
112
113 for(i = 0; i < 4; i++)
114 for(j = 0; j < BC; j++) a[i][j] ^= rk[i][j];
115 }
116
117 void ShiftRow(word8 a[4][MAXBC], word8 d, word8 BC) {
118 /* Row 0 remains unchanged
119 * The other three rows are shifted a variable amount
120 */
121 word8 tmp[MAXBC];
122 int i, j;
123
124 for(i = 1; i < 4; i++) {
125 for(j = 0; j < BC; j++) tmp[j] = a[i][(j + shifts[SC][i][d]) % BC];
126 for(j = 0; j < BC; j++) a[i][j] = tmp[j];
127 }
128 }
129
130 void Substitution(word8 a[4][MAXBC], const word8 box[256], word8 BC) {
131 /* Replace every byte of the input by the byte at that place
132 * in the nonlinear S-box
133 */
134 int i, j;
135
136 for(i = 0; i < 4; i++)
137 for(j = 0; j < BC; j++) a[i][j] = box[a[i][j]] ;
138 }
139
140 void MixColumn(word8 a[4][MAXBC], word8 BC) {
141 /* Mix the four bytes of every column in a linear way
142 */
143 word8 b[4][MAXBC];
144 int i, j;
145
146 for(j = 0; j < BC; j++) {
147 for(i = 0; i < 4; i++) {
148 #if AES_MUL_BY_LOOKUP
149 b[i][j] = mulBy0x02[a[i][j]]
150 ^ mulBy0x03[a[(i + 1) % 4][j]]
151 ^ a[(i + 2) % 4][j]
152 ^ a[(i + 3) % 4][j];
153 #else
154 b[i][j] = mul(2,a[i][j])
155 ^ mul(3,a[(i + 1) % 4][j])
156 ^ a[(i + 2) % 4][j]
157 ^ a[(i + 3) % 4][j];
158 #endif
159 }
160 }
161 for(i = 0; i < 4; i++) {
162 for(j = 0; j < BC; j++) a[i][j] = b[i][j];
163 }
164 }
165
166 void InvMixColumn(word8 a[4][MAXBC], word8 BC) {
167 /* Mix the four bytes of every column in a linear way
168 * This is the opposite operation of Mixcolumn
169 */
170 word8 b[4][MAXBC];
171 int i, j;
172
173 for(j = 0; j < BC; j++) {
174 for(i = 0; i < 4; i++) {
175 #if AES_MUL_BY_LOOKUP
176 b[i][j] = mulBy0x0e[a[i][j]]
177 ^ mulBy0x0b[a[(i + 1) % 4][j]]
178 ^ mulBy0x0d[a[(i + 2) % 4][j]]
179 ^ mulBy0x09[a[(i + 3) % 4][j]];
180 #else
181 b[i][j] = mul(0xe,a[i][j])
182 ^ mul(0xb,a[(i + 1) % 4][j])
183 ^ mul(0xd,a[(i + 2) % 4][j])
184 ^ mul(0x9,a[(i + 3) % 4][j]);
185 #endif
186 }
187 }
188 for(i = 0; i < 4; i++) {
189 for(j = 0; j < BC; j++) a[i][j] = b[i][j];
190 }
191 }
192
193 int rijndaelKeySched (
194 word8 k[4][MAXKC],
195 int keyBits,
196 int blockBits,
197 word8 W[MAXROUNDS+1][4][MAXBC]) {
198
199 /* Calculate the necessary round keys
200 * The number of calculations depends on keyBits and blockBits
201 */
202 int KC, BC, ROUNDS;
203 int i, j, t, rconpointer = 0;
204 word8 tk[4][MAXKC];
205
206 switch (keyBits) {
207 case 128: KC = 4; break;
208 case 192: KC = 6; break;
209 case 256: KC = 8; break;
210 default : return (-1);
211 }
212
213 switch (blockBits) {
214 case 128: BC = 4; break;
215 case 192: BC = 6; break;
216 case 256: BC = 8; break;
217 default : return (-2);
218 }
219
220 switch (keyBits >= blockBits ? keyBits : blockBits) {
221 case 128: ROUNDS = 10; break;
222 case 192: ROUNDS = 12; break;
223 case 256: ROUNDS = 14; break;
224 default : return (-3); /* this cannot happen */
225 }
226
227
228 for(j = 0; j < KC; j++)
229 for(i = 0; i < 4; i++)
230 tk[i][j] = k[i][j];
231 t = 0;
232 /* copy values into round key array */
233 for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
234 for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];
235
236 while (t < (ROUNDS+1)*BC) { /* while not enough round key material calculated */
237 /* calculate new values */
238 for(i = 0; i < 4; i++)
239 tk[i][0] ^= S[tk[(i+1)%4][KC-1]];
240 tk[0][0] ^= rcon[rconpointer++];
241
242 if (KC != 8)
243 for(j = 1; j < KC; j++)
244 for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
245 else {
246 for(j = 1; j < KC/2; j++)
247 for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
248 for(i = 0; i < 4; i++) tk[i][KC/2] ^= S[tk[i][KC/2 - 1]];
249 for(j = KC/2 + 1; j < KC; j++)
250 for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
251 }
252 /* copy values into round key array */
253 for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
254 for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];
255 }
256
257 return 0;
258 }
259
260 int rijndaelEncrypt (
261 word8 a[4][MAXBC],
262 int keyBits,
263 int blockBits,
264 word8 rk[MAXROUNDS+1][4][MAXBC])
265 {
266 /* Encryption of one block, general case.
267 */
268 int r, BC, ROUNDS;
269
270 switch (blockBits) {
271 case 128: BC = 4; break;
272 case 192: BC = 6; break;
273 case 256: BC = 8; break;
274 default : return (-2);
275 }
276
277 switch (keyBits >= blockBits ? keyBits : blockBits) {
278 case 128: ROUNDS = 10; break;
279 case 192: ROUNDS = 12; break;
280 case 256: ROUNDS = 14; break;
281 default : return (-3); /* this cannot happen */
282 }
283
284 /* begin with a key addition
285 */
286 KeyAddition(a,rk[0],BC);
287
288 /* ROUNDS-1 ordinary rounds
289 */
290 for(r = 1; r < ROUNDS; r++) {
291 Substitution(a,S,BC);
292 ShiftRow(a,0,BC);
293 MixColumn(a,BC);
294 KeyAddition(a,rk[r],BC);
295 }
296
297 /* Last round is special: there is no MixColumn
298 */
299 Substitution(a,S,BC);
300 ShiftRow(a,0,BC);
301 KeyAddition(a,rk[ROUNDS],BC);
302
303 return 0;
304 }
305
306 int rijndaelDecrypt (
307 word8 a[4][MAXBC],
308 int keyBits,
309 int blockBits,
310 word8 rk[MAXROUNDS+1][4][MAXBC])
311 {
312 int r, BC, ROUNDS;
313
314 switch (blockBits) {
315 case 128: BC = 4; break;
316 case 192: BC = 6; break;
317 case 256: BC = 8; break;
318 default : return (-2);
319 }
320
321 switch (keyBits >= blockBits ? keyBits : blockBits) {
322 case 128: ROUNDS = 10; break;
323 case 192: ROUNDS = 12; break;
324 case 256: ROUNDS = 14; break;
325 default : return (-3); /* this cannot happen */
326 }
327
328 /* To decrypt: apply the inverse operations of the encrypt routine,
329 * in opposite order
330 *
331 * (KeyAddition is an involution: it 's equal to its inverse)
332 * (the inverse of Substitution with table S is Substitution with the
333 * inverse table of S)
334 * (the inverse of Shiftrow is Shiftrow over a suitable distance)
335 */
336
337 /* First the special round:
338 * without InvMixColumn
339 * with extra KeyAddition
340 */
341 KeyAddition(a,rk[ROUNDS],BC);
342 Substitution(a,Si,BC);
343 ShiftRow(a,1,BC);
344
345 /* ROUNDS-1 ordinary rounds
346 */
347 for(r = ROUNDS-1; r > 0; r--) {
348 KeyAddition(a,rk[r],BC);
349 InvMixColumn(a,BC);
350 Substitution(a,Si,BC);
351 ShiftRow(a,1,BC);
352 }
353
354 /* End with the extra key addition
355 */
356
357 KeyAddition(a,rk[0],BC);
358
359 return 0;
360 }
361
362 #if !GLADMAN_AES_128_ENABLE
363
364 /*
365 * All of these 128-bit-key-and-block routines require 32-bit word-aligned
366 * char array pointers.ÊThe key schedule arrays are easy; they come from
367 * keyInstance which has a 4-byte-aligned element preceeding the key schedule.
368 * Others require manual alignment of a local variable by the caller.
369 */
370
371 static inline void KeyAddition128(
372 word8 a[4][BC_128_OPT],
373 word8 rk[4][MAXBC]) {
374
375 /* these casts are endian-independent */
376 ((word32 *)a)[0] ^= *((word32 *)(&rk[0]));
377 ((word32 *)a)[1] ^= *((word32 *)(&rk[1]));
378 ((word32 *)a)[2] ^= *((word32 *)(&rk[2]));
379 ((word32 *)a)[3] ^= *((word32 *)(&rk[3]));
380 }
381
382 static void Substitution128(
383 word8 a[4][BC_128_OPT],
384 const word8 box[256]) {
385 /* Replace every byte of the input by the byte at that place
386 * in the nonlinear S-box
387 */
388 int i, j;
389
390 /* still to be optimized - larger S boxes? */
391 for(i = 0; i < 4; i++) {
392 for(j = 0; j < BC_128_OPT; j++) {
393 a[i][j] = box[a[i][j]];
394 }
395 }
396 }
397
398 #if defined(__ppc__) && defined(__GNUC__)
399
400 static inline void rotateWordLeft(
401 word8 *word, // known to be word aligned
402 unsigned rotCount) // in bits
403 {
404 word32 lword = *((word32 *)word);
405 asm("rlwnm %0,%1,%2,0,31" : "=r"(lword) : "0"(lword), "r"(rotCount));
406 *((word32 *)word) = lword;
407 }
408
409 #else
410
411 /*
412 * Insert your machine/compiler dependent code here,
413 * or just use this, which works on any platform and compiler
414 * which supports the __attribute__((aligned(4))) directive.
415 */
416 static void rotateWordLeft(
417 word8 *word, // known to be word aligned
418 unsigned rotCount) // in bits
419 {
420 word8 tmp[BC_128_OPT] __attribute__((aligned(4)));
421 unsigned bytes = rotCount / 8;
422
423 tmp[0] = word[bytes & (BC_128_OPT-1)];
424 tmp[1] = word[(1+bytes) & (BC_128_OPT-1)];
425 tmp[2] = word[(2+bytes) & (BC_128_OPT-1)];
426 tmp[3] = word[(3+bytes) & (BC_128_OPT-1)];
427 *((word32 *)word) = *((word32 *)tmp);
428 }
429 #endif
430
431 static inline void ShiftRow128(
432 word8 a[4][BC_128_OPT],
433 word8 d) {
434 /* Row 0 remains unchanged
435 * The other three rows are shifted (actually rotated) a variable amount
436 */
437 int i;
438
439 for(i = 1; i < 4; i++) {
440 rotateWordLeft(a[i], shifts128[i][d]);
441 }
442 }
443
444 /*
445 * The following two routines are where most of the time is spent in this
446 * module. Further optimization would have to focus here.
447 */
448 static void MixColumn128(word8 a[4][BC_128_OPT]) {
449 /* Mix the four bytes of every column in a linear way
450 */
451 word8 b[4][BC_128_OPT];
452 int i, j;
453
454 for(j = 0; j < BC_128_OPT; j++) {
455 for(i = 0; i < 4; i++) {
456 #if AES_MUL_BY_LOOKUP
457 b[i][j] = mulBy0x02[a[i][j]]
458 ^ mulBy0x03[a[(i + 1) % 4][j]]
459 ^ a[(i + 2) % 4][j]
460 ^ a[(i + 3) % 4][j];
461 #else
462 b[i][j] = mul(2,a[i][j])
463 ^ mul(3,a[(i + 1) % 4][j])
464 ^ a[(i + 2) % 4][j]
465 ^ a[(i + 3) % 4][j];
466 #endif
467 }
468 }
469 memmove(a, b, 4 * BC_128_OPT);
470 }
471
472 static void InvMixColumn128(word8 a[4][BC_128_OPT]) {
473 /* Mix the four bytes of every column in a linear way
474 * This is the opposite operation of Mixcolumn
475 */
476 word8 b[4][BC_128_OPT];
477 int i, j;
478
479 for(j = 0; j < BC_128_OPT; j++) {
480 for(i = 0; i < 4; i++) {
481 #if AES_MUL_BY_LOOKUP
482 b[i][j] = mulBy0x0e[a[i][j]]
483 ^ mulBy0x0b[a[(i + 1) % 4][j]]
484 ^ mulBy0x0d[a[(i + 2) % 4][j]]
485 ^ mulBy0x09[a[(i + 3) % 4][j]];
486 #else
487 b[i][j] = mul(0xe,a[i][j])
488 ^ mul(0xb,a[(i + 1) % 4][j])
489 ^ mul(0xd,a[(i + 2) % 4][j])
490 ^ mul(0x9,a[(i + 3) % 4][j]);
491 #endif
492 }
493 }
494 memmove(a, b, 4 * BC_128_OPT);
495 }
496
497 int rijndaelKeySched128 (
498 word8 k[4][KC_128_OPT],
499 word8 W[MAXROUNDS+1][4][MAXBC]) {
500
501 /* Calculate the necessary round keys
502 * The number of calculations depends on keyBits and blockBits
503 */
504 int i, j, t, rconpointer = 0;
505 word8 tk[4][KC_128_OPT];
506 unsigned numSchedRows = (ROUNDS_128_OPT + 1) * BC_128_OPT;
507
508 for(j = 0; j < KC_128_OPT; j++)
509 for(i = 0; i < 4; i++)
510 tk[i][j] = k[i][j];
511 t = 0;
512 /* copy values into round key array */
513 for(j = 0; (j < KC_128_OPT) && (t < numSchedRows); j++, t++) {
514 for(i = 0; i < 4; i++) {
515 W[t / BC_128_OPT][i][t % BC_128_OPT] = tk[i][j];
516 }
517 }
518
519 while (t < numSchedRows) {
520 /* while not enough round key material calculated */
521 /* calculate new values */
522 for(i = 0; i < 4; i++) {
523 tk[i][0] ^= S[tk[(i+1)%4][KC_128_OPT-1]];
524 }
525 tk[0][0] ^= rcon[rconpointer++];
526
527 for(j = 1; j < KC_128_OPT; j++) {
528 for(i = 0; i < 4; i++) {
529 tk[i][j] ^= tk[i][j-1];
530 }
531 }
532
533 /* copy values into round key array */
534 for(j = 0; (j < KC_128_OPT) && (t < numSchedRows); j++, t++) {
535 for(i = 0; i < 4; i++) {
536 W[t / BC_128_OPT][i][t % BC_128_OPT] = tk[i][j];
537 }
538 }
539 }
540
541 return 0;
542 }
543
544 int rijndaelEncrypt128 (
545 word8 a[4][BC_128_OPT],
546 word8 rk[MAXROUNDS+1][4][MAXBC])
547 {
548 /* Encryption of one block.
549 */
550 int r;
551
552 /* begin with a key addition
553 */
554 KeyAddition128(a,rk[0]);
555
556 /* ROUNDS-1 ordinary rounds
557 */
558 for(r = 1; r < ROUNDS_128_OPT; r++) {
559 Substitution128(a,S);
560 ShiftRow128(a,0);
561 MixColumn128(a);
562 KeyAddition128(a,rk[r]);
563 }
564
565 /* Last round is special: there is no MixColumn
566 */
567 Substitution128(a,S);
568 ShiftRow128(a,0);
569 KeyAddition128(a,rk[ROUNDS_128_OPT]);
570
571 return 0;
572 }
573
574 int rijndaelDecrypt128 (
575 word8 a[4][BC_128_OPT],
576 word8 rk[MAXROUNDS+1][4][MAXBC])
577 {
578 int r;
579
580 /* To decrypt: apply the inverse operations of the encrypt routine,
581 * in opposite order
582 *
583 * (KeyAddition is an involution: it 's equal to its inverse)
584 * (the inverse of Substitution with table S is Substitution with the
585 * inverse table of S)
586 * (the inverse of Shiftrow is Shiftrow over a suitable distance)
587 */
588
589 /* First the special round:
590 * without InvMixColumn
591 * with extra KeyAddition
592 */
593 KeyAddition128(a,rk[ROUNDS_128_OPT]);
594 Substitution128(a,Si);
595 ShiftRow128(a,1);
596
597 /* ROUNDS-1 ordinary rounds
598 */
599 for(r = ROUNDS_128_OPT-1; r > 0; r--) {
600 KeyAddition128(a,rk[r]);
601 InvMixColumn128(a);
602 Substitution128(a,Si);
603 ShiftRow128(a,1);
604 }
605
606 /* End with the extra key addition
607 */
608
609 KeyAddition128(a,rk[0]);
610
611 return 0;
612 }
613
614 #endif /* !GLADMAN_AES_128_ENABLE */
615