Security-30.1.tar.gz
[apple/security.git] / AppleCSP / AES / rijndael-alg-ref.c
1 /*
2 * Copyright (c) 2000-2001 Apple Computer, Inc. All Rights Reserved.
3 *
4 * The contents of this file constitute Original Code as defined in and are
5 * subject to the Apple Public Source License Version 1.2 (the 'License').
6 * You may not use this file except in compliance with the License. Please obtain
7 * a copy of the License at http://www.apple.com/publicsource and read it before
8 * using this file.
9 *
10 * This Original Code and all software distributed under the License are
11 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS
12 * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
13 * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
14 * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the
15 * specific language governing rights and limitations under the License.
16 */
17
18
19 /* rijndael-alg-ref.c v2.0 August '99
20 * Reference ANSI C code
21 * authors: Paulo Barreto
22 * Vincent Rijmen
23 *
24 * PPC and 128-bit block optimization by Doug Mitchell May 2001.
25 */
26
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "rijndael-alg-ref.h"
32 #include <AppleCSP/cspdebugging.h>
33
34 #define SC ((BC - 4) >> 1)
35
36 #include "boxes-ref.h"
37
38 static const word8 shifts[3][4][2] = {
39 { { 0, 0 },
40 { 1, 3 },
41 { 2, 2 },
42 { 3, 1 }
43 },
44 { { 0, 0 },
45 { 1, 5 },
46 { 2, 4 },
47 { 3, 3 }
48 },
49 { { 0, 0 },
50 { 1, 7 },
51 { 3, 5 },
52 { 4, 4 }
53 }
54 };
55
56 /* 128 bit key/word shift table in bits */
57 static const word8 shifts128[4][2] = {
58 { 0, 0 },
59 { 8, 24 },
60 { 16, 16 },
61 { 24, 8 }
62 };
63
64 #if !AES_MUL_BY_LOOKUP
65 /*
66 * Profiling measurements showed that the mul routine is where a large propertion of
67 * the time is spent. Since the first argument to mul is always one of six
68 * constants (2, 3, 0xe, etc.), we implement six 256x256 byte lookup tables to
69 * do the multiplies. This eliminates the need for the log/antilog tables, so
70 * it's only adding one kilobyte of const data. Throughput improvement for this
71 * mod is a factor of 3.3 for encrypt and 4.1 for decrypt in the 128-bit optimized
72 * case. Improvement for the general case (with a 256 bit key) is 1.46 for encrypt
73 * and 1.88 for decrypt. (Decrypt wins more for this enhancement because the
74 * InvMixColumn does four muls, vs. 2 muls for MixColumn). Measurements taken
75 * on a 500 MHz G4 with 1 MB of L2 cache.
76
77 /*
78 * The mod 255 op in mul is really expensive...
79 *
80 * We know that b <= (254 * 2), so there are only two cases. Either return b,
81 * or return b-255.
82 *
83 * On a G4 this single optimization results in a 24% speedup for encrypt and
84 * a 25% speedup for decrypt.
85 */
86 static inline word8 mod255(word32 b)
87 {
88 if(b >= 255) {
89 b -= 255;
90 }
91 return b;
92 }
93
94 word8 mul(word8 a, word8 b) {
95 /* multiply two elements of GF(2^m)
96 * needed for MixColumn and InvMixColumn
97 */
98 if (a && b) return Alogtable[mod255(Logtable[a] + Logtable[b])];
99 else return 0;
100 }
101 #endif /* !AES_MUL_BY_LOOKUP */
102
103 void KeyAddition(word8 a[4][MAXBC], word8 rk[4][MAXBC], word8 BC) {
104 /* Exor corresponding text input and round key input bytes
105 */
106 int i, j;
107
108 for(i = 0; i < 4; i++)
109 for(j = 0; j < BC; j++) a[i][j] ^= rk[i][j];
110 }
111
112 void ShiftRow(word8 a[4][MAXBC], word8 d, word8 BC) {
113 /* Row 0 remains unchanged
114 * The other three rows are shifted a variable amount
115 */
116 word8 tmp[MAXBC];
117 int i, j;
118
119 for(i = 1; i < 4; i++) {
120 for(j = 0; j < BC; j++) tmp[j] = a[i][(j + shifts[SC][i][d]) % BC];
121 for(j = 0; j < BC; j++) a[i][j] = tmp[j];
122 }
123 }
124
125 void Substitution(word8 a[4][MAXBC], const word8 box[256], word8 BC) {
126 /* Replace every byte of the input by the byte at that place
127 * in the nonlinear S-box
128 */
129 int i, j;
130
131 for(i = 0; i < 4; i++)
132 for(j = 0; j < BC; j++) a[i][j] = box[a[i][j]] ;
133 }
134
135 void MixColumn(word8 a[4][MAXBC], word8 BC) {
136 /* Mix the four bytes of every column in a linear way
137 */
138 word8 b[4][MAXBC];
139 int i, j;
140
141 for(j = 0; j < BC; j++) {
142 for(i = 0; i < 4; i++) {
143 #if AES_MUL_BY_LOOKUP
144 b[i][j] = mulBy0x02[a[i][j]]
145 ^ mulBy0x03[a[(i + 1) % 4][j]]
146 ^ a[(i + 2) % 4][j]
147 ^ a[(i + 3) % 4][j];
148 #else
149 b[i][j] = mul(2,a[i][j])
150 ^ mul(3,a[(i + 1) % 4][j])
151 ^ a[(i + 2) % 4][j]
152 ^ a[(i + 3) % 4][j];
153 #endif
154 }
155 }
156 for(i = 0; i < 4; i++) {
157 for(j = 0; j < BC; j++) a[i][j] = b[i][j];
158 }
159 }
160
161 void InvMixColumn(word8 a[4][MAXBC], word8 BC) {
162 /* Mix the four bytes of every column in a linear way
163 * This is the opposite operation of Mixcolumn
164 */
165 word8 b[4][MAXBC];
166 int i, j;
167
168 for(j = 0; j < BC; j++) {
169 for(i = 0; i < 4; i++) {
170 #if AES_MUL_BY_LOOKUP
171 b[i][j] = mulBy0x0e[a[i][j]]
172 ^ mulBy0x0b[a[(i + 1) % 4][j]]
173 ^ mulBy0x0d[a[(i + 2) % 4][j]]
174 ^ mulBy0x09[a[(i + 3) % 4][j]];
175 #else
176 b[i][j] = mul(0xe,a[i][j])
177 ^ mul(0xb,a[(i + 1) % 4][j])
178 ^ mul(0xd,a[(i + 2) % 4][j])
179 ^ mul(0x9,a[(i + 3) % 4][j]);
180 #endif
181 }
182 }
183 for(i = 0; i < 4; i++) {
184 for(j = 0; j < BC; j++) a[i][j] = b[i][j];
185 }
186 }
187
188 int rijndaelKeySched (
189 word8 k[4][MAXKC],
190 int keyBits,
191 int blockBits,
192 word8 W[MAXROUNDS+1][4][MAXBC]) {
193
194 /* Calculate the necessary round keys
195 * The number of calculations depends on keyBits and blockBits
196 */
197 int KC, BC, ROUNDS;
198 int i, j, t, rconpointer = 0;
199 word8 tk[4][MAXKC];
200
201 switch (keyBits) {
202 case 128: KC = 4; break;
203 case 192: KC = 6; break;
204 case 256: KC = 8; break;
205 default : return (-1);
206 }
207
208 switch (blockBits) {
209 case 128: BC = 4; break;
210 case 192: BC = 6; break;
211 case 256: BC = 8; break;
212 default : return (-2);
213 }
214
215 switch (keyBits >= blockBits ? keyBits : blockBits) {
216 case 128: ROUNDS = 10; break;
217 case 192: ROUNDS = 12; break;
218 case 256: ROUNDS = 14; break;
219 default : return (-3); /* this cannot happen */
220 }
221
222
223 for(j = 0; j < KC; j++)
224 for(i = 0; i < 4; i++)
225 tk[i][j] = k[i][j];
226 t = 0;
227 /* copy values into round key array */
228 for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
229 for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];
230
231 while (t < (ROUNDS+1)*BC) { /* while not enough round key material calculated */
232 /* calculate new values */
233 for(i = 0; i < 4; i++)
234 tk[i][0] ^= S[tk[(i+1)%4][KC-1]];
235 tk[0][0] ^= rcon[rconpointer++];
236
237 if (KC != 8)
238 for(j = 1; j < KC; j++)
239 for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
240 else {
241 for(j = 1; j < KC/2; j++)
242 for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
243 for(i = 0; i < 4; i++) tk[i][KC/2] ^= S[tk[i][KC/2 - 1]];
244 for(j = KC/2 + 1; j < KC; j++)
245 for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
246 }
247 /* copy values into round key array */
248 for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
249 for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];
250 }
251
252 return 0;
253 }
254
255 int rijndaelEncrypt (
256 word8 a[4][MAXBC],
257 int keyBits,
258 int blockBits,
259 word8 rk[MAXROUNDS+1][4][MAXBC])
260 {
261 /* Encryption of one block, general case.
262 */
263 int r, BC, ROUNDS;
264
265 switch (blockBits) {
266 case 128: BC = 4; break;
267 case 192: BC = 6; break;
268 case 256: BC = 8; break;
269 default : return (-2);
270 }
271
272 switch (keyBits >= blockBits ? keyBits : blockBits) {
273 case 128: ROUNDS = 10; break;
274 case 192: ROUNDS = 12; break;
275 case 256: ROUNDS = 14; break;
276 default : return (-3); /* this cannot happen */
277 }
278
279 /* begin with a key addition
280 */
281 KeyAddition(a,rk[0],BC);
282
283 /* ROUNDS-1 ordinary rounds
284 */
285 for(r = 1; r < ROUNDS; r++) {
286 Substitution(a,S,BC);
287 ShiftRow(a,0,BC);
288 MixColumn(a,BC);
289 KeyAddition(a,rk[r],BC);
290 }
291
292 /* Last round is special: there is no MixColumn
293 */
294 Substitution(a,S,BC);
295 ShiftRow(a,0,BC);
296 KeyAddition(a,rk[ROUNDS],BC);
297
298 return 0;
299 }
300
301 int rijndaelDecrypt (
302 word8 a[4][MAXBC],
303 int keyBits,
304 int blockBits,
305 word8 rk[MAXROUNDS+1][4][MAXBC])
306 {
307 int r, BC, ROUNDS;
308
309 switch (blockBits) {
310 case 128: BC = 4; break;
311 case 192: BC = 6; break;
312 case 256: BC = 8; break;
313 default : return (-2);
314 }
315
316 switch (keyBits >= blockBits ? keyBits : blockBits) {
317 case 128: ROUNDS = 10; break;
318 case 192: ROUNDS = 12; break;
319 case 256: ROUNDS = 14; break;
320 default : return (-3); /* this cannot happen */
321 }
322
323 /* To decrypt: apply the inverse operations of the encrypt routine,
324 * in opposite order
325 *
326 * (KeyAddition is an involution: it 's equal to its inverse)
327 * (the inverse of Substitution with table S is Substitution with the
328 * inverse table of S)
329 * (the inverse of Shiftrow is Shiftrow over a suitable distance)
330 */
331
332 /* First the special round:
333 * without InvMixColumn
334 * with extra KeyAddition
335 */
336 KeyAddition(a,rk[ROUNDS],BC);
337 Substitution(a,Si,BC);
338 ShiftRow(a,1,BC);
339
340 /* ROUNDS-1 ordinary rounds
341 */
342 for(r = ROUNDS-1; r > 0; r--) {
343 KeyAddition(a,rk[r],BC);
344 InvMixColumn(a,BC);
345 Substitution(a,Si,BC);
346 ShiftRow(a,1,BC);
347 }
348
349 /* End with the extra key addition
350 */
351
352 KeyAddition(a,rk[0],BC);
353
354 return 0;
355 }
356
357 /*
358 * All of these 128-bit-key-and-block routines require 32-bit word-aligned
359 * char array pointers.ÊThe key schedule arrays are easy; they come from
360 * keyInstance which has a 4-byte-aligned element preceeding the key schedule.
361 * Others require manual alignment of a local variable by the caller.
362 */
363
364 static inline void KeyAddition128(
365 word8 a[4][BC_128_OPT],
366 word8 rk[4][MAXBC]) {
367
368 /* these casts are endian-independent */
369 ((word32 *)a)[0] ^= *((word32 *)(&rk[0]));
370 ((word32 *)a)[1] ^= *((word32 *)(&rk[1]));
371 ((word32 *)a)[2] ^= *((word32 *)(&rk[2]));
372 ((word32 *)a)[3] ^= *((word32 *)(&rk[3]));
373 }
374
375 static void Substitution128(
376 word8 a[4][BC_128_OPT],
377 const word8 box[256]) {
378 /* Replace every byte of the input by the byte at that place
379 * in the nonlinear S-box
380 */
381 int i, j;
382
383 /* still to be optimized - larger S boxes? */
384 for(i = 0; i < 4; i++) {
385 for(j = 0; j < BC_128_OPT; j++) {
386 a[i][j] = box[a[i][j]];
387 }
388 }
389 }
390
391 #if defined(__ppc__) && defined(__GNUC__)
392
393 static inline void rotateWordLeft(
394 word8 *word, // known to be word aligned
395 unsigned rotCount) // in bits
396 {
397 word32 lword = *((word32 *)word);
398 asm("rlwnm %0,%1,%2,0,31" : "=r"(lword) : "0"(lword), "r"(rotCount));
399 *((word32 *)word) = lword;
400 }
401
402 #else
403
404 /*
405 * Insert your machine/compiler dependent code here,
406 * or just use this, which works on any platform and compiler
407 * which supports the __attribute__((aligned(4))) directive.
408 */
409 static void rotateWordLeft(
410 word8 *word, // known to be word aligned
411 unsigned rotCount) // in bits
412 {
413 word8 tmp[BC_128_OPT] __attribute__((aligned(4)));
414 unsigned bytes = rotCount / 8;
415
416 tmp[0] = word[bytes & (BC_128_OPT-1)];
417 tmp[1] = word[(1+bytes) & (BC_128_OPT-1)];
418 tmp[2] = word[(2+bytes) & (BC_128_OPT-1)];
419 tmp[3] = word[(3+bytes) & (BC_128_OPT-1)];
420 *((word32 *)word) = *((word32 *)tmp);
421 }
422 #endif
423
424 static inline void ShiftRow128(
425 word8 a[4][BC_128_OPT],
426 word8 d) {
427 /* Row 0 remains unchanged
428 * The other three rows are shifted (actually rotated) a variable amount
429 */
430 int i;
431
432 for(i = 1; i < 4; i++) {
433 rotateWordLeft(a[i], shifts128[i][d]);
434 }
435 }
436
437 /*
438 * The following two routines are where most of the time is spent in this
439 * module. Further optimization would have to focus here.
440 */
441 static void MixColumn128(word8 a[4][BC_128_OPT]) {
442 /* Mix the four bytes of every column in a linear way
443 */
444 word8 b[4][BC_128_OPT];
445 int i, j;
446
447 for(j = 0; j < BC_128_OPT; j++) {
448 for(i = 0; i < 4; i++) {
449 #if AES_MUL_BY_LOOKUP
450 b[i][j] = mulBy0x02[a[i][j]]
451 ^ mulBy0x03[a[(i + 1) % 4][j]]
452 ^ a[(i + 2) % 4][j]
453 ^ a[(i + 3) % 4][j];
454 #else
455 b[i][j] = mul(2,a[i][j])
456 ^ mul(3,a[(i + 1) % 4][j])
457 ^ a[(i + 2) % 4][j]
458 ^ a[(i + 3) % 4][j];
459 #endif
460 }
461 }
462 memmove(a, b, 4 * BC_128_OPT);
463 }
464
465 static void InvMixColumn128(word8 a[4][BC_128_OPT]) {
466 /* Mix the four bytes of every column in a linear way
467 * This is the opposite operation of Mixcolumn
468 */
469 word8 b[4][BC_128_OPT];
470 int i, j;
471
472 for(j = 0; j < BC_128_OPT; j++) {
473 for(i = 0; i < 4; i++) {
474 #if AES_MUL_BY_LOOKUP
475 b[i][j] = mulBy0x0e[a[i][j]]
476 ^ mulBy0x0b[a[(i + 1) % 4][j]]
477 ^ mulBy0x0d[a[(i + 2) % 4][j]]
478 ^ mulBy0x09[a[(i + 3) % 4][j]];
479 #else
480 b[i][j] = mul(0xe,a[i][j])
481 ^ mul(0xb,a[(i + 1) % 4][j])
482 ^ mul(0xd,a[(i + 2) % 4][j])
483 ^ mul(0x9,a[(i + 3) % 4][j]);
484 #endif
485 }
486 }
487 memmove(a, b, 4 * BC_128_OPT);
488 }
489
490 int rijndaelKeySched128 (
491 word8 k[4][KC_128_OPT],
492 word8 W[MAXROUNDS+1][4][MAXBC]) {
493
494 /* Calculate the necessary round keys
495 * The number of calculations depends on keyBits and blockBits
496 */
497 int i, j, t, rconpointer = 0;
498 word8 tk[4][KC_128_OPT];
499 unsigned numSchedRows = (ROUNDS_128_OPT + 1) * BC_128_OPT;
500
501 for(j = 0; j < KC_128_OPT; j++)
502 for(i = 0; i < 4; i++)
503 tk[i][j] = k[i][j];
504 t = 0;
505 /* copy values into round key array */
506 for(j = 0; (j < KC_128_OPT) && (t < numSchedRows); j++, t++) {
507 for(i = 0; i < 4; i++) {
508 W[t / BC_128_OPT][i][t % BC_128_OPT] = tk[i][j];
509 }
510 }
511
512 while (t < numSchedRows) {
513 /* while not enough round key material calculated */
514 /* calculate new values */
515 for(i = 0; i < 4; i++) {
516 tk[i][0] ^= S[tk[(i+1)%4][KC_128_OPT-1]];
517 }
518 tk[0][0] ^= rcon[rconpointer++];
519
520 for(j = 1; j < KC_128_OPT; j++) {
521 for(i = 0; i < 4; i++) {
522 tk[i][j] ^= tk[i][j-1];
523 }
524 }
525
526 /* copy values into round key array */
527 for(j = 0; (j < KC_128_OPT) && (t < numSchedRows); j++, t++) {
528 for(i = 0; i < 4; i++) {
529 W[t / BC_128_OPT][i][t % BC_128_OPT] = tk[i][j];
530 }
531 }
532 }
533
534 return 0;
535 }
536
537 int rijndaelEncrypt128 (
538 word8 a[4][BC_128_OPT],
539 word8 rk[MAXROUNDS+1][4][MAXBC])
540 {
541 /* Encryption of one block.
542 */
543 int r;
544
545 /* begin with a key addition
546 */
547 KeyAddition128(a,rk[0]);
548
549 /* ROUNDS-1 ordinary rounds
550 */
551 for(r = 1; r < ROUNDS_128_OPT; r++) {
552 Substitution128(a,S);
553 ShiftRow128(a,0);
554 MixColumn128(a);
555 KeyAddition128(a,rk[r]);
556 }
557
558 /* Last round is special: there is no MixColumn
559 */
560 Substitution128(a,S);
561 ShiftRow128(a,0);
562 KeyAddition128(a,rk[ROUNDS_128_OPT]);
563
564 return 0;
565 }
566
567 int rijndaelDecrypt128 (
568 word8 a[4][BC_128_OPT],
569 word8 rk[MAXROUNDS+1][4][MAXBC])
570 {
571 int r;
572
573 /* To decrypt: apply the inverse operations of the encrypt routine,
574 * in opposite order
575 *
576 * (KeyAddition is an involution: it 's equal to its inverse)
577 * (the inverse of Substitution with table S is Substitution with the
578 * inverse table of S)
579 * (the inverse of Shiftrow is Shiftrow over a suitable distance)
580 */
581
582 /* First the special round:
583 * without InvMixColumn
584 * with extra KeyAddition
585 */
586 KeyAddition128(a,rk[ROUNDS_128_OPT]);
587 Substitution128(a,Si);
588 ShiftRow128(a,1);
589
590 /* ROUNDS-1 ordinary rounds
591 */
592 for(r = ROUNDS_128_OPT-1; r > 0; r--) {
593 KeyAddition128(a,rk[r]);
594 InvMixColumn128(a);
595 Substitution128(a,Si);
596 ShiftRow128(a,1);
597 }
598
599 /* End with the extra key addition
600 */
601
602 KeyAddition128(a,rk[0]);
603
604 return 0;
605 }
606