/* Source: git.saurik.com Git mirror — apple/security.git (Security-55471.14.8)
 * blob: libsecurity_apple_csp/lib/rijndael-alg-ref.c */
1 /*
2 * Copyright (c) 2000-2001 Apple Computer, Inc. All Rights Reserved.
3 *
4 * The contents of this file constitute Original Code as defined in and are
5 * subject to the Apple Public Source License Version 1.2 (the 'License').
6 * You may not use this file except in compliance with the License. Please obtain
7 * a copy of the License at http://www.apple.com/publicsource and read it before
8 * using this file.
9 *
10 * This Original Code and all software distributed under the License are
11 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS
12 * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
13 * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
14 * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the
15 * specific language governing rights and limitations under the License.
16 */
17
18
19 /* rijndael-alg-ref.c v2.0 August '99
20 * Reference ANSI C code
21 * authors: Paulo Barreto
22 * Vincent Rijmen
23 *
24 * PPC and 128-bit block optimization by Doug Mitchell May 2001.
25 */
26
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "rijndael-alg-ref.h"
32 #include <cspdebugging.h>
33
34 #define SC ((BC - 4) >> 1)
35
36 #include "boxes-ref.h"
37
38 static const word8 shifts[3][4][2] = {
39 { { 0, 0 },
40 { 1, 3 },
41 { 2, 2 },
42 { 3, 1 }
43 },
44 { { 0, 0 },
45 { 1, 5 },
46 { 2, 4 },
47 { 3, 3 }
48 },
49 { { 0, 0 },
50 { 1, 7 },
51 { 3, 5 },
52 { 4, 4 }
53 }
54 };
55
56 #if !GLADMAN_AES_128_ENABLE
57
58 /* 128 bit key/word shift table in bits */
59 static const word8 shifts128[4][2] = {
60 { 0, 0 },
61 { 8, 24 },
62 { 16, 16 },
63 { 24, 8 }
64 };
65
66 #endif /* GLADMAN_AES_128_ENABLE */
67
68 #if !AES_MUL_BY_LOOKUP
69 /*
70 * Profiling measurements showed that the mul routine is where a large propertion of
71 * the time is spent. Since the first argument to mul is always one of six
72 * constants (2, 3, 0xe, etc.), we implement six 256x256 byte lookup tables to
73 * do the multiplies. This eliminates the need for the log/antilog tables, so
74 * it's only adding one kilobyte of const data. Throughput improvement for this
75 * mod is a factor of 3.3 for encrypt and 4.1 for decrypt in the 128-bit optimized
76 * case. Improvement for the general case (with a 256 bit key) is 1.46 for encrypt
77 * and 1.88 for decrypt. (Decrypt wins more for this enhancement because the
78 * InvMixColumn does four muls, vs. 2 muls for MixColumn). Measurements taken
79 * on a 500 MHz G4 with 1 MB of L2 cache.
80 */
81
82 /*
83 * The mod 255 op in mul is really expensive...
84 *
85 * We know that b <= (254 * 2), so there are only two cases. Either return b,
86 * or return b-255.
87 *
88 * On a G4 this single optimization results in a 24% speedup for encrypt and
89 * a 25% speedup for decrypt.
90 */
91 static inline word8 mod255(word32 b)
92 {
93 if(b >= 255) {
94 b -= 255;
95 }
96 return b;
97 }
98
99 word8 mul(word8 a, word8 b) {
100 /* multiply two elements of GF(2^m)
101 * needed for MixColumn and InvMixColumn
102 */
103 if (a && b) return Alogtable[mod255(Logtable[a] + Logtable[b])];
104 else return 0;
105 }
106 #endif /* !AES_MUL_BY_LOOKUP */
107
108 static
109 void KeyAddition(word8 a[4][MAXBC], word8 rk[4][MAXBC], word8 BC) {
110 /* Exor corresponding text input and round key input bytes
111 */
112 int i, j;
113
114 for(i = 0; i < 4; i++)
115 for(j = 0; j < BC; j++) a[i][j] ^= rk[i][j];
116 }
117
118 static
119 void ShiftRow(word8 a[4][MAXBC], word8 d, word8 BC) {
120 /* Row 0 remains unchanged
121 * The other three rows are shifted a variable amount
122 */
123 word8 tmp[MAXBC];
124 int i, j;
125
126 for(i = 1; i < 4; i++) {
127 for(j = 0; j < BC; j++) tmp[j] = a[i][(j + shifts[SC][i][d]) % BC];
128 for(j = 0; j < BC; j++) a[i][j] = tmp[j];
129 }
130 }
131
132 static
133 void Substitution(word8 a[4][MAXBC], const word8 box[256], word8 BC) {
134 /* Replace every byte of the input by the byte at that place
135 * in the nonlinear S-box
136 */
137 int i, j;
138
139 for(i = 0; i < 4; i++)
140 for(j = 0; j < BC; j++) a[i][j] = box[a[i][j]] ;
141 }
142
143 static
144 void MixColumn(word8 a[4][MAXBC], word8 BC) {
145 /* Mix the four bytes of every column in a linear way
146 */
147 word8 b[4][MAXBC];
148 int i, j;
149
150 for(j = 0; j < BC; j++) {
151 for(i = 0; i < 4; i++) {
152 #if AES_MUL_BY_LOOKUP
153 b[i][j] = mulBy0x02[a[i][j]]
154 ^ mulBy0x03[a[(i + 1) % 4][j]]
155 ^ a[(i + 2) % 4][j]
156 ^ a[(i + 3) % 4][j];
157 #else
158 b[i][j] = mul(2,a[i][j])
159 ^ mul(3,a[(i + 1) % 4][j])
160 ^ a[(i + 2) % 4][j]
161 ^ a[(i + 3) % 4][j];
162 #endif
163 }
164 }
165 for(i = 0; i < 4; i++) {
166 for(j = 0; j < BC; j++) a[i][j] = b[i][j];
167 }
168 }
169
170 static
171 void InvMixColumn(word8 a[4][MAXBC], word8 BC) {
172 /* Mix the four bytes of every column in a linear way
173 * This is the opposite operation of Mixcolumn
174 */
175 word8 b[4][MAXBC];
176 int i, j;
177
178 for(j = 0; j < BC; j++) {
179 for(i = 0; i < 4; i++) {
180 #if AES_MUL_BY_LOOKUP
181 b[i][j] = mulBy0x0e[a[i][j]]
182 ^ mulBy0x0b[a[(i + 1) % 4][j]]
183 ^ mulBy0x0d[a[(i + 2) % 4][j]]
184 ^ mulBy0x09[a[(i + 3) % 4][j]];
185 #else
186 b[i][j] = mul(0xe,a[i][j])
187 ^ mul(0xb,a[(i + 1) % 4][j])
188 ^ mul(0xd,a[(i + 2) % 4][j])
189 ^ mul(0x9,a[(i + 3) % 4][j]);
190 #endif
191 }
192 }
193 for(i = 0; i < 4; i++) {
194 for(j = 0; j < BC; j++) a[i][j] = b[i][j];
195 }
196 }
197
int rijndaelKeySched (
	word8 k[4][MAXKC],
	int keyBits,
	int blockBits,
	word8 W[MAXROUNDS+1][4][MAXBC]) {

	/* Expand the cipher key k into the full round-key array W.
	 *
	 * k         - cipher key; byte i of 32-bit key word j is k[i][j]
	 * keyBits   - key length in bits: 128, 192 or 256
	 * blockBits - block length in bits: 128, 192 or 256
	 * W         - receives ROUNDS+1 round keys of 4 x BC bytes each
	 *
	 * Returns 0 on success, -1 for an unsupported keyBits,
	 * -2 for an unsupported blockBits.
	 */
	int KC, BC, ROUNDS;
	int i, j, t, rconpointer = 0;
	word8 tk[4][MAXKC];	/* working copy of the key, transformed in place */

	/* KC = key length in 32-bit words */
	switch (keyBits) {
	case 128: KC = 4; break;
	case 192: KC = 6; break;
	case 256: KC = 8; break;
	default : return (-1);
	}

	/* BC = block length in 32-bit words */
	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	/* round count is determined by the larger of key and block size */
	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}


	for(j = 0; j < KC; j++)
		for(i = 0; i < 4; i++)
			tk[i][j] = k[i][j];
	t = 0;	/* t counts 4-byte key-schedule words emitted so far */
	/* copy values into round key array */
	for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
		for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];

	while (t < (ROUNDS+1)*BC) { /* while not enough round key material calculated */
		/* calculate new values */
		/* RotWord + SubWord of the last column, folded into column 0 */
		for(i = 0; i < 4; i++)
			tk[i][0] ^= S[tk[(i+1)%4][KC-1]];
		tk[0][0] ^= rcon[rconpointer++];	/* round constant */

		if (KC != 8)
			for(j = 1; j < KC; j++)
				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
		else {
			/* 256-bit keys apply an extra S-box pass to the
			 * middle column (KC/2) of the working key */
			for(j = 1; j < KC/2; j++)
				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
			for(i = 0; i < 4; i++) tk[i][KC/2] ^= S[tk[i][KC/2 - 1]];
			for(j = KC/2 + 1; j < KC; j++)
				for(i = 0; i < 4; i++) tk[i][j] ^= tk[i][j-1];
		}
		/* copy values into round key array */
		for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
			for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk[i][j];
	}

	return 0;
}
264
265 int rijndaelEncrypt (
266 word8 a[4][MAXBC],
267 int keyBits,
268 int blockBits,
269 word8 rk[MAXROUNDS+1][4][MAXBC])
270 {
271 /* Encryption of one block, general case.
272 */
273 int r, BC, ROUNDS;
274
275 switch (blockBits) {
276 case 128: BC = 4; break;
277 case 192: BC = 6; break;
278 case 256: BC = 8; break;
279 default : return (-2);
280 }
281
282 switch (keyBits >= blockBits ? keyBits : blockBits) {
283 case 128: ROUNDS = 10; break;
284 case 192: ROUNDS = 12; break;
285 case 256: ROUNDS = 14; break;
286 default : return (-3); /* this cannot happen */
287 }
288
289 /* begin with a key addition
290 */
291 KeyAddition(a,rk[0],BC);
292
293 /* ROUNDS-1 ordinary rounds
294 */
295 for(r = 1; r < ROUNDS; r++) {
296 Substitution(a,S,BC);
297 ShiftRow(a,0,BC);
298 MixColumn(a,BC);
299 KeyAddition(a,rk[r],BC);
300 }
301
302 /* Last round is special: there is no MixColumn
303 */
304 Substitution(a,S,BC);
305 ShiftRow(a,0,BC);
306 KeyAddition(a,rk[ROUNDS],BC);
307
308 return 0;
309 }
310
311 int rijndaelDecrypt (
312 word8 a[4][MAXBC],
313 int keyBits,
314 int blockBits,
315 word8 rk[MAXROUNDS+1][4][MAXBC])
316 {
317 int r, BC, ROUNDS;
318
319 switch (blockBits) {
320 case 128: BC = 4; break;
321 case 192: BC = 6; break;
322 case 256: BC = 8; break;
323 default : return (-2);
324 }
325
326 switch (keyBits >= blockBits ? keyBits : blockBits) {
327 case 128: ROUNDS = 10; break;
328 case 192: ROUNDS = 12; break;
329 case 256: ROUNDS = 14; break;
330 default : return (-3); /* this cannot happen */
331 }
332
333 /* To decrypt: apply the inverse operations of the encrypt routine,
334 * in opposite order
335 *
336 * (KeyAddition is an involution: it 's equal to its inverse)
337 * (the inverse of Substitution with table S is Substitution with the
338 * inverse table of S)
339 * (the inverse of Shiftrow is Shiftrow over a suitable distance)
340 */
341
342 /* First the special round:
343 * without InvMixColumn
344 * with extra KeyAddition
345 */
346 KeyAddition(a,rk[ROUNDS],BC);
347 Substitution(a,Si,BC);
348 ShiftRow(a,1,BC);
349
350 /* ROUNDS-1 ordinary rounds
351 */
352 for(r = ROUNDS-1; r > 0; r--) {
353 KeyAddition(a,rk[r],BC);
354 InvMixColumn(a,BC);
355 Substitution(a,Si,BC);
356 ShiftRow(a,1,BC);
357 }
358
359 /* End with the extra key addition
360 */
361
362 KeyAddition(a,rk[0],BC);
363
364 return 0;
365 }
366
367 #if !GLADMAN_AES_128_ENABLE
368
369 /*
370 * All of these 128-bit-key-and-block routines require 32-bit word-aligned
 * char array pointers. The key schedule arrays are easy; they come from
 * keyInstance which has a 4-byte-aligned element preceding the key schedule.
373 * Others require manual alignment of a local variable by the caller.
374 */
375
static inline void KeyAddition128(
	word8 a[4][BC_128_OPT],
	word8 rk[4][MAXBC]) {

	/* XOR a 128-bit state with a round key, one 32-bit word per row.
	 * Only the first BC_128_OPT (4) bytes of each MAXBC-wide round-key
	 * row are used. Both arrays must be 4-byte aligned (see the
	 * alignment comment at the top of this section).
	 * NOTE(review): the word32 casts are type-punning and technically
	 * violate strict aliasing; presumably safe with the compilers this
	 * file targets — confirm before porting. */
	/* these casts are endian-independent */
	((word32 *)a)[0] ^= *((word32 *)(&rk[0]));
	((word32 *)a)[1] ^= *((word32 *)(&rk[1]));
	((word32 *)a)[2] ^= *((word32 *)(&rk[2]));
	((word32 *)a)[3] ^= *((word32 *)(&rk[3]));
}
386
387 static void Substitution128(
388 word8 a[4][BC_128_OPT],
389 const word8 box[256]) {
390 /* Replace every byte of the input by the byte at that place
391 * in the nonlinear S-box
392 */
393 int i, j;
394
395 /* still to be optimized - larger S boxes? */
396 for(i = 0; i < 4; i++) {
397 for(j = 0; j < BC_128_OPT; j++) {
398 a[i][j] = box[a[i][j]];
399 }
400 }
401 }
402
403 #if defined(__ppc__) && defined(__GNUC__)
404
static inline void rotateWordLeft(
	word8 *word, // known to be word aligned
	unsigned rotCount) // in bits
{
	/* PPC fast path: rotate the 32-bit word left by rotCount bits with
	 * a single rlwnm (rotate-left-word-then-AND-with-mask) instruction;
	 * the 0,31 mask keeps all 32 bits. rotCount comes from shifts128,
	 * so it is always a multiple of 8 (a whole-byte rotation). */
	word32 lword = *((word32 *)word);
	asm("rlwnm %0,%1,%2,0,31" : "=r"(lword) : "0"(lword), "r"(rotCount));
	*((word32 *)word) = lword;
}
413
414 #else
415
416 /*
417 * Insert your machine/compiler dependent code here,
418 * or just use this, which works on any platform and compiler
419 * which supports the __attribute__((aligned(4))) directive.
420 */
421 static void rotateWordLeft(
422 word8 *word, // known to be word aligned
423 unsigned rotCount) // in bits
424 {
425 word8 tmp[BC_128_OPT] __attribute__((aligned(4)));
426 unsigned bytes = rotCount / 8;
427
428 tmp[0] = word[bytes & (BC_128_OPT-1)];
429 tmp[1] = word[(1+bytes) & (BC_128_OPT-1)];
430 tmp[2] = word[(2+bytes) & (BC_128_OPT-1)];
431 tmp[3] = word[(3+bytes) & (BC_128_OPT-1)];
432 *((word32 *)word) = *((word32 *)tmp);
433 }
434 #endif
435
436 static inline void ShiftRow128(
437 word8 a[4][BC_128_OPT],
438 word8 d) {
439 /* Row 0 remains unchanged
440 * The other three rows are shifted (actually rotated) a variable amount
441 */
442 int i;
443
444 for(i = 1; i < 4; i++) {
445 rotateWordLeft(a[i], shifts128[i][d]);
446 }
447 }
448
449 /*
450 * The following two routines are where most of the time is spent in this
451 * module. Further optimization would have to focus here.
452 */
453 static void MixColumn128(word8 a[4][BC_128_OPT]) {
454 /* Mix the four bytes of every column in a linear way
455 */
456 word8 b[4][BC_128_OPT];
457 int i, j;
458
459 for(j = 0; j < BC_128_OPT; j++) {
460 for(i = 0; i < 4; i++) {
461 #if AES_MUL_BY_LOOKUP
462 b[i][j] = mulBy0x02[a[i][j]]
463 ^ mulBy0x03[a[(i + 1) % 4][j]]
464 ^ a[(i + 2) % 4][j]
465 ^ a[(i + 3) % 4][j];
466 #else
467 b[i][j] = mul(2,a[i][j])
468 ^ mul(3,a[(i + 1) % 4][j])
469 ^ a[(i + 2) % 4][j]
470 ^ a[(i + 3) % 4][j];
471 #endif
472 }
473 }
474 memmove(a, b, 4 * BC_128_OPT);
475 }
476
477 static void InvMixColumn128(word8 a[4][BC_128_OPT]) {
478 /* Mix the four bytes of every column in a linear way
479 * This is the opposite operation of Mixcolumn
480 */
481 word8 b[4][BC_128_OPT];
482 int i, j;
483
484 for(j = 0; j < BC_128_OPT; j++) {
485 for(i = 0; i < 4; i++) {
486 #if AES_MUL_BY_LOOKUP
487 b[i][j] = mulBy0x0e[a[i][j]]
488 ^ mulBy0x0b[a[(i + 1) % 4][j]]
489 ^ mulBy0x0d[a[(i + 2) % 4][j]]
490 ^ mulBy0x09[a[(i + 3) % 4][j]];
491 #else
492 b[i][j] = mul(0xe,a[i][j])
493 ^ mul(0xb,a[(i + 1) % 4][j])
494 ^ mul(0xd,a[(i + 2) % 4][j])
495 ^ mul(0x9,a[(i + 3) % 4][j]);
496 #endif
497 }
498 }
499 memmove(a, b, 4 * BC_128_OPT);
500 }
501
int rijndaelKeySched128 (
	word8 k[4][KC_128_OPT],
	word8 W[MAXROUNDS+1][4][MAXBC]) {

	/* Expand a 128-bit cipher key into ROUNDS_128_OPT+1 round keys.
	 * Specialized version of rijndaelKeySched for KC == BC == 4;
	 * since KC != 8, only the simple column-chaining path is needed.
	 *
	 * k - cipher key; byte i of 32-bit key word j is k[i][j]
	 * W - receives the round keys, 4 x BC_128_OPT bytes each
	 *
	 * Always returns 0 (sizes are fixed at compile time).
	 */
	int i, j, t, rconpointer = 0;
	word8 tk[4][KC_128_OPT];	/* working copy of the key, transformed in place */
	unsigned numSchedRows = (ROUNDS_128_OPT + 1) * BC_128_OPT;

	for(j = 0; j < KC_128_OPT; j++)
		for(i = 0; i < 4; i++)
			tk[i][j] = k[i][j];
	t = 0;	/* t counts 4-byte key-schedule words emitted so far */
	/* copy values into round key array */
	for(j = 0; (j < KC_128_OPT) && (t < numSchedRows); j++, t++) {
		for(i = 0; i < 4; i++) {
			W[t / BC_128_OPT][i][t % BC_128_OPT] = tk[i][j];
		}
	}

	while (t < numSchedRows) {
		/* while not enough round key material calculated */
		/* calculate new values */
		/* RotWord + SubWord of the last column, folded into column 0 */
		for(i = 0; i < 4; i++) {
			tk[i][0] ^= S[tk[(i+1)%4][KC_128_OPT-1]];
		}
		tk[0][0] ^= rcon[rconpointer++];	/* round constant */

		/* chain each remaining column off its predecessor */
		for(j = 1; j < KC_128_OPT; j++) {
			for(i = 0; i < 4; i++) {
				tk[i][j] ^= tk[i][j-1];
			}
		}

		/* copy values into round key array */
		for(j = 0; (j < KC_128_OPT) && (t < numSchedRows); j++, t++) {
			for(i = 0; i < 4; i++) {
				W[t / BC_128_OPT][i][t % BC_128_OPT] = tk[i][j];
			}
		}
	}

	return 0;
}
548
549 int rijndaelEncrypt128 (
550 word8 a[4][BC_128_OPT],
551 word8 rk[MAXROUNDS+1][4][MAXBC])
552 {
553 /* Encryption of one block.
554 */
555 int r;
556
557 /* begin with a key addition
558 */
559 KeyAddition128(a,rk[0]);
560
561 /* ROUNDS-1 ordinary rounds
562 */
563 for(r = 1; r < ROUNDS_128_OPT; r++) {
564 Substitution128(a,S);
565 ShiftRow128(a,0);
566 MixColumn128(a);
567 KeyAddition128(a,rk[r]);
568 }
569
570 /* Last round is special: there is no MixColumn
571 */
572 Substitution128(a,S);
573 ShiftRow128(a,0);
574 KeyAddition128(a,rk[ROUNDS_128_OPT]);
575
576 return 0;
577 }
578
579 int rijndaelDecrypt128 (
580 word8 a[4][BC_128_OPT],
581 word8 rk[MAXROUNDS+1][4][MAXBC])
582 {
583 int r;
584
585 /* To decrypt: apply the inverse operations of the encrypt routine,
586 * in opposite order
587 *
588 * (KeyAddition is an involution: it 's equal to its inverse)
589 * (the inverse of Substitution with table S is Substitution with the
590 * inverse table of S)
591 * (the inverse of Shiftrow is Shiftrow over a suitable distance)
592 */
593
594 /* First the special round:
595 * without InvMixColumn
596 * with extra KeyAddition
597 */
598 KeyAddition128(a,rk[ROUNDS_128_OPT]);
599 Substitution128(a,Si);
600 ShiftRow128(a,1);
601
602 /* ROUNDS-1 ordinary rounds
603 */
604 for(r = ROUNDS_128_OPT-1; r > 0; r--) {
605 KeyAddition128(a,rk[r]);
606 InvMixColumn128(a);
607 Substitution128(a,Si);
608 ShiftRow128(a,1);
609 }
610
611 /* End with the extra key addition
612 */
613
614 KeyAddition128(a,rk[0]);
615
616 return 0;
617 }
618
619 #endif /* !GLADMAN_AES_128_ENABLE */
620