]> git.saurik.com Git - apple/security.git/blob - OSX/libsecurity_apple_csp/lib/vRijndael-alg-ref.c
Security-57336.1.9.tar.gz
[apple/security.git] / OSX / libsecurity_apple_csp / lib / vRijndael-alg-ref.c
1 /*
2 * Copyright (c) 2000-2001,2011,2014 Apple Inc. All Rights Reserved.
3 *
4 * The contents of this file constitute Original Code as defined in and are
5 * subject to the Apple Public Source License Version 1.2 (the 'License').
6 * You may not use this file except in compliance with the License. Please obtain
7 * a copy of the License at http://www.apple.com/publicsource and read it before
8 * using this file.
9 *
10 * This Original Code and all software distributed under the License are
11 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS
12 * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
13 * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
14 * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the
15 * specific language governing rights and limitations under the License.
16 */
17
18
19 /*
20 * vRijndael-alg-ref.c
21 *
22 * Copyright (c) 2001,2011,2014 Apple Inc. All Rights Reserved.
23 *
24 */
25
26 #include "rijndaelApi.h"
27 #include "rijndael-alg-ref.h"
28 #include "boxes-ref.h"
29 #include <string.h>
30
/*
 * Debug tracing.  (Original note: debugger seems to have trouble with this
 * code...)
 *
 * VAES_DEBUG may be predefined on the compiler command line; when it is not,
 * it defaults to 1 as before.  When non-zero, vdprintf((fmt, ...)) expands to
 * a printf call -- note the double parentheses, which carry the whole
 * argument list through the single macro parameter.  When zero, vdprintf
 * expands to nothing.
 *
 * NOTE(review): with the default of 1 the 128-bit block routines print a
 * line on every call -- confirm that is intended for non-debug builds.
 */
#ifndef VAES_DEBUG
#define VAES_DEBUG	1
#endif
#if VAES_DEBUG
#include <stdio.h>
#define vdprintf(s)	printf s
#else
#define vdprintf(s)
#endif

/*
 * First index into shifts[] for the block width in scope:
 * BC = 4, 6, 8 columns gives SC = 0, 1, 2.  Expects a variable named BC to
 * be visible at the point of use.
 */
#define SC	((BC - 4) >> 1)
41
42 #if defined(__ppc__) && defined(ALTIVEC_ENABLE)
43
44 typedef union {
45 unsigned char s[4][8];
46 unsigned long l[8];
47 vector unsigned char v[2];
48 } doubleVec;
49
50 typedef union {
51 unsigned long s[4];
52 vector unsigned long v;
53 } vecLong;
54
55 static word8 shifts[3][4][2] = {
56 { { 0, 0 },
57 { 1, 3 },
58 { 2, 2 },
59 { 3, 1 }
60 },
61 { { 0, 0 },
62 { 1, 5 },
63 { 2, 4 },
64 { 3, 3 }
65 },
66 { { 0, 0 },
67 { 1, 7 },
68 { 3, 5 },
69 { 4, 4 }
70 }
71 };
72
73 int vRijndaelKeySched ( vector unsigned char vk[2], int keyBits, int blockBits,
74 unsigned char W[MAXROUNDS+1][4][MAXBC])
75 {
76 /* Calculate the necessary round keys
77 * The number of calculations depends on keyBits and blockBits
78 */
79 int KC, BC, ROUNDS;
80 int i, j, t, rconpointer = 0;
81 doubleVec tk;
82 register vector unsigned char v1, v2, mask;
83
84 switch (keyBits) {
85 case 128: KC = 4; break;
86 case 192: KC = 6; break;
87 case 256: KC = 8; break;
88 default : return (-1);
89 }
90
91 switch (blockBits) {
92 case 128: BC = 4; break;
93 case 192: BC = 6; break;
94 case 256: BC = 8; break;
95 default : return (-2);
96 }
97
98 switch (keyBits >= blockBits ? keyBits : blockBits) {
99 case 128: ROUNDS = 10; break;
100 case 192: ROUNDS = 12; break;
101 case 256: ROUNDS = 14; break;
102 default : return (-3); /* this cannot happen */
103 }
104
105 tk.v[0] = vk[0];
106 tk.v[1] = vk[1];
107
108 t = 0;
109 /* copy values into round key array */
110 for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
111 for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk.s[i][j];
112
113 while (t < (ROUNDS+1)*BC) { /* while not enough round key material calculated */
114 /* calculate new values */
115 for(i = 0; i < 4; i++)
116 tk.s[i][0] ^= *((word8 *)S + tk.s[(i+1)%4][KC-1]);
117 tk.s[0][0] ^= rcon[rconpointer++];
118
119 if (KC != 8) {
120 /* xor bytes 1-7 of each row with previous byte */
121 mask = (vector unsigned char) ( 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff );
122 for ( i = 0; i < 2; i++ ) {
123 v1 = vec_sld( tk.v[i], tk.v[i], 15 );
124 v2 = vec_and( v1, mask );
125 tk.v[i] = vec_xor( tk.v[i], v2 );
126 }
127 }
128 else {
129 /* xor bytes 1-3 of each row with previous byte */
130 mask = (vector unsigned char) ( 0, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0, 0, 0, 0 );
131 for ( i = 0; i < 2; i++ ) {
132 v1 = vec_sld( tk.v[i], tk.v[i], 15 );
133 v2 = vec_and( v1, mask );
134 tk.v[i] = vec_xor( tk.v[i], v2 );
135 for(j = 0; j < 4; j++) tk.s[i][KC/2] ^= *((word8 *)S + tk.s[i][KC/2 - 1]);
136 /* xor bytes 5-7 of each row with previous byte */
137 mask = vec_sld( mask, mask, 4 );
138 v2 = vec_and( v1, mask );
139 tk.v[i] = vec_xor( tk.v[i], v2 );
140 mask = vec_sld( mask, mask, 4 );
141 }
142 }
143 /* copy values into round key array */
144 for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
145 for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk.s[i][j];
146 }
147 return 0;
148 }
149
150
151 void vMakeKey(BYTE *keyMaterial, keyInstance *key)
152 {
153 register vector unsigned char v1, v2, v3, mask;
154 vector unsigned char vk[2];
155
156 /* load and align input */
157 v1 = vec_ld( 0, (vector unsigned char *) keyMaterial );
158 v2 = vec_ld( 16, (vector unsigned char *) keyMaterial );
159 if ( (long) keyMaterial & 0x0fL )
160 { // this is required if keyMaterial is not on a 16-byte boundary
161 v3 = vec_ld( 32, (vector unsigned char *) keyMaterial );
162 mask = vec_lvsl( 0, keyMaterial );
163 v1 = vec_perm( v1, v2, mask );
164 v2 = vec_perm( v2, v3, mask );
165 }
166
167 /* parse input stream into rectangular array */
168 vk[0] = vec_perm( v1, v2, (vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
169 vk[1] = vec_perm( v1, v2, (vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );
170 vRijndaelKeySched (vk, key->keyLen, key->blockLen, key->keySched);
171 memset( (char *) vk, 0, 4 * MAXKC);
172 }
173
174
175 /* This routine does 16 simultaneous lookups in a 256-byte table. */
176 vector unsigned char rimskyKorsakov ( vector unsigned char v, vector unsigned char * table )
177 {
178 register vector unsigned char upperBits000, upperBits001, upperBits010, upperBits011,
179 upperBits100, upperBits101, upperBits110, upperBits111,
180 lookupBit00, lookupBit01, lookupBit10, lookupBit11,
181 lookupBit0, lookupBit1, lookup,
182 maskForBit6, maskForBit7, maskForBit8, seven;
183 register vector unsigned char *tabeven, *tabodd;
184
185 seven = vec_splat_u8 ( 7 );
186 tabeven = table++;
187 tabodd = table;
188
189 // Each variable contains the correct values for the corresponding bits 6, 7 and 8.
190 upperBits000 = vec_perm ( *tabeven, *tabodd, v );
191 tabeven += 2; tabodd += 2;
192 upperBits001 = vec_perm ( *tabeven, *tabodd, v );
193 tabeven += 2; tabodd += 2;
194 upperBits010 = vec_perm ( *tabeven, *tabodd, v );
195 tabeven += 2; tabodd += 2;
196 upperBits011 = vec_perm ( *tabeven, *tabodd, v );
197 tabeven += 2; tabodd += 2;
198 upperBits100 = vec_perm ( *tabeven, *tabodd, v );
199 tabeven += 2; tabodd += 2;
200 upperBits101 = vec_perm ( *tabeven, *tabodd, v );
201 tabeven += 2; tabodd += 2;
202 upperBits110 = vec_perm ( *tabeven, *tabodd, v );
203 tabeven += 2; tabodd += 2;
204 upperBits111 = vec_perm ( *tabeven, *tabodd, v );
205
206 // Here we extract all the correct values for bit 6.
207 maskForBit6 = vec_sl ( v, vec_splat_u8 ( 2 ) );
208 maskForBit6 = vec_sra ( maskForBit6, seven );
209 lookupBit00 = vec_sel ( upperBits000, upperBits001, maskForBit6 );
210 lookupBit01 = vec_sel ( upperBits010, upperBits011, maskForBit6 );
211 lookupBit10 = vec_sel ( upperBits100, upperBits101, maskForBit6 );
212 lookupBit11 = vec_sel ( upperBits110, upperBits111, maskForBit6 );
213
214 // Then we get the correct values for bit 7.
215 maskForBit7 = vec_sl ( v, vec_splat_u8 ( 1 ) );
216 maskForBit7 = vec_sra ( maskForBit7, seven );
217 lookupBit0 = vec_sel ( lookupBit00, lookupBit01, maskForBit7 );
218 lookupBit1 = vec_sel ( lookupBit10, lookupBit11, maskForBit7 );
219
220 // Finally, the entire correct result vector.
221 maskForBit8 = vec_sra ( v, seven );
222
223 lookup = vec_sel ( lookupBit0, lookupBit1, maskForBit8 );
224
225 return lookup;
226 }
227
228 vector unsigned char vmul(vector unsigned char a, vector unsigned char b)
229 {
230 register vector unsigned char x, y, zero;
231 register vector unsigned short xh, yh, zhi, zlo, two54, two55;
232
233 zero = vec_splat_u8( 0 );
234 two55 = vec_splat_u16( -1 );
235 two55 = (vector unsigned short) vec_mergeh( zero, (vector unsigned char) two55 );
236 two54 = vec_sub( two55, vec_splat_u16( 1 ) );
237
238 x = rimskyKorsakov( a, (vector unsigned char *)Logtable ); // Logtable[a]
239 y = rimskyKorsakov( b, (vector unsigned char *)Logtable ); // Logtable[b]
240
241 // Convert upper 8 bytes to shorts for addition ond modulo
242 xh = (vector unsigned short) vec_mergeh( zero, x );
243 yh = (vector unsigned short) vec_mergeh( zero, y );
244 xh = vec_add( xh, yh ); // xh = Logtable[a] + Logtable[b]
245 yh = vec_sub( xh, two55 );
246 zhi = vec_sel( xh, yh, vec_cmpgt( xh, two54 ) ); // xh%255
247
248 // Convert lower 8 bytes to shorts for addition ond modulo
249 xh = (vector unsigned short) vec_mergel( zero, x );
250 yh = (vector unsigned short) vec_mergel( zero, y );
251 xh = vec_add( xh, yh );
252 yh = vec_sub( xh, two55 );
253 zlo = vec_sel( xh, yh, vec_cmpgt( xh, two54 ) );
254
255 x = vec_pack( zhi, zlo ); // recombine into single byte vector
256 x = rimskyKorsakov( x, (vector unsigned char *)Alogtable ); // Alogtable[x]
257 x = vec_sel( x, zero, vec_cmpeq( a, zero ) ); // check a = 0
258 x = vec_sel( x, zero, vec_cmpeq( b, zero ) ); // check b = 0
259 return x;
260 }
261
262 void vKeyAddition(vector unsigned char v[2], vector unsigned char rk[2])
263 {
264 v[0] = vec_xor( v[0], rk[0] ); // first vector contains rows 0 and 1
265 v[1] = vec_xor( v[1], rk[1] ); // second vector contains rows 2 and 3
266 }
267
268
269 void vShiftRow(vector unsigned char v[2], word8 d, word8 BC)
270 {
271 vecLong sh;
272 register vector unsigned char mask, mask1, t;
273 register vector bool char c;
274 register int i, j;
275
276 sh.s[0] = 0;
277 for (i = 1; i < 4; i++)
278 sh.s[i] = shifts[SC][i][d] % BC; // contains the number of elements to shift each row
279
280 // each vector contains two BC-byte long rows
281 j = 0;
282 for ( i = 0; i < 2; i++ ) {
283 mask = vec_lvsl( 0, (int *) sh.s[j++]); // mask for even row
284 mask1 = vec_lvsl( 0, (int *) sh.s[j++]); // mask for odd row
285 if (BC == 4) {
286 mask = vec_sld( mask, mask1, 8 ); // combined rotation mask for both rows
287 mask = vec_and( mask, vec_splat_u8( 3 ) );
288 } else if (BC == 6) {
289 mask = vec_sld( mask, mask, 8 );
290 mask = vec_sld( mask, mask1, 8 ); // combined rotation mask for both rows
291 t = vec_sub( mask, vec_splat_u8( 6 ) );
292 c = vec_cmpgt( mask, vec_splat_u8( 5 ) );
293 mask = vec_sel( mask, t, c );
294 } else {
295 mask = vec_sld( mask, mask1, 8 ); // combined rotation mask for both rows
296 mask = vec_and( mask, vec_splat_u8( 7 ) );
297 }
298 mask1 = vec_sld( vec_splat_u8( 0 ), vec_splat_u8( 8 ), 8 );
299 mask = vec_add( mask, mask1 );
300 v[i] = vec_perm( v[i], v[i], mask ); // rotate each row as required
301 }
302 }
303
304 void vSubstitution( vector unsigned char v[2], vector unsigned char box[16] )
305 {
306 v[0] = rimskyKorsakov( v[0], box ); // first vector contains rows 0 and 1
307 v[1] = rimskyKorsakov( v[1], box ); // second vector contains rows 2 and 3
308 }
309
310 void vMixColumn(vector unsigned char v[2])
311 {
312 // vector 0 contains row 0 in bytes 0-7 and row 1 in bytes 8-f
313 // vector 1 contains row 2 in bytes 0-7 and row 3 in bytes 8-f
314
315 register vector unsigned char a0, a1, a2, a3, b0, b1, b2, b3;
316 register vector unsigned char two, three;
317
318 two = vec_splat_u8( 2 );
319 three = vec_splat_u8( 3 );
320
321 a1 = vec_sld( v[0], v[1], 8 ); // equivalent to a[i+1] % 4
322 b1 = vec_sld( v[1], v[0], 8 );
323 a2 = vec_sld( a1, b1, 8 ); // equivalent to a[i+2] % 4
324 b2 = vec_sld( b1, a1, 8 );
325 a3 = vec_sld( a2, b2, 8 ); // equivalent to a[i+3] % 4
326 b3 = vec_sld( b2, a2, 8 );
327
328 // Calculations for rows 0 and 1
329 a0 = vmul( two, v[0] ); // mul(2,a[i][j])
330 a0 = vec_xor( a0, vmul( three, a1 ) ); // ^ mul(3,a[(i + 1) % 4][j])
331 a0 = vec_xor( a0, a2 ); // ^ a[(i + 2) % 4][j]
332 v[0] = vec_xor( a0, a3 ); // ^ a[(i + 3) % 4][j]
333
334 // Calculations for rows 2 and 3
335 b0 = vmul( two, v[1] );
336 b0 = vec_xor( b0, vmul( three, b1 ) );
337 b0 = vec_xor( b0, b2 );
338 v[1] = vec_xor( b0, b3 );
339 }
340
341 void vInvMixColumn(vector unsigned char v[2])
342 {
343 // vector 0 contains row 0 in bytes 0-7 and row 1 in bytes 8-f
344 // vector 1 contains row 2 in bytes 0-7 and row 3 in bytes 8-f
345
346 register vector unsigned char a0, a1, a2, a3, b0, b1, b2, b3;
347 register vector unsigned char nine, eleven, thirteen, fourteen;;
348
349 nine = vec_splat_u8( 0x9 );
350 eleven = vec_splat_u8( 0xb );
351 thirteen = vec_splat_u8( 0xd );
352 fourteen = vec_splat_u8( 0xe );
353
354 a1 = vec_sld( v[0], v[1], 8 ); // equivalent to a[i+1] % 4
355 b1 = vec_sld( v[1], v[0], 8 );
356 a2 = vec_sld( a1, b1, 8 ); // equivalent to a[i+2] % 4
357 b2 = vec_sld( b1, a1, 8 );
358 a3 = vec_sld( a2, b2, 8 ); // equivalent to a[i+3] % 4
359 b3 = vec_sld( b2, a2, 8 );
360
361 // Calculations for rows 0 and 1
362 a0 = vmul( fourteen, v[0] ); // mul(0xe,a[i][j])
363 a0 = vec_xor( a0, vmul( eleven, a1 ) ); // ^ mul(0xb,a[(i + 1) % 4][j])
364 a0 = vec_xor( a0, vmul( thirteen, a2 ) ); // ^ mul(0xd,a[(i + 2) % 4][j])
365 v[0] = vec_xor( a0, vmul( nine, a3 ) ); // ^ mul(0x9,a[(i + 3) % 4][j])
366
367 // Calculations for rows 2 and 3
368 b0 = vmul( fourteen, v[1] );
369 b0 = vec_xor( b0, vmul( eleven, b1 ) );
370 b0 = vec_xor( b0, vmul( thirteen, b2 ) );
371 v[1] = vec_xor( b0, vmul( nine, b3 ) );
372 }
373
374 int vRijndaelEncrypt (vector unsigned char a[2], int keyBits, int blockBits, vector unsigned char rk[MAXROUNDS+1][2])
375 {
376 /* Encryption of one block.
377 */
378 int r, BC, ROUNDS;
379
380 switch (blockBits) {
381 case 128: BC = 4; break;
382 case 192: BC = 6; break;
383 case 256: BC = 8; break;
384 default : return (-2);
385 }
386
387 switch (keyBits >= blockBits ? keyBits : blockBits) {
388 case 128: ROUNDS = 10; break;
389 case 192: ROUNDS = 12; break;
390 case 256: ROUNDS = 14; break;
391 default : return (-3); /* this cannot happen */
392 }
393
394 vKeyAddition( a, rk[0] );
395 for(r = 1; r < ROUNDS; r++) {
396 vSubstitution( a, (vector unsigned char *)S);
397 vShiftRow( a, 0, BC);
398 vMixColumn( a );
399 vKeyAddition( a, rk[r] );
400 }
401 vSubstitution( a, (vector unsigned char *)S);
402 vShiftRow( a, 0, BC);
403 vKeyAddition( a, rk[ROUNDS] );
404
405 return 0;
406 }
407
408 int vRijndaelDecrypt (vector unsigned char a[2], int keyBits, int blockBits, vector unsigned char rk[MAXROUNDS+1][2])
409 {
410 int r, BC, ROUNDS;
411
412 switch (blockBits) {
413 case 128: BC = 4; break;
414 case 192: BC = 6; break;
415 case 256: BC = 8; break;
416 default : return (-2);
417 }
418
419 switch (keyBits >= blockBits ? keyBits : blockBits) {
420 case 128: ROUNDS = 10; break;
421 case 192: ROUNDS = 12; break;
422 case 256: ROUNDS = 14; break;
423 default : return (-3); /* this cannot happen */
424 }
425
426 vKeyAddition( a, rk[ROUNDS] );
427 vSubstitution( a, (vector unsigned char *)Si);
428 vShiftRow( a, 1, BC);
429 for(r = ROUNDS-1; r > 0; r--) {
430 vKeyAddition( a, rk[r] );
431 vInvMixColumn( a );
432 vSubstitution( a, (vector unsigned char *)Si);
433 vShiftRow( a, 1, BC);
434 }
435 vKeyAddition( a, rk[0] );
436
437 return 0;
438 }
439
/*
 * The generic, CBC-capable vBlockEncrypt()/vBlockDecrypt() routines that were
 * compiled out under "#if 0" and marked "to be deleted" have been removed.
 * They were never part of the build; the 128-bit entry points are the only
 * block-level interface in this file.
 */
570
571 /*
572 * dmitch addenda 4/11/2001: 128-bit only encrypt/decrypt with no CBC
573 */
574 void vBlockEncrypt128(
575 keyInstance *key,
576 BYTE *input,
577 BYTE *outBuffer)
578 {
579 vector unsigned char block[2];
580 register vector unsigned char v1, v2;
581
582 if ( (long) input & 0x0fL ) {
583 BYTE localBuf[16];
584 vdprintf(("vBlockEncrypt128: unaligned input\n"));
585 /* manually re-align - the compiler is supposed to 16-byte align this for us */
586 if((unsigned)localBuf & 0xf) {
587 vdprintf(("vBlockEncrypt128: unaligned localBuf!\n"));
588 }
589 memmove(localBuf, input, 16);
590 v1 = vec_ld(0, (vector unsigned char *)localBuf);
591 }
592 else {
593 vdprintf(("vBlockEncrypt128: aligned input\n"));
594 v1 = vec_ld( 0, (vector unsigned char *) input );
595 }
596
597 /* parse input stream into rectangular array */
598 /* FIXME - do we need to zero v2 (or something)? */
599 block[0] = vec_perm(v1, v2,
600 (vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1,
601 5, 9, 13, 17, 21, 25, 29 ) );
602 block[1] = vec_perm( v1, v2,
603 (vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3,
604 7, 11, 15, 19, 23, 27, 31 ) );
605
606 vRijndaelEncrypt(block, key->keyLen, 128, (vector unsigned char *) key->keySched);
607
608 /* parse rectangular array into output ciphertext bytes */
609 v1 = vec_perm(block[0], block[1],
610 (vector unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2,
611 10, 18, 26, 3, 11, 19, 27 ) );
612 v2 = vec_perm(block[0], block[1],
613 (vector unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6,
614 14, 22, 30, 7, 15, 23, 31 ) );
615
616 if ( (long) outBuffer & 0x0fL )
617 {
618 /* store output data into a non-aligned buffer */
619 BYTE localBuf[16];
620 vec_st(v1, 0, (vector unsigned char *) localBuf );
621 memmove(outBuffer, localBuf, 16);
622 } else {
623 /* store output data into an aligned buffer */
624 vec_st( v1, 0, (vector unsigned char *) outBuffer );
625 }
626 return;
627 }
628
629 void vBlockDecrypt128(
630 keyInstance *key,
631 BYTE *input,
632 BYTE *outBuffer)
633 {
634 vector unsigned char block[2];
635 register vector unsigned char v1, v2;
636
637 if ( (long) input & 0x0fL ) {
638 /* manually re-align - the compiler is supposed to 16-byte align this for us */
639 BYTE localBuf[16];
640 vdprintf(("vBlockDecrypt128: unaligned input\n"));
641 if((unsigned)localBuf & 0xf) {
642 vdprintf(("vBlockDecrypt128: unaligned localBuf!\n"));
643 }
644 memmove(localBuf, input, 16);
645 v1 = vec_ld(0, (vector unsigned char *)localBuf);
646 }
647 else {
648 vdprintf(("vBlockDecrypt128: aligned input\n"));
649 v1 = vec_ld( 0, (vector unsigned char *) input );
650 }
651
652 /* parse input stream into rectangular array */
653 /* FIXME - do we need to zero v2 (or something)? */
654 block[0] = vec_perm(v1, v2,
655 (vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1,
656 5, 9, 13, 17, 21, 25, 29 ) );
657 block[1] = vec_perm( v1, v2,
658 (vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3,
659 7, 11, 15, 19, 23, 27, 31 ) );
660
661 vRijndaelDecrypt(block, key->keyLen, 128, (vector unsigned char *) key->keySched);
662
663 /* parse rectangular array into output ciphertext bytes */
664 v1 = vec_perm(block[0], block[1],
665 (vector unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2,
666 10, 18, 26, 3, 11, 19, 27 ) );
667 v2 = vec_perm(block[0], block[1],
668 (vector unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6,
669 14, 22, 30, 7, 15, 23, 31 ) );
670
671 if ( (long) outBuffer & 0x0fL ) {
672 /* store output data into a non-aligned buffer */
673 BYTE localBuf[16];
674 vec_st(v1, 0, (vector unsigned char *) localBuf );
675 memmove(outBuffer, localBuf, 16);
676 } else {
677 /* store output data into an aligned buffer */
678 vec_st( v1, 0, (vector unsigned char *) outBuffer );
679 }
680 return;
681 }
682
683 #endif /* defined(__ppc__) && defined(ALTIVEC_ENABLE) */