/*
 * Source: apple/security.git (Security-55471.14.18)
 * File:   libsecurity_apple_csp/lib/vRijndael-alg-ref.c
 */
1 /*
2 * Copyright (c) 2000-2001 Apple Computer, Inc. All Rights Reserved.
3 *
4 * The contents of this file constitute Original Code as defined in and are
5 * subject to the Apple Public Source License Version 1.2 (the 'License').
6 * You may not use this file except in compliance with the License. Please obtain
7 * a copy of the License at http://www.apple.com/publicsource and read it before
8 * using this file.
9 *
10 * This Original Code and all software distributed under the License are
11 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS
12 * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
13 * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
14 * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the
15 * specific language governing rights and limitations under the License.
16 */
17
18
19 /*
20 * vRijndael-alg-ref.c
21 *
22 * Created by Robert A. Murley on Mon Jan 22 2001.
23 * Copyright (c) 2001 Apple Computer, Inc. All rights reserved.
24 *
25 */
26
27 #include "rijndaelApi.h"
28 #include "rijndael-alg-ref.h"
29 #include "boxes-ref.h"
30 #include <string.h>
31
/* debugger seems to have trouble with this code... */
/*
 * NOTE(review): VAES_DEBUG is hard-wired to 1, so every vdprintf() trace
 * below is compiled into release builds of this PPC-only path; confirm
 * that is intended before shipping.
 */
#define VAES_DEBUG	1
#if	VAES_DEBUG
#include <stdio.h>
#define vdprintf(s)	printf s
#else
#define vdprintf(s)
#endif

/* Index into shifts[] by block size: 0 for BC=4, 1 for BC=6, 2 for BC=8. */
#define SC	((BC - 4) >> 1)
42
43 #if defined(__ppc__) && defined(ALTIVEC_ENABLE)
44
/*
 * Overlay of the 4x8-byte Rijndael state/key array on a pair of AltiVec
 * registers: s[row][col] addresses individual bytes, v[0] holds rows 0-1
 * and v[1] holds rows 2-3 (8 bytes per row = MAXBC columns).
 */
typedef union {
	unsigned char	s[4][8];
	unsigned long	l[8];
	vector unsigned char	v[2];
} doubleVec;

/* Four 32-bit values viewed as a single AltiVec register. */
typedef union {
	unsigned long	s[4];
	vector unsigned long	v;
} vecLong;

/*
 * Per-row rotation amounts for ShiftRow, indexed as shifts[SC][row][d]:
 * SC selects the block size (0: BC=4, 1: BC=6, 2: BC=8) and d the
 * direction (0 = encryption, 1 = decryption).  Row 0 is never rotated.
 */
static word8 shifts[3][4][2] = {
 { { 0, 0 },
   { 1, 3 },
   { 2, 2 },
   { 3, 1 }
 },
 { { 0, 0 },
   { 1, 5 },
   { 2, 4 },
   { 3, 3 }
 },
 { { 0, 0 },
   { 1, 7 },
   { 3, 5 },
   { 4, 4 }
 }
};
73
/*
 * Expand a cipher key into the round-key array W.
 *
 * vk[0] holds rows 0-1 and vk[1] rows 2-3 of the 4xKC key block (see
 * doubleVec).  W receives one 4xBC round key per round.
 * Returns 0 on success, -1 for bad keyBits, -2 for bad blockBits,
 * -3 is unreachable for valid sizes.
 */
int vRijndaelKeySched ( vector unsigned char vk[2], int keyBits, int blockBits,
		unsigned char W[MAXROUNDS+1][4][MAXBC])
{
	/* Calculate the necessary round keys
	 * The number of calculations depends on keyBits and blockBits
	 */
	int KC, BC, ROUNDS;
	int i, j, t, rconpointer = 0;
	doubleVec tk;		/* byte-addressable working copy of the key */
	register vector unsigned char v1, v2, mask;

	/* KC = number of 32-bit key columns */
	switch (keyBits) {
	case 128: KC = 4; break;
	case 192: KC = 6; break;
	case 256: KC = 8; break;
	default : return (-1);
	}

	/* BC = number of 32-bit block columns */
	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	/* the round count is driven by the larger of the two sizes */
	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	tk.v[0] = vk[0];
	tk.v[1] = vk[1];

	t = 0;
	/* copy values into round key array */
	for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
		for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk.s[i][j];

	while (t < (ROUNDS+1)*BC) { /* while not enough round key material calculated */
		/* calculate new values: column 0 absorbs the S-box of the
		   rotated last column, then the round constant */
		for(i = 0; i < 4; i++)
			tk.s[i][0] ^= *((word8 *)S + tk.s[(i+1)%4][KC-1]);
		tk.s[0][0] ^= rcon[rconpointer++];

		if (KC != 8) {
			/* xor bytes 1-7 of each row with previous byte */
			/* vec_sld(v, v, 15) rotates so each byte sees its left
			   neighbour; the mask keeps byte 0 of each row intact */
			mask = (vector unsigned char) ( 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff );
			for ( i = 0; i < 2; i++ ) {
				v1 = vec_sld( tk.v[i], tk.v[i], 15 );
				v2 = vec_and( v1, mask );
				tk.v[i] = vec_xor( tk.v[i], v2 );
			}
		}
		else {
			/* xor bytes 1-3 of each row with previous byte */
			mask = (vector unsigned char) ( 0, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0, 0, 0, 0 );
			for ( i = 0; i < 2; i++ ) {
				v1 = vec_sld( tk.v[i], tk.v[i], 15 );
				v2 = vec_and( v1, mask );
				tk.v[i] = vec_xor( tk.v[i], v2 );
				/*
				 * NOTE(review): suspected bug.  This XORs the same S-box
				 * value into tk.s[i][KC/2] four times (j is never used in
				 * the loop body), and an even number of identical XORs is
				 * a no-op.  The scalar reference applies the step once per
				 * row j: tk[j][KC/2] ^= S[tk[j][KC/2 - 1]].  Confirm
				 * against rijndael-alg-ref.c before trusting 256-bit keys
				 * in this PPC path.
				 */
				for(j = 0; j < 4; j++) tk.s[i][KC/2] ^= *((word8 *)S + tk.s[i][KC/2 - 1]);
				/* xor bytes 5-7 of each row with previous byte */
				mask = vec_sld( mask, mask, 4 );
				v2 = vec_and( v1, mask );
				tk.v[i] = vec_xor( tk.v[i], v2 );
				mask = vec_sld( mask, mask, 4 );
			}
		}
		/* copy values into round key array */
		for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
			for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk.s[i][j];
	}
	return 0;
}
150
151
152 void vMakeKey(BYTE *keyMaterial, keyInstance *key)
153 {
154 register vector unsigned char v1, v2, v3, mask;
155 vector unsigned char vk[2];
156
157 /* load and align input */
158 v1 = vec_ld( 0, (vector unsigned char *) keyMaterial );
159 v2 = vec_ld( 16, (vector unsigned char *) keyMaterial );
160 if ( (long) keyMaterial & 0x0fL )
161 { // this is required if keyMaterial is not on a 16-byte boundary
162 v3 = vec_ld( 32, (vector unsigned char *) keyMaterial );
163 mask = vec_lvsl( 0, keyMaterial );
164 v1 = vec_perm( v1, v2, mask );
165 v2 = vec_perm( v2, v3, mask );
166 }
167
168 /* parse input stream into rectangular array */
169 vk[0] = vec_perm( v1, v2, (vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
170 vk[1] = vec_perm( v1, v2, (vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );
171 vRijndaelKeySched (vk, key->keyLen, key->blockLen, key->keySched);
172 memset( (char *) vk, 0, 4 * MAXKC);
173 }
174
175
176 /* This routine does 16 simultaneous lookups in a 256-byte table. */
177 vector unsigned char rimskyKorsakov ( vector unsigned char v, vector unsigned char * table )
178 {
179 register vector unsigned char upperBits000, upperBits001, upperBits010, upperBits011,
180 upperBits100, upperBits101, upperBits110, upperBits111,
181 lookupBit00, lookupBit01, lookupBit10, lookupBit11,
182 lookupBit0, lookupBit1, lookup,
183 maskForBit6, maskForBit7, maskForBit8, seven;
184 register vector unsigned char *tabeven, *tabodd;
185
186 seven = vec_splat_u8 ( 7 );
187 tabeven = table++;
188 tabodd = table;
189
190 // Each variable contains the correct values for the corresponding bits 6, 7 and 8.
191 upperBits000 = vec_perm ( *tabeven, *tabodd, v );
192 tabeven += 2; tabodd += 2;
193 upperBits001 = vec_perm ( *tabeven, *tabodd, v );
194 tabeven += 2; tabodd += 2;
195 upperBits010 = vec_perm ( *tabeven, *tabodd, v );
196 tabeven += 2; tabodd += 2;
197 upperBits011 = vec_perm ( *tabeven, *tabodd, v );
198 tabeven += 2; tabodd += 2;
199 upperBits100 = vec_perm ( *tabeven, *tabodd, v );
200 tabeven += 2; tabodd += 2;
201 upperBits101 = vec_perm ( *tabeven, *tabodd, v );
202 tabeven += 2; tabodd += 2;
203 upperBits110 = vec_perm ( *tabeven, *tabodd, v );
204 tabeven += 2; tabodd += 2;
205 upperBits111 = vec_perm ( *tabeven, *tabodd, v );
206
207 // Here we extract all the correct values for bit 6.
208 maskForBit6 = vec_sl ( v, vec_splat_u8 ( 2 ) );
209 maskForBit6 = vec_sra ( maskForBit6, seven );
210 lookupBit00 = vec_sel ( upperBits000, upperBits001, maskForBit6 );
211 lookupBit01 = vec_sel ( upperBits010, upperBits011, maskForBit6 );
212 lookupBit10 = vec_sel ( upperBits100, upperBits101, maskForBit6 );
213 lookupBit11 = vec_sel ( upperBits110, upperBits111, maskForBit6 );
214
215 // Then we get the correct values for bit 7.
216 maskForBit7 = vec_sl ( v, vec_splat_u8 ( 1 ) );
217 maskForBit7 = vec_sra ( maskForBit7, seven );
218 lookupBit0 = vec_sel ( lookupBit00, lookupBit01, maskForBit7 );
219 lookupBit1 = vec_sel ( lookupBit10, lookupBit11, maskForBit7 );
220
221 // Finally, the entire correct result vector.
222 maskForBit8 = vec_sra ( v, seven );
223
224 lookup = vec_sel ( lookupBit0, lookupBit1, maskForBit8 );
225
226 return lookup;
227 }
228
/*
 * Byte-wise GF(2^8) multiplication of two vectors using the log/antilog
 * tables: a*b = Alogtable[(Logtable[a] + Logtable[b]) % 255], with the
 * result forced to 0 wherever a or b is 0 (log of 0 is undefined).
 */
vector unsigned char vmul(vector unsigned char a, vector unsigned char b)
{
	register vector unsigned char x, y, zero;
	register vector unsigned short xh, yh, zhi, zlo, two54, two55;

	/* build the constants 255 and 254 in every 16-bit lane:
	   mergeh(0x00, 0xff) yields 0x00ff per lane */
	zero = vec_splat_u8( 0 );
	two55 = vec_splat_u16( -1 );
	two55 = (vector unsigned short) vec_mergeh( zero, (vector unsigned char) two55 );
	two54 = vec_sub( two55, vec_splat_u16( 1 ) );

	x = rimskyKorsakov( a, (vector unsigned char *)Logtable );	// Logtable[a]
	y = rimskyKorsakov( b, (vector unsigned char *)Logtable );	// Logtable[b]

	// Convert upper 8 bytes to shorts for addition and modulo
	xh = (vector unsigned short) vec_mergeh( zero, x );
	yh = (vector unsigned short) vec_mergeh( zero, y );
	xh = vec_add( xh, yh );		// xh = Logtable[a] + Logtable[b]
	/* the sum is < 2*255, so one conditional subtract of 255 suffices:
	   select xh-255 wherever xh > 254 */
	yh = vec_sub( xh, two55 );
	zhi = vec_sel( xh, yh, vec_cmpgt( xh, two54 ) );	// xh%255

	// Convert lower 8 bytes to shorts for addition and modulo
	xh = (vector unsigned short) vec_mergel( zero, x );
	yh = (vector unsigned short) vec_mergel( zero, y );
	xh = vec_add( xh, yh );
	yh = vec_sub( xh, two55 );
	zlo = vec_sel( xh, yh, vec_cmpgt( xh, two54 ) );

	x = vec_pack( zhi, zlo );	// recombine into single byte vector
	x = rimskyKorsakov( x, (vector unsigned char *)Alogtable );	// Alogtable[x]
	x = vec_sel( x, zero, vec_cmpeq( a, zero ) );	// check a = 0
	x = vec_sel( x, zero, vec_cmpeq( b, zero ) );	// check b = 0
	return x;
}
262
263 void vKeyAddition(vector unsigned char v[2], vector unsigned char rk[2])
264 {
265 v[0] = vec_xor( v[0], rk[0] ); // first vector contains rows 0 and 1
266 v[1] = vec_xor( v[1], rk[1] ); // second vector contains rows 2 and 3
267 }
268
269
/*
 * ShiftRow: rotate each state row by its block-size-dependent offset.
 * d selects the direction column of shifts[] (0 = encryption,
 * 1 = decryption); BC is the number of state columns (4, 6 or 8).
 */
void vShiftRow(vector unsigned char v[2], word8 d, word8 BC)
{
	vecLong sh;
	register vector unsigned char mask, mask1, t;
	register vector bool char c;
	register int i, j;

	/* row 0 is never rotated */
	sh.s[0] = 0;
	for (i = 1; i < 4; i++)
		sh.s[i] = shifts[SC][i][d] % BC;	// contains the number of elements to shift each row

	// each vector contains two BC-byte long rows
	j = 0;
	for ( i = 0; i < 2; i++ ) {
		/* vec_lvsl(0, addr) yields { addr&0xF, addr&0xF + 1, ... }; the
		   shift count is cast to a pointer purely to exploit this, giving
		   a rotate-left permute pattern { n, n+1, ..., n+15 } */
		mask = vec_lvsl( 0, (int *) sh.s[j++]);		// mask for even row
		mask1 = vec_lvsl( 0, (int *) sh.s[j++]);	// mask for odd row
		if (BC == 4) {
			mask = vec_sld( mask, mask1, 8 );	// combined rotation mask for both rows
			/* wrap each index mod 4 so the rotation stays within a row */
			mask = vec_and( mask, vec_splat_u8( 3 ) );
		} else if (BC == 6) {
			mask = vec_sld( mask, mask, 8 );
			mask = vec_sld( mask, mask1, 8 );	// combined rotation mask for both rows
			/* 6 is not a power of two: wrap by conditionally subtracting 6 */
			t = vec_sub( mask, vec_splat_u8( 6 ) );
			c = vec_cmpgt( mask, vec_splat_u8( 5 ) );
			mask = vec_sel( mask, t, c );
		} else {
			mask = vec_sld( mask, mask1, 8 );	// combined rotation mask for both rows
			/* wrap each index mod 8 */
			mask = vec_and( mask, vec_splat_u8( 7 ) );
		}
		/* add 8 to the odd row's indices (upper half of the mask) so
		   they select from bytes 8-15 of the vector */
		mask1 = vec_sld( vec_splat_u8( 0 ), vec_splat_u8( 8 ), 8 );
		mask = vec_add( mask, mask1 );
		v[i] = vec_perm( v[i], v[i], mask );	// rotate each row as required
	}
}
304
305 void vSubstitution( vector unsigned char v[2], vector unsigned char box[16] )
306 {
307 v[0] = rimskyKorsakov( v[0], box ); // first vector contains rows 0 and 1
308 v[1] = rimskyKorsakov( v[1], box ); // second vector contains rows 2 and 3
309 }
310
311 void vMixColumn(vector unsigned char v[2])
312 {
313 // vector 0 contains row 0 in bytes 0-7 and row 1 in bytes 8-f
314 // vector 1 contains row 2 in bytes 0-7 and row 3 in bytes 8-f
315
316 register vector unsigned char a0, a1, a2, a3, b0, b1, b2, b3;
317 register vector unsigned char two, three;
318
319 two = vec_splat_u8( 2 );
320 three = vec_splat_u8( 3 );
321
322 a1 = vec_sld( v[0], v[1], 8 ); // equivalent to a[i+1] % 4
323 b1 = vec_sld( v[1], v[0], 8 );
324 a2 = vec_sld( a1, b1, 8 ); // equivalent to a[i+2] % 4
325 b2 = vec_sld( b1, a1, 8 );
326 a3 = vec_sld( a2, b2, 8 ); // equivalent to a[i+3] % 4
327 b3 = vec_sld( b2, a2, 8 );
328
329 // Calculations for rows 0 and 1
330 a0 = vmul( two, v[0] ); // mul(2,a[i][j])
331 a0 = vec_xor( a0, vmul( three, a1 ) ); // ^ mul(3,a[(i + 1) % 4][j])
332 a0 = vec_xor( a0, a2 ); // ^ a[(i + 2) % 4][j]
333 v[0] = vec_xor( a0, a3 ); // ^ a[(i + 3) % 4][j]
334
335 // Calculations for rows 2 and 3
336 b0 = vmul( two, v[1] );
337 b0 = vec_xor( b0, vmul( three, b1 ) );
338 b0 = vec_xor( b0, b2 );
339 v[1] = vec_xor( b0, b3 );
340 }
341
342 void vInvMixColumn(vector unsigned char v[2])
343 {
344 // vector 0 contains row 0 in bytes 0-7 and row 1 in bytes 8-f
345 // vector 1 contains row 2 in bytes 0-7 and row 3 in bytes 8-f
346
347 register vector unsigned char a0, a1, a2, a3, b0, b1, b2, b3;
348 register vector unsigned char nine, eleven, thirteen, fourteen;;
349
350 nine = vec_splat_u8( 0x9 );
351 eleven = vec_splat_u8( 0xb );
352 thirteen = vec_splat_u8( 0xd );
353 fourteen = vec_splat_u8( 0xe );
354
355 a1 = vec_sld( v[0], v[1], 8 ); // equivalent to a[i+1] % 4
356 b1 = vec_sld( v[1], v[0], 8 );
357 a2 = vec_sld( a1, b1, 8 ); // equivalent to a[i+2] % 4
358 b2 = vec_sld( b1, a1, 8 );
359 a3 = vec_sld( a2, b2, 8 ); // equivalent to a[i+3] % 4
360 b3 = vec_sld( b2, a2, 8 );
361
362 // Calculations for rows 0 and 1
363 a0 = vmul( fourteen, v[0] ); // mul(0xe,a[i][j])
364 a0 = vec_xor( a0, vmul( eleven, a1 ) ); // ^ mul(0xb,a[(i + 1) % 4][j])
365 a0 = vec_xor( a0, vmul( thirteen, a2 ) ); // ^ mul(0xd,a[(i + 2) % 4][j])
366 v[0] = vec_xor( a0, vmul( nine, a3 ) ); // ^ mul(0x9,a[(i + 3) % 4][j])
367
368 // Calculations for rows 2 and 3
369 b0 = vmul( fourteen, v[1] );
370 b0 = vec_xor( b0, vmul( eleven, b1 ) );
371 b0 = vec_xor( b0, vmul( thirteen, b2 ) );
372 v[1] = vec_xor( b0, vmul( nine, b3 ) );
373 }
374
375 int vRijndaelEncrypt (vector unsigned char a[2], int keyBits, int blockBits, vector unsigned char rk[MAXROUNDS+1][2])
376 {
377 /* Encryption of one block.
378 */
379 int r, BC, ROUNDS;
380
381 switch (blockBits) {
382 case 128: BC = 4; break;
383 case 192: BC = 6; break;
384 case 256: BC = 8; break;
385 default : return (-2);
386 }
387
388 switch (keyBits >= blockBits ? keyBits : blockBits) {
389 case 128: ROUNDS = 10; break;
390 case 192: ROUNDS = 12; break;
391 case 256: ROUNDS = 14; break;
392 default : return (-3); /* this cannot happen */
393 }
394
395 vKeyAddition( a, rk[0] );
396 for(r = 1; r < ROUNDS; r++) {
397 vSubstitution( a, (vector unsigned char *)S);
398 vShiftRow( a, 0, BC);
399 vMixColumn( a );
400 vKeyAddition( a, rk[r] );
401 }
402 vSubstitution( a, (vector unsigned char *)S);
403 vShiftRow( a, 0, BC);
404 vKeyAddition( a, rk[ROUNDS] );
405
406 return 0;
407 }
408
409 int vRijndaelDecrypt (vector unsigned char a[2], int keyBits, int blockBits, vector unsigned char rk[MAXROUNDS+1][2])
410 {
411 int r, BC, ROUNDS;
412
413 switch (blockBits) {
414 case 128: BC = 4; break;
415 case 192: BC = 6; break;
416 case 256: BC = 8; break;
417 default : return (-2);
418 }
419
420 switch (keyBits >= blockBits ? keyBits : blockBits) {
421 case 128: ROUNDS = 10; break;
422 case 192: ROUNDS = 12; break;
423 case 256: ROUNDS = 14; break;
424 default : return (-3); /* this cannot happen */
425 }
426
427 vKeyAddition( a, rk[ROUNDS] );
428 vSubstitution( a, (vector unsigned char *)Si);
429 vShiftRow( a, 1, BC);
430 for(r = ROUNDS-1; r > 0; r--) {
431 vKeyAddition( a, rk[r] );
432 vInvMixColumn( a );
433 vSubstitution( a, (vector unsigned char *)Si);
434 vShiftRow( a, 1, BC);
435 }
436 vKeyAddition( a, rk[0] );
437
438 return 0;
439 }
440
/*
 * The original vBlockEncrypt/vBlockDecrypt routines that lived here were
 * disabled with "#if 0" and explicitly marked "Murley's code, to be
 * deleted".  They were never compiled; the dead code has been removed.
 * Retrieve it from source-control history if it is ever needed again.
 */
571
572 /*
573 * dmitch addenda 4/11/2001: 128-bit only encrypt/decrypt with no CBC
574 */
575 void vBlockEncrypt128(
576 keyInstance *key,
577 BYTE *input,
578 BYTE *outBuffer)
579 {
580 vector unsigned char block[2];
581 register vector unsigned char v1, v2;
582
583 if ( (long) input & 0x0fL ) {
584 BYTE localBuf[16];
585 vdprintf(("vBlockEncrypt128: unaligned input\n"));
586 /* manually re-align - the compiler is supposed to 16-byte align this for us */
587 if((unsigned)localBuf & 0xf) {
588 vdprintf(("vBlockEncrypt128: unaligned localBuf!\n"));
589 }
590 memmove(localBuf, input, 16);
591 v1 = vec_ld(0, (vector unsigned char *)localBuf);
592 }
593 else {
594 vdprintf(("vBlockEncrypt128: aligned input\n"));
595 v1 = vec_ld( 0, (vector unsigned char *) input );
596 }
597
598 /* parse input stream into rectangular array */
599 /* FIXME - do we need to zero v2 (or something)? */
600 block[0] = vec_perm(v1, v2,
601 (vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1,
602 5, 9, 13, 17, 21, 25, 29 ) );
603 block[1] = vec_perm( v1, v2,
604 (vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3,
605 7, 11, 15, 19, 23, 27, 31 ) );
606
607 vRijndaelEncrypt(block, key->keyLen, 128, (vector unsigned char *) key->keySched);
608
609 /* parse rectangular array into output ciphertext bytes */
610 v1 = vec_perm(block[0], block[1],
611 (vector unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2,
612 10, 18, 26, 3, 11, 19, 27 ) );
613 v2 = vec_perm(block[0], block[1],
614 (vector unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6,
615 14, 22, 30, 7, 15, 23, 31 ) );
616
617 if ( (long) outBuffer & 0x0fL )
618 {
619 /* store output data into a non-aligned buffer */
620 BYTE localBuf[16];
621 vec_st(v1, 0, (vector unsigned char *) localBuf );
622 memmove(outBuffer, localBuf, 16);
623 } else {
624 /* store output data into an aligned buffer */
625 vec_st( v1, 0, (vector unsigned char *) outBuffer );
626 }
627 return;
628 }
629
630 void vBlockDecrypt128(
631 keyInstance *key,
632 BYTE *input,
633 BYTE *outBuffer)
634 {
635 vector unsigned char block[2];
636 register vector unsigned char v1, v2;
637
638 if ( (long) input & 0x0fL ) {
639 /* manually re-align - the compiler is supposed to 16-byte align this for us */
640 BYTE localBuf[16];
641 vdprintf(("vBlockDecrypt128: unaligned input\n"));
642 if((unsigned)localBuf & 0xf) {
643 vdprintf(("vBlockDecrypt128: unaligned localBuf!\n"));
644 }
645 memmove(localBuf, input, 16);
646 v1 = vec_ld(0, (vector unsigned char *)localBuf);
647 }
648 else {
649 vdprintf(("vBlockDecrypt128: aligned input\n"));
650 v1 = vec_ld( 0, (vector unsigned char *) input );
651 }
652
653 /* parse input stream into rectangular array */
654 /* FIXME - do we need to zero v2 (or something)? */
655 block[0] = vec_perm(v1, v2,
656 (vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1,
657 5, 9, 13, 17, 21, 25, 29 ) );
658 block[1] = vec_perm( v1, v2,
659 (vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3,
660 7, 11, 15, 19, 23, 27, 31 ) );
661
662 vRijndaelDecrypt(block, key->keyLen, 128, (vector unsigned char *) key->keySched);
663
664 /* parse rectangular array into output ciphertext bytes */
665 v1 = vec_perm(block[0], block[1],
666 (vector unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2,
667 10, 18, 26, 3, 11, 19, 27 ) );
668 v2 = vec_perm(block[0], block[1],
669 (vector unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6,
670 14, 22, 30, 7, 15, 23, 31 ) );
671
672 if ( (long) outBuffer & 0x0fL ) {
673 /* store output data into a non-aligned buffer */
674 BYTE localBuf[16];
675 vec_st(v1, 0, (vector unsigned char *) localBuf );
676 memmove(outBuffer, localBuf, 16);
677 } else {
678 /* store output data into an aligned buffer */
679 vec_st( v1, 0, (vector unsigned char *) outBuffer );
680 }
681 return;
682 }
683
684 #endif /* defined(__ppc__) && defined(ALTIVEC_ENABLE) */