/* OSX/libsecurity_apple_csp/lib/vRijndael-alg-ref.c (apple/security) */
2 * Copyright (c) 2000-2001,2011,2014 Apple Inc. All Rights Reserved.
4 * The contents of this file constitute Original Code as defined in and are
5 * subject to the Apple Public Source License Version 1.2 (the 'License').
6 * You may not use this file except in compliance with the License. Please obtain
7 * a copy of the License at http://www.apple.com/publicsource and read it before
10 * This Original Code and all software distributed under the License are
11 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS
12 * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
13 * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
14 * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the
15 * specific language governing rights and limitations under the License.
22 * Copyright (c) 2001,2011,2014 Apple Inc. All Rights Reserved.
26 #include "rijndaelApi.h"
27 #include "rijndael-alg-ref.h"
28 #include "boxes-ref.h"
31 /* debugger seems to have trouble with this code... */
35 #define vdprintf(s) printf s
40 #define SC ((BC - 4) >> 1)
42 #if defined(__ppc__) && defined(ALTIVEC_ENABLE)
45 unsigned char s
[4][8];
47 vector
unsigned char v
[2];
52 vector
unsigned long v
;
55 static word8 shifts
[3][4][2] = {
73 int vRijndaelKeySched ( vector
unsigned char vk
[2], int keyBits
, int blockBits
,
74 unsigned char W
[MAXROUNDS
+1][4][MAXBC
])
76 /* Calculate the necessary round keys
77 * The number of calculations depends on keyBits and blockBits
80 int i
, j
, t
, rconpointer
= 0;
82 register vector
unsigned char v1
, v2
, mask
;
85 case 128: KC
= 4; break;
86 case 192: KC
= 6; break;
87 case 256: KC
= 8; break;
88 default : return (-1);
92 case 128: BC
= 4; break;
93 case 192: BC
= 6; break;
94 case 256: BC
= 8; break;
95 default : return (-2);
98 switch (keyBits
>= blockBits
? keyBits
: blockBits
) {
99 case 128: ROUNDS
= 10; break;
100 case 192: ROUNDS
= 12; break;
101 case 256: ROUNDS
= 14; break;
102 default : return (-3); /* this cannot happen */
109 /* copy values into round key array */
110 for(j
= 0; (j
< KC
) && (t
< (ROUNDS
+1)*BC
); j
++, t
++)
111 for(i
= 0; i
< 4; i
++) W
[t
/ BC
][i
][t
% BC
] = tk
.s
[i
][j
];
113 while (t
< (ROUNDS
+1)*BC
) { /* while not enough round key material calculated */
114 /* calculate new values */
115 for(i
= 0; i
< 4; i
++)
116 tk
.s
[i
][0] ^= *((word8
*)S
+ tk
.s
[(i
+1)%4
][KC
-1]);
117 tk
.s
[0][0] ^= rcon
[rconpointer
++];
120 /* xor bytes 1-7 of each row with previous byte */
121 mask
= (vector
unsigned char) ( 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff );
122 for ( i
= 0; i
< 2; i
++ ) {
123 v1
= vec_sld( tk
.v
[i
], tk
.v
[i
], 15 );
124 v2
= vec_and( v1
, mask
);
125 tk
.v
[i
] = vec_xor( tk
.v
[i
], v2
);
129 /* xor bytes 1-3 of each row with previous byte */
130 mask
= (vector
unsigned char) ( 0, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0, 0, 0, 0 );
131 for ( i
= 0; i
< 2; i
++ ) {
132 v1
= vec_sld( tk
.v
[i
], tk
.v
[i
], 15 );
133 v2
= vec_and( v1
, mask
);
134 tk
.v
[i
] = vec_xor( tk
.v
[i
], v2
);
135 for(j
= 0; j
< 4; j
++) tk
.s
[i
][KC
/2] ^= *((word8
*)S
+ tk
.s
[i
][KC
/2 - 1]);
136 /* xor bytes 5-7 of each row with previous byte */
137 mask
= vec_sld( mask
, mask
, 4 );
138 v2
= vec_and( v1
, mask
);
139 tk
.v
[i
] = vec_xor( tk
.v
[i
], v2
);
140 mask
= vec_sld( mask
, mask
, 4 );
143 /* copy values into round key array */
144 for(j
= 0; (j
< KC
) && (t
< (ROUNDS
+1)*BC
); j
++, t
++)
145 for(i
= 0; i
< 4; i
++) W
[t
/ BC
][i
][t
% BC
] = tk
.s
[i
][j
];
151 void vMakeKey(BYTE
*keyMaterial
, keyInstance
*key
)
153 register vector
unsigned char v1
, v2
, v3
, mask
;
154 vector
unsigned char vk
[2];
156 /* load and align input */
157 v1
= vec_ld( 0, (vector
unsigned char *) keyMaterial
);
158 v2
= vec_ld( 16, (vector
unsigned char *) keyMaterial
);
159 if ( (long) keyMaterial
& 0x0fL
)
160 { // this is required if keyMaterial is not on a 16-byte boundary
161 v3
= vec_ld( 32, (vector
unsigned char *) keyMaterial
);
162 mask
= vec_lvsl( 0, keyMaterial
);
163 v1
= vec_perm( v1
, v2
, mask
);
164 v2
= vec_perm( v2
, v3
, mask
);
167 /* parse input stream into rectangular array */
168 vk
[0] = vec_perm( v1
, v2
, (vector
unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
169 vk
[1] = vec_perm( v1
, v2
, (vector
unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );
170 vRijndaelKeySched (vk
, key
->keyLen
, key
->blockLen
, key
->keySched
);
171 memset( (char *) vk
, 0, 4 * MAXKC
);
175 /* This routine does 16 simultaneous lookups in a 256-byte table. */
176 vector
unsigned char rimskyKorsakov ( vector
unsigned char v
, vector
unsigned char * table
)
178 register vector
unsigned char upperBits000
, upperBits001
, upperBits010
, upperBits011
,
179 upperBits100
, upperBits101
, upperBits110
, upperBits111
,
180 lookupBit00
, lookupBit01
, lookupBit10
, lookupBit11
,
181 lookupBit0
, lookupBit1
, lookup
,
182 maskForBit6
, maskForBit7
, maskForBit8
, seven
;
183 register vector
unsigned char *tabeven
, *tabodd
;
185 seven
= vec_splat_u8 ( 7 );
189 // Each variable contains the correct values for the corresponding bits 6, 7 and 8.
190 upperBits000
= vec_perm ( *tabeven
, *tabodd
, v
);
191 tabeven
+= 2; tabodd
+= 2;
192 upperBits001
= vec_perm ( *tabeven
, *tabodd
, v
);
193 tabeven
+= 2; tabodd
+= 2;
194 upperBits010
= vec_perm ( *tabeven
, *tabodd
, v
);
195 tabeven
+= 2; tabodd
+= 2;
196 upperBits011
= vec_perm ( *tabeven
, *tabodd
, v
);
197 tabeven
+= 2; tabodd
+= 2;
198 upperBits100
= vec_perm ( *tabeven
, *tabodd
, v
);
199 tabeven
+= 2; tabodd
+= 2;
200 upperBits101
= vec_perm ( *tabeven
, *tabodd
, v
);
201 tabeven
+= 2; tabodd
+= 2;
202 upperBits110
= vec_perm ( *tabeven
, *tabodd
, v
);
203 tabeven
+= 2; tabodd
+= 2;
204 upperBits111
= vec_perm ( *tabeven
, *tabodd
, v
);
206 // Here we extract all the correct values for bit 6.
207 maskForBit6
= vec_sl ( v
, vec_splat_u8 ( 2 ) );
208 maskForBit6
= vec_sra ( maskForBit6
, seven
);
209 lookupBit00
= vec_sel ( upperBits000
, upperBits001
, maskForBit6
);
210 lookupBit01
= vec_sel ( upperBits010
, upperBits011
, maskForBit6
);
211 lookupBit10
= vec_sel ( upperBits100
, upperBits101
, maskForBit6
);
212 lookupBit11
= vec_sel ( upperBits110
, upperBits111
, maskForBit6
);
214 // Then we get the correct values for bit 7.
215 maskForBit7
= vec_sl ( v
, vec_splat_u8 ( 1 ) );
216 maskForBit7
= vec_sra ( maskForBit7
, seven
);
217 lookupBit0
= vec_sel ( lookupBit00
, lookupBit01
, maskForBit7
);
218 lookupBit1
= vec_sel ( lookupBit10
, lookupBit11
, maskForBit7
);
220 // Finally, the entire correct result vector.
221 maskForBit8
= vec_sra ( v
, seven
);
223 lookup
= vec_sel ( lookupBit0
, lookupBit1
, maskForBit8
);
228 vector
unsigned char vmul(vector
unsigned char a
, vector
unsigned char b
)
230 register vector
unsigned char x
, y
, zero
;
231 register vector
unsigned short xh
, yh
, zhi
, zlo
, two54
, two55
;
233 zero
= vec_splat_u8( 0 );
234 two55
= vec_splat_u16( -1 );
235 two55
= (vector
unsigned short) vec_mergeh( zero
, (vector
unsigned char) two55
);
236 two54
= vec_sub( two55
, vec_splat_u16( 1 ) );
238 x
= rimskyKorsakov( a
, (vector
unsigned char *)Logtable
); // Logtable[a]
239 y
= rimskyKorsakov( b
, (vector
unsigned char *)Logtable
); // Logtable[b]
241 // Convert upper 8 bytes to shorts for addition ond modulo
242 xh
= (vector
unsigned short) vec_mergeh( zero
, x
);
243 yh
= (vector
unsigned short) vec_mergeh( zero
, y
);
244 xh
= vec_add( xh
, yh
); // xh = Logtable[a] + Logtable[b]
245 yh
= vec_sub( xh
, two55
);
246 zhi
= vec_sel( xh
, yh
, vec_cmpgt( xh
, two54
) ); // xh%255
248 // Convert lower 8 bytes to shorts for addition ond modulo
249 xh
= (vector
unsigned short) vec_mergel( zero
, x
);
250 yh
= (vector
unsigned short) vec_mergel( zero
, y
);
251 xh
= vec_add( xh
, yh
);
252 yh
= vec_sub( xh
, two55
);
253 zlo
= vec_sel( xh
, yh
, vec_cmpgt( xh
, two54
) );
255 x
= vec_pack( zhi
, zlo
); // recombine into single byte vector
256 x
= rimskyKorsakov( x
, (vector
unsigned char *)Alogtable
); // Alogtable[x]
257 x
= vec_sel( x
, zero
, vec_cmpeq( a
, zero
) ); // check a = 0
258 x
= vec_sel( x
, zero
, vec_cmpeq( b
, zero
) ); // check b = 0
262 void vKeyAddition(vector
unsigned char v
[2], vector
unsigned char rk
[2])
264 v
[0] = vec_xor( v
[0], rk
[0] ); // first vector contains rows 0 and 1
265 v
[1] = vec_xor( v
[1], rk
[1] ); // second vector contains rows 2 and 3
269 void vShiftRow(vector
unsigned char v
[2], word8 d
, word8 BC
)
272 register vector
unsigned char mask
, mask1
, t
;
273 register vector
bool char c
;
277 for (i
= 1; i
< 4; i
++)
278 sh
.s
[i
] = shifts
[SC
][i
][d
] % BC
; // contains the number of elements to shift each row
280 // each vector contains two BC-byte long rows
282 for ( i
= 0; i
< 2; i
++ ) {
283 mask
= vec_lvsl( 0, (int *) sh
.s
[j
++]); // mask for even row
284 mask1
= vec_lvsl( 0, (int *) sh
.s
[j
++]); // mask for odd row
286 mask
= vec_sld( mask
, mask1
, 8 ); // combined rotation mask for both rows
287 mask
= vec_and( mask
, vec_splat_u8( 3 ) );
288 } else if (BC
== 6) {
289 mask
= vec_sld( mask
, mask
, 8 );
290 mask
= vec_sld( mask
, mask1
, 8 ); // combined rotation mask for both rows
291 t
= vec_sub( mask
, vec_splat_u8( 6 ) );
292 c
= vec_cmpgt( mask
, vec_splat_u8( 5 ) );
293 mask
= vec_sel( mask
, t
, c
);
295 mask
= vec_sld( mask
, mask1
, 8 ); // combined rotation mask for both rows
296 mask
= vec_and( mask
, vec_splat_u8( 7 ) );
298 mask1
= vec_sld( vec_splat_u8( 0 ), vec_splat_u8( 8 ), 8 );
299 mask
= vec_add( mask
, mask1
);
300 v
[i
] = vec_perm( v
[i
], v
[i
], mask
); // rotate each row as required
304 void vSubstitution( vector
unsigned char v
[2], vector
unsigned char box
[16] )
306 v
[0] = rimskyKorsakov( v
[0], box
); // first vector contains rows 0 and 1
307 v
[1] = rimskyKorsakov( v
[1], box
); // second vector contains rows 2 and 3
310 void vMixColumn(vector
unsigned char v
[2])
312 // vector 0 contains row 0 in bytes 0-7 and row 1 in bytes 8-f
313 // vector 1 contains row 2 in bytes 0-7 and row 3 in bytes 8-f
315 register vector
unsigned char a0
, a1
, a2
, a3
, b0
, b1
, b2
, b3
;
316 register vector
unsigned char two
, three
;
318 two
= vec_splat_u8( 2 );
319 three
= vec_splat_u8( 3 );
321 a1
= vec_sld( v
[0], v
[1], 8 ); // equivalent to a[i+1] % 4
322 b1
= vec_sld( v
[1], v
[0], 8 );
323 a2
= vec_sld( a1
, b1
, 8 ); // equivalent to a[i+2] % 4
324 b2
= vec_sld( b1
, a1
, 8 );
325 a3
= vec_sld( a2
, b2
, 8 ); // equivalent to a[i+3] % 4
326 b3
= vec_sld( b2
, a2
, 8 );
328 // Calculations for rows 0 and 1
329 a0
= vmul( two
, v
[0] ); // mul(2,a[i][j])
330 a0
= vec_xor( a0
, vmul( three
, a1
) ); // ^ mul(3,a[(i + 1) % 4][j])
331 a0
= vec_xor( a0
, a2
); // ^ a[(i + 2) % 4][j]
332 v
[0] = vec_xor( a0
, a3
); // ^ a[(i + 3) % 4][j]
334 // Calculations for rows 2 and 3
335 b0
= vmul( two
, v
[1] );
336 b0
= vec_xor( b0
, vmul( three
, b1
) );
337 b0
= vec_xor( b0
, b2
);
338 v
[1] = vec_xor( b0
, b3
);
341 void vInvMixColumn(vector
unsigned char v
[2])
343 // vector 0 contains row 0 in bytes 0-7 and row 1 in bytes 8-f
344 // vector 1 contains row 2 in bytes 0-7 and row 3 in bytes 8-f
346 register vector
unsigned char a0
, a1
, a2
, a3
, b0
, b1
, b2
, b3
;
347 register vector
unsigned char nine
, eleven
, thirteen
, fourteen
;;
349 nine
= vec_splat_u8( 0x9 );
350 eleven
= vec_splat_u8( 0xb );
351 thirteen
= vec_splat_u8( 0xd );
352 fourteen
= vec_splat_u8( 0xe );
354 a1
= vec_sld( v
[0], v
[1], 8 ); // equivalent to a[i+1] % 4
355 b1
= vec_sld( v
[1], v
[0], 8 );
356 a2
= vec_sld( a1
, b1
, 8 ); // equivalent to a[i+2] % 4
357 b2
= vec_sld( b1
, a1
, 8 );
358 a3
= vec_sld( a2
, b2
, 8 ); // equivalent to a[i+3] % 4
359 b3
= vec_sld( b2
, a2
, 8 );
361 // Calculations for rows 0 and 1
362 a0
= vmul( fourteen
, v
[0] ); // mul(0xe,a[i][j])
363 a0
= vec_xor( a0
, vmul( eleven
, a1
) ); // ^ mul(0xb,a[(i + 1) % 4][j])
364 a0
= vec_xor( a0
, vmul( thirteen
, a2
) ); // ^ mul(0xd,a[(i + 2) % 4][j])
365 v
[0] = vec_xor( a0
, vmul( nine
, a3
) ); // ^ mul(0x9,a[(i + 3) % 4][j])
367 // Calculations for rows 2 and 3
368 b0
= vmul( fourteen
, v
[1] );
369 b0
= vec_xor( b0
, vmul( eleven
, b1
) );
370 b0
= vec_xor( b0
, vmul( thirteen
, b2
) );
371 v
[1] = vec_xor( b0
, vmul( nine
, b3
) );
374 int vRijndaelEncrypt (vector
unsigned char a
[2], int keyBits
, int blockBits
, vector
unsigned char rk
[MAXROUNDS
+1][2])
376 /* Encryption of one block.
381 case 128: BC
= 4; break;
382 case 192: BC
= 6; break;
383 case 256: BC
= 8; break;
384 default : return (-2);
387 switch (keyBits
>= blockBits
? keyBits
: blockBits
) {
388 case 128: ROUNDS
= 10; break;
389 case 192: ROUNDS
= 12; break;
390 case 256: ROUNDS
= 14; break;
391 default : return (-3); /* this cannot happen */
394 vKeyAddition( a
, rk
[0] );
395 for(r
= 1; r
< ROUNDS
; r
++) {
396 vSubstitution( a
, (vector
unsigned char *)S
);
397 vShiftRow( a
, 0, BC
);
399 vKeyAddition( a
, rk
[r
] );
401 vSubstitution( a
, (vector
unsigned char *)S
);
402 vShiftRow( a
, 0, BC
);
403 vKeyAddition( a
, rk
[ROUNDS
] );
408 int vRijndaelDecrypt (vector
unsigned char a
[2], int keyBits
, int blockBits
, vector
unsigned char rk
[MAXROUNDS
+1][2])
413 case 128: BC
= 4; break;
414 case 192: BC
= 6; break;
415 case 256: BC
= 8; break;
416 default : return (-2);
419 switch (keyBits
>= blockBits
? keyBits
: blockBits
) {
420 case 128: ROUNDS
= 10; break;
421 case 192: ROUNDS
= 12; break;
422 case 256: ROUNDS
= 14; break;
423 default : return (-3); /* this cannot happen */
426 vKeyAddition( a
, rk
[ROUNDS
] );
427 vSubstitution( a
, (vector
unsigned char *)Si
);
428 vShiftRow( a
, 1, BC
);
429 for(r
= ROUNDS
-1; r
> 0; r
--) {
430 vKeyAddition( a
, rk
[r
] );
432 vSubstitution( a
, (vector
unsigned char *)Si
);
433 vShiftRow( a
, 1, BC
);
435 vKeyAddition( a
, rk
[0] );
441 /* Murley's code, to be deleted */
442 void vBlockEncrypt(cipherInstance
*cipher
, keyInstance
*key
, BYTE
*input
, int inputLen
, BYTE
*outBuffer
)
444 register vector
unsigned char v1
, v2
, v3
, v4
, mask
;
445 register vector
bool char cmp
;
447 /* load and align input */
448 v1
= vec_ld( 0, (vector
unsigned char *) input
);
449 v2
= vec_ld( 16, (vector
unsigned char *) input
);
450 if ( (long) input
& 0x0fL
)
451 { // this is required if input is not on a 16-byte boundary
452 v3
= vec_ld( 32, (vector
unsigned char *) input
);
453 mask
= vec_lvsl( 0, input
);
454 v1
= vec_perm( v1
, v2
, mask
);
455 v2
= vec_perm( v2
, v3
, mask
);
458 /* parse input stream into rectangular array */
459 v3
= vec_perm( v1
, v2
, (vector
unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
460 v4
= vec_perm( v1
, v2
, (vector
unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );
462 /* store into cipher structure */
463 if (cipher
->mode
== MODE_CBC
) {
464 v3
= vec_xor( v3
, *((vector
unsigned char *) cipher
->chainBlock
) );
465 v4
= vec_xor( v4
, *((vector
unsigned char *) cipher
->chainBlock
+ 1 ) );
467 vec_st( v3
, 0, (vector
unsigned char *) cipher
->chainBlock
);
468 vec_st( v4
, 16, (vector
unsigned char *) cipher
->chainBlock
);
470 vRijndaelEncrypt((vector
unsigned char *) cipher
->chainBlock
, key
->keyLen
, cipher
->blockLen
, (vector
unsigned char *) key
->keySched
);
472 v1
= vec_ld( 0, (vector
unsigned char *) cipher
->chainBlock
);
473 v2
= vec_ld( 16, (vector
unsigned char *) cipher
->chainBlock
);
475 /* parse rectangular array into output ciphertext bytes */
476 v3
= vec_perm( v1
, v2
, (vector
unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27 ) );
477 v4
= vec_perm( v1
, v2
, (vector
unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31 ) );
479 if ( (long) outBuffer
& 0x0fL
)
481 /* store output data into a non-aligned buffer */
482 mask
= vec_lvsr( 0, outBuffer
);
483 cmp
= vec_cmpgt( mask
, vec_splat_u8( 0x0f ) );
484 v1
= vec_perm( v3
, v3
, mask
);
485 v2
= vec_perm( v4
, v4
, mask
);
486 v3
= vec_ld( 0, (vector
unsigned char *) outBuffer
);
487 v4
= vec_sel( v3
, v1
, cmp
);
488 vec_st( v4
, 0, (vector
unsigned char *) outBuffer
);
489 v1
= vec_sel( v1
, v2
, cmp
);
490 vec_st( v1
, 16, (vector
unsigned char *) outBuffer
);
491 v3
= vec_ld( 32, (vector
unsigned char *) outBuffer
);
492 v2
= vec_sel( v2
, v3
, cmp
);
493 vec_st( v2
, 32, (vector
unsigned char *) outBuffer
);
495 // store output data into an aligned buffer
496 vec_st( v3
, 0, (vector
unsigned char *) outBuffer
);
497 vec_st( v4
, 16, (vector
unsigned char *) outBuffer
);
502 void vBlockDecrypt(cipherInstance
*cipher
, keyInstance
*key
, BYTE
*input
, int inputLen
, BYTE
*outBuffer
)
504 // for vector machines
505 register vector
unsigned char v1
, v2
, v3
, v4
, mask
;
506 register vector
bool char cmp
;
507 vector
unsigned char block
[2], cblock
[2];
509 /* load and align input */
510 v1
= vec_ld( 0, (vector
unsigned char *) input
);
511 v2
= vec_ld( 16, (vector
unsigned char *) input
);
512 if ( (long) input
& 0x0fL
)
513 { // this is required if input is not on a 16-byte boundary
514 v3
= vec_ld( 32, (vector
unsigned char *) input
);
515 mask
= vec_lvsl( 0, input
);
516 v1
= vec_perm( v1
, v2
, mask
);
517 v2
= vec_perm( v2
, v3
, mask
);
520 /* parse input stream into rectangular array */
521 v3
= vec_perm( v1
, v2
, (vector
unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
522 v4
= vec_perm( v1
, v2
, (vector
unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );
526 /* save a copy of incoming ciphertext for later chain */
527 if (cipher
->mode
== MODE_CBC
) {
532 vRijndaelDecrypt ((vector
unsigned char *) block
, key
->keyLen
, cipher
->blockLen
, (vector
unsigned char *) key
->keySched
);
537 /* exor with last ciphertext */
538 if (cipher
->mode
== MODE_CBC
) {
539 v1
= vec_xor( v1
, *((vector
unsigned char *) cipher
->chainBlock
) );
540 v2
= vec_xor( v2
, *((vector
unsigned char *) cipher
->chainBlock
+ 1) );
541 vec_st( cblock
[0], 0, (vector
unsigned char *) cipher
->chainBlock
);
542 vec_st( cblock
[1], 16, (vector
unsigned char *) cipher
->chainBlock
);
545 /* parse rectangular array into output ciphertext bytes */
546 v3
= vec_perm( v1
, v2
, (vector
unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27 ) );
547 v4
= vec_perm( v1
, v2
, (vector
unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31 ) );
549 if ( (long) outBuffer
& 0x0fL
)
550 { /* store output data into a non-aligned buffer */
551 mask
= vec_lvsr( 0, outBuffer
);
552 cmp
= vec_cmpgt( mask
, vec_splat_u8( 0x0f ) );
553 v1
= vec_perm( v3
, v3
, mask
);
554 v2
= vec_perm( v4
, v4
, mask
);
555 v3
= vec_ld( 0, (vector
unsigned char *) outBuffer
);
556 v4
= vec_sel( v3
, v1
, cmp
);
557 vec_st( v4
, 0, (vector
unsigned char *) outBuffer
);
558 v1
= vec_sel( v1
, v2
, cmp
);
559 vec_st( v1
, 16, (vector
unsigned char *) outBuffer
);
560 v3
= vec_ld( 32, (vector
unsigned char *) outBuffer
);
561 v2
= vec_sel( v2
, v3
, cmp
);
562 vec_st( v2
, 32, (vector
unsigned char *) outBuffer
);
564 // store output data into an aligned buffer
565 vec_st( v3
, 0, (vector
unsigned char *) outBuffer
);
566 vec_st( v4
, 16, (vector
unsigned char *) outBuffer
);
569 #endif /* Murley's code, to be deleted */
572 * dmitch addenda 4/11/2001: 128-bit only encrypt/decrypt with no CBC
574 void vBlockEncrypt128(
579 vector
unsigned char block
[2];
580 register vector
unsigned char v1
, v2
;
582 if ( (long) input
& 0x0fL
) {
584 vdprintf(("vBlockEncrypt128: unaligned input\n"));
585 /* manually re-align - the compiler is supposed to 16-byte align this for us */
586 if((unsigned)localBuf
& 0xf) {
587 vdprintf(("vBlockEncrypt128: unaligned localBuf!\n"));
589 memmove(localBuf
, input
, 16);
590 v1
= vec_ld(0, (vector
unsigned char *)localBuf
);
593 vdprintf(("vBlockEncrypt128: aligned input\n"));
594 v1
= vec_ld( 0, (vector
unsigned char *) input
);
597 /* parse input stream into rectangular array */
598 /* FIXME - do we need to zero v2 (or something)? */
599 block
[0] = vec_perm(v1
, v2
,
600 (vector
unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1,
601 5, 9, 13, 17, 21, 25, 29 ) );
602 block
[1] = vec_perm( v1
, v2
,
603 (vector
unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3,
604 7, 11, 15, 19, 23, 27, 31 ) );
606 vRijndaelEncrypt(block
, key
->keyLen
, 128, (vector
unsigned char *) key
->keySched
);
608 /* parse rectangular array into output ciphertext bytes */
609 v1
= vec_perm(block
[0], block
[1],
610 (vector
unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2,
611 10, 18, 26, 3, 11, 19, 27 ) );
612 v2
= vec_perm(block
[0], block
[1],
613 (vector
unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6,
614 14, 22, 30, 7, 15, 23, 31 ) );
616 if ( (long) outBuffer
& 0x0fL
)
618 /* store output data into a non-aligned buffer */
620 vec_st(v1
, 0, (vector
unsigned char *) localBuf
);
621 memmove(outBuffer
, localBuf
, 16);
623 /* store output data into an aligned buffer */
624 vec_st( v1
, 0, (vector
unsigned char *) outBuffer
);
629 void vBlockDecrypt128(
634 vector
unsigned char block
[2];
635 register vector
unsigned char v1
, v2
;
637 if ( (long) input
& 0x0fL
) {
638 /* manually re-align - the compiler is supposed to 16-byte align this for us */
640 vdprintf(("vBlockDecrypt128: unaligned input\n"));
641 if((unsigned)localBuf
& 0xf) {
642 vdprintf(("vBlockDecrypt128: unaligned localBuf!\n"));
644 memmove(localBuf
, input
, 16);
645 v1
= vec_ld(0, (vector
unsigned char *)localBuf
);
648 vdprintf(("vBlockDecrypt128: aligned input\n"));
649 v1
= vec_ld( 0, (vector
unsigned char *) input
);
652 /* parse input stream into rectangular array */
653 /* FIXME - do we need to zero v2 (or something)? */
654 block
[0] = vec_perm(v1
, v2
,
655 (vector
unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1,
656 5, 9, 13, 17, 21, 25, 29 ) );
657 block
[1] = vec_perm( v1
, v2
,
658 (vector
unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3,
659 7, 11, 15, 19, 23, 27, 31 ) );
661 vRijndaelDecrypt(block
, key
->keyLen
, 128, (vector
unsigned char *) key
->keySched
);
663 /* parse rectangular array into output ciphertext bytes */
664 v1
= vec_perm(block
[0], block
[1],
665 (vector
unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2,
666 10, 18, 26, 3, 11, 19, 27 ) );
667 v2
= vec_perm(block
[0], block
[1],
668 (vector
unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6,
669 14, 22, 30, 7, 15, 23, 31 ) );
671 if ( (long) outBuffer
& 0x0fL
) {
672 /* store output data into a non-aligned buffer */
674 vec_st(v1
, 0, (vector
unsigned char *) localBuf
);
675 memmove(outBuffer
, localBuf
, 16);
677 /* store output data into an aligned buffer */
678 vec_st( v1
, 0, (vector
unsigned char *) outBuffer
);
683 #endif /* defined(__ppc__) && defined(ALTIVEC_ENABLE) */