2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
30 * ---------------------------------------------------------------------------
31 * Copyright (c) 2002, Dr Brian Gladman, Worcester, UK. All rights reserved.
35 * The free distribution and use of this software in both source and binary
36 * form is allowed (with or without changes) provided that:
38 * 1. distributions of this source code include the above copyright
39 * notice, this list of conditions and the following disclaimer;
41 * 2. distributions in binary form include the above copyright
42 * notice, this list of conditions and the following disclaimer
43 * in the documentation and/or other associated materials;
45 * 3. the copyright holder's name is not used to endorse products
46 * built using this software without specific written permission.
48 * ALTERNATIVELY, provided that this notice is retained in full, this product
49 * may be distributed under the terms of the GNU General Public License (GPL),
50 * in which case the provisions of the GPL apply INSTEAD OF those given above.
54 * This software is provided 'as is' with no explicit or implied warranties
55 * in respect of its properties, including, but not limited to, correctness
56 * and/or fitness for purpose.
57 * ---------------------------------------------------------------------------
60 * This code requires either ASM_X86_V2 or ASM_X86_V2C to be set in aesopt.h
61 * and the same define to be set here as well. If AES_V2C is set this file
62 * requires the C files aeskey.c and aestab.c for support.
64 * This is a full assembler implementation covering encryption, decryption and
65 * key scheduling. It uses 2k bytes of tables but its encryption and decryption
66 * performance is very close to that obtained using large tables. Key schedule
67 * expansion is slower for both encryption and decryption but this is likely to
68 * be offset by the much smaller load that this version places on the processor
69 * cache. I acknowledge the contribution made by Daniel Bernstein to aspects of
70 * the design of the AES round function used here.
72 * This code provides the standard AES block size (128 bits, 16 bytes) and the
73 * three standard AES key sizes (128, 192 and 256 bits). It has the same call
74 * interface as my C implementation. The ebx, esi, edi and ebp registers are
75 * preserved across calls but eax, ecx and edx and the arithmetic status flags
79 #include <mach/i386/asm.h>
81 #define AES_128 /* define if AES with 128 bit keys is needed */
82 #define AES_192 /* define if AES with 192 bit keys is needed */
83 #define AES_256 /* define if AES with 256 bit keys is needed */
84 #define AES_VAR /* define if a variable key size is needed */
85 #define ENCRYPTION /* define if encryption is needed */
86 #define DECRYPTION /* define if decryption is needed */
87 #define AES_REV_DKS /* define if key decryption schedule is reversed */
90 #define ENCRYPTION_KEY_SCHEDULE /* define if enc. key expansion is needed */
91 #define DECRYPTION_KEY_SCHEDULE /* define if dec. key expansion is needed */
95 * The encryption key schedule has the following in memory layout where N is the
96 * number of rounds (10, 12 or 14):
98 * lo: | input key (round 0) | ; each round is four 32-bit words
99 * | encryption round 1 |
100 * | encryption round 2 |
102 * | encryption round N-1 |
103 * hi: | encryption round N |
105 * The decryption key schedule is normally set up so that it has the same
106 * layout as above by actually reversing the order of the encryption key
107 * schedule in memory (this happens when AES_REV_DKS is set):
109 * lo: | decryption round 0 | = | encryption round N |
110 * | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ]
111 * | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ]
113 * | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ]
114 * hi: | decryption round N | = | input key (round 0) |
116 * with rounds except the first and last modified using inv_mix_column()
117 * But if AES_REV_DKS is NOT set the order of keys is left as it is for
118 * encryption so that it has to be accessed in reverse when used for
119 * decryption (although the inverse mix column modifications are done)
121 * lo: | decryption round 0 | = | input key (round 0) |
122 * | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ]
123 * | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ]
125 * | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
126 * hi: | decryption round N | = | encryption round N |
128 * This layout is faster when the assembler key scheduling provided here
132 /* End of user defines */
161 * These macros implement stack based local variables
166 #define restore(r1) \
169 #define do_call(f, n) \
174 * finite field multiplies by {02}, {04} and {08}
/*
 * GF(2^8) multiplies by {02}, {04} and {08}.  The argument is shifted left
 * and every bit that moves past bit 7 is cancelled by xoring in a suitably
 * shifted copy of the AES reduction polynomial 0x11b, so for an 8-bit
 * argument the result is again an 8-bit value.  The argument is fully
 * parenthesized so that an expression (e.g. a ^ b) can be passed safely.
 */
#define f2(x) (((x)<<1)^((((x)>>7)&1)*0x11b))
#define f4(x) (((x)<<2)^((((x)>>6)&1)*0x11b)^((((x)>>6)&2)*0x11b))
#define f8(x) (((x)<<3)^((((x)>>5)&1)*0x11b)^((((x)>>5)&2)*0x11b)^((((x)>>5)&4)*0x11b))

/*
 * finite field multiplies required in table generation:
 * {03}, {09}, {0b}, {0d} and {0e}, built by xoring the products above
 */
#define f3(x) (f2(x) ^ (x))
#define f9(x) (f8(x) ^ (x))
#define fb(x) (f8(x) ^ f2(x) ^ (x))
#define fd(x) (f8(x) ^ f4(x) ^ (x))
#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
/*
 * Memory operands that index enc_tab.  x is the index register and the
 * scale factor is 8, i.e. one table entry is 8 bytes wide; the constant
 * added to enc_tab selects the byte offset inside the entry at which the
 * access starts.
 *
 * etab_0..etab_3 are the operands used by the normal-round macros with
 * 32-bit xorl/movl.  etab_b is an alias for etab_3 and is the operand the
 * last-round macros use with movzbl, which reads only the single byte at
 * offset 1 of the entry.
 *
 * btab_0..btab_3 are used by the alternative last-round macros, which load
 * 32 bits and then mask with (0x000000ff << 8 * n) to keep byte n.
 *
 * NOTE(review): presumably enc_tab labels the table emitted with u8(), so
 * that offset 1 holds the plain S-box byte -- the label itself is not
 * visible in this part of the file; confirm.
 */
189 #define etab_0(x) enc_tab+4(,x,8)
190 #define etab_1(x) enc_tab+3(,x,8)
191 #define etab_2(x) enc_tab+2(,x,8)
192 #define etab_3(x) enc_tab+1(,x,8)
194 #define etab_b(x) etab_3(x)
196 #define btab_0(x) enc_tab+6(,x,8)
197 #define btab_1(x) enc_tab+5(,x,8)
198 #define btab_2(x) enc_tab+4(,x,8)
199 #define btab_3(x) enc_tab+3(,x,8)
202 * ROUND FUNCTION. Build column[2] on ESI and column[3] on EDI that have the
203 * round keys pre-loaded. Build column[0] in EBP and column[1] in EBX.
211 * ESI column key[round][2]
212 * EDI column key[round][3]
217 * EBP column[0] unkeyed
218 * EBX column[1] unkeyed
219 * ESI column[2] keyed
220 * EDI column[3] keyed
225 #define rnd_fun(m1, m2) \
228 ## m1 ## _zo(esi, cl, 0, ebp); \
229 m1(esi, dh, 1, ebp); \
230 m1(esi, bh, 3, ebp); \
231 ## m1 ## _zo(edi, dl, 0, ebp); \
232 m1(edi, ah, 1, ebp); \
233 m1(edi, bl, 2, ebp); \
234 ## m2 ## _zo(ebp, al, 0, ebp); \
237 andl $0xffff0000, %eax; \
241 m1(ebp, ah, 1, ebx); \
242 m1(ebp, dh, 3, ebx); \
243 m2(ebx, dl, 2, ebx); \
244 m1(ebx, ch, 1, edx); \
245 ## m1 ## _zo(ebx, al, 0, edx); \
250 m1(ebp, cl, 2, edx); \
251 m1(edi, ch, 3, edx); \
252 m1(esi, al, 2, edx); \
256 * Basic MOV and XOR Operations for normal rounds
258 #define nr_xor_zo nr_xor
259 #define nr_xor(r1, r2, r3, r4) \
261 xorl etab_ ## r3(%r4), %r1;
263 #define nr_mov_zo nr_mov
264 #define nr_mov(r1, r2, r3, r4) \
266 movl etab_ ## r3(%r4), %r1;
269 * Basic MOV and XOR Operations for last round
274 #define lr_xor_zo(r1, r2, r3, r4) \
276 movzbl etab_b(%r4), %r4; \
279 #define lr_xor(r1, r2, r3, r4) \
281 movzbl etab_b(%r4), %r4; \
285 #define lr_mov_zo(r1, r2, r3, r4) \
287 movzbl etab_b(%r4), %r1;
289 #define lr_mov(r1, r2, r3, r4) \
291 movzbl etab_b(%r4), %r1; \
294 #else /* less effective but worth leaving as an option */
296 #define lr_xor_zo lr_xor
297 #define lr_xor(r1, r2, r3, r4) \
299 mov btab_ ## r3(%r4), %r4; \
300 andl $(0x000000ff << 8 * r3), %r4; \
303 #define lr_mov_zo lr_mov
304 #define lr_mov(r1, r2, r3, r4) \
306 mov btab_ ## r3(%r4), %r1; \
307 andl $(0x000000ff << 8 * r3), %r1;
312 * Apply S-Box to the 4 bytes in a 32-bit word and rotate left 3 byte positions
314 * r1 : output is xored into this register
315 * r2 : input: a => eax, b => ebx, c => ecx, d => edx
316 * r3 : scratch register
/*
 * l3s_col(r1, r2, r3): expands to four last-round xor steps, one per byte
 * of the 32-bit register e<r2>x, all accumulating into r1 with r3 passed
 * through as the scratch register:
 *   - <r2>h (bits 8..15)  with byte position 0 (via lr_xor_zo)
 *   - <r2>l (bits 0..7)   with byte position 3
 *   - e<r2>x is then shifted right by 16 bits
 *   - <r2>h (originally bits 24..31) with byte position 2
 *   - <r2>l (originally bits 16..23) with byte position 1
 * The input register e<r2>x is modified by the shrl.  r2 is a single
 * letter (a, b, c or d) that is token-pasted into the register names.
 */
319 #define l3s_col(r1, r2, r3) \
320 lr_xor_zo(r1, ## r2 ## h, 0, r3); \
321 lr_xor(r1, ## r2 ## l, 3, r3); \
322 shrl $16, %e ## r2 ## x; \
323 lr_xor(r1, ## r2 ## h, 2, r3); \
324 lr_xor(r1, ## r2 ## l, 1, r3);
327 * offsets to parameters
/*
 * Byte offsets of the three arguments.  The encryption and decryption
 * routines read them as <offset>+stk_spc(%esp), for example
 * in_blk+stk_spc(%esp), so stk_spc is added on top of each offset.
 * NOTE(review): presumably stk_spc is the number of bytes the routine
 * places on the stack before it reads its arguments -- the prologue is
 * not visible in this part of the file; confirm.
 */
329 #define in_blk 4 /* input byte array address parameter */
330 #define out_blk 8 /* output byte array address parameter */
331 #define ctx 12 /* AES context structure */
332 #define stk_spc 20 /* stack space */
336 #define ENCRYPTION_TABLE
341 movl 8(%ebp), %esi; \
342 movl 12(%ebp), %edi; \
344 rnd_fun(nr_xor, nr_mov); \
353 #define enc_last_round \
356 movl 8(%ebp), %esi; \
357 movl 12(%ebp), %edi; \
359 rnd_fun(lr_xor, lr_mov); \
366 .section __TEXT, __text
369 * AES Encryption Subroutine
379 movl in_blk+stk_spc(%esp), %esi /* input pointer */
385 movl ctx+stk_spc(%esp), %ebp /* key pointer */
386 movzbl 4*KS_LENGTH(%ebp), %edi
393 * determine the number of rounds
422 movl out_blk+stk_spc(%esp), %edx
440 * For r2 == 16, or r2 == 24 && r1 == 7, or r2 ==32 && r1 == 6
442 #define f_key(r1, r2, rc_val) \
443 l3s_col(esi, a, ebx); \
444 xorl $rc_val, %esi; \
446 movl %esi, r1*r2(%ebp); \
448 movl %edi, r1*r2+4(%ebp); \
450 movl %ecx, r1*r2+8(%ebp); \
452 movl %edx, r1*r2+12(%ebp); \
456 * For r2 == 24 && r1 == 0 to 6
458 #define f_key_24(r1, r2, rc_val) \
459 f_key(r1, r2, rc_val); \
461 xorl r1*r2+16-r2(%ebp), %eax; \
462 movl %eax, r1*r2+16(%ebp); \
463 xorl r1*r2+20-r2(%ebp), %eax; \
464 movl %eax, r1*r2+20(%ebp);
467 * For r2 ==32 && r1 == 0 to 5
469 #define f_key_32(r1, r2, rc_val) \
470 f_key(r1, r2, rc_val); \
474 movl r1*r2+16-r2(%ebp), %edx; \
475 l3s_col(edx, a, ebx); \
478 movl %eax, r1*r2+16(%ebp); \
479 xorl r1*r2+20-r2(%ebp), %eax; \
480 movl %eax, r1*r2+20(%ebp); \
481 xorl r1*r2+24-r2(%ebp), %eax; \
482 movl %eax, r1*r2+24(%ebp); \
483 xorl r1*r2+28-r2(%ebp), %eax; \
484 movl %eax, r1*r2+28(%ebp);
486 #ifdef ENCRYPTION_KEY_SCHEDULE
490 #ifndef ENCRYPTION_TABLE
491 #define ENCRYPTION_TABLE
494 Entry(aes_encrypt_key128)
502 movl $10*16, 4*KS_LENGTH(%ebp)
538 #ifndef ENCRYPTION_TABLE
539 #define ENCRYPTION_TABLE
542 Entry(aes_encrypt_key192)
550 movl $12*16, 4*KS_LENGTH(%ebp)
587 #ifndef ENCRYPTION_TABLE
588 #define ENCRYPTION_TABLE
591 Entry(aes_encrypt_key256)
599 movl $14*16, 4*KS_LENGTH(%ebp)
639 #ifndef ENCRYPTION_TABLE
640 #define ENCRYPTION_TABLE
643 Entry(aes_encrypt_key)
670 do_call(aes_encrypt_key128, 8)
673 do_call(aes_encrypt_key192, 8)
676 do_call(aes_encrypt_key256, 8)
683 #ifdef ENCRYPTION_TABLE
685 # S-box data - 256 entries
687 .section __DATA, __data
/*
 * u8(x) expands to the eight comma-separated byte values of one table
 * entry for the S-box value x, for use in a .byte directive:
 *   offset 0: 0      offset 4: {02}*x
 *   offset 1: x      offset 5: x
 *   offset 2: x      offset 6: x
 *   offset 3: {03}*x offset 7: {03}*x
 * The order is the table layout the etab_ and btab_ addressing macros
 * depend on; do not reorder.
 */
690 #define u8(x) 0, x, x, f3(x), f2(x), x, x, f3(x)
693 .byte u8(0x63),u8(0x7c),u8(0x77),u8(0x7b),u8(0xf2),u8(0x6b),u8(0x6f),u8(0xc5)
694 .byte u8(0x30),u8(0x01),u8(0x67),u8(0x2b),u8(0xfe),u8(0xd7),u8(0xab),u8(0x76)
695 .byte u8(0xca),u8(0x82),u8(0xc9),u8(0x7d),u8(0xfa),u8(0x59),u8(0x47),u8(0xf0)
696 .byte u8(0xad),u8(0xd4),u8(0xa2),u8(0xaf),u8(0x9c),u8(0xa4),u8(0x72),u8(0xc0)
697 .byte u8(0xb7),u8(0xfd),u8(0x93),u8(0x26),u8(0x36),u8(0x3f),u8(0xf7),u8(0xcc)
698 .byte u8(0x34),u8(0xa5),u8(0xe5),u8(0xf1),u8(0x71),u8(0xd8),u8(0x31),u8(0x15)
699 .byte u8(0x04),u8(0xc7),u8(0x23),u8(0xc3),u8(0x18),u8(0x96),u8(0x05),u8(0x9a)
700 .byte u8(0x07),u8(0x12),u8(0x80),u8(0xe2),u8(0xeb),u8(0x27),u8(0xb2),u8(0x75)
701 .byte u8(0x09),u8(0x83),u8(0x2c),u8(0x1a),u8(0x1b),u8(0x6e),u8(0x5a),u8(0xa0)
702 .byte u8(0x52),u8(0x3b),u8(0xd6),u8(0xb3),u8(0x29),u8(0xe3),u8(0x2f),u8(0x84)
703 .byte u8(0x53),u8(0xd1),u8(0x00),u8(0xed),u8(0x20),u8(0xfc),u8(0xb1),u8(0x5b)
704 .byte u8(0x6a),u8(0xcb),u8(0xbe),u8(0x39),u8(0x4a),u8(0x4c),u8(0x58),u8(0xcf)
705 .byte u8(0xd0),u8(0xef),u8(0xaa),u8(0xfb),u8(0x43),u8(0x4d),u8(0x33),u8(0x85)
706 .byte u8(0x45),u8(0xf9),u8(0x02),u8(0x7f),u8(0x50),u8(0x3c),u8(0x9f),u8(0xa8)
707 .byte u8(0x51),u8(0xa3),u8(0x40),u8(0x8f),u8(0x92),u8(0x9d),u8(0x38),u8(0xf5)
708 .byte u8(0xbc),u8(0xb6),u8(0xda),u8(0x21),u8(0x10),u8(0xff),u8(0xf3),u8(0xd2)
709 .byte u8(0xcd),u8(0x0c),u8(0x13),u8(0xec),u8(0x5f),u8(0x97),u8(0x44),u8(0x17)
710 .byte u8(0xc4),u8(0xa7),u8(0x7e),u8(0x3d),u8(0x64),u8(0x5d),u8(0x19),u8(0x73)
711 .byte u8(0x60),u8(0x81),u8(0x4f),u8(0xdc),u8(0x22),u8(0x2a),u8(0x90),u8(0x88)
712 .byte u8(0x46),u8(0xee),u8(0xb8),u8(0x14),u8(0xde),u8(0x5e),u8(0x0b),u8(0xdb)
713 .byte u8(0xe0),u8(0x32),u8(0x3a),u8(0x0a),u8(0x49),u8(0x06),u8(0x24),u8(0x5c)
714 .byte u8(0xc2),u8(0xd3),u8(0xac),u8(0x62),u8(0x91),u8(0x95),u8(0xe4),u8(0x79)
715 .byte u8(0xe7),u8(0xc8),u8(0x37),u8(0x6d),u8(0x8d),u8(0xd5),u8(0x4e),u8(0xa9)
716 .byte u8(0x6c),u8(0x56),u8(0xf4),u8(0xea),u8(0x65),u8(0x7a),u8(0xae),u8(0x08)
717 .byte u8(0xba),u8(0x78),u8(0x25),u8(0x2e),u8(0x1c),u8(0xa6),u8(0xb4),u8(0xc6)
718 .byte u8(0xe8),u8(0xdd),u8(0x74),u8(0x1f),u8(0x4b),u8(0xbd),u8(0x8b),u8(0x8a)
719 .byte u8(0x70),u8(0x3e),u8(0xb5),u8(0x66),u8(0x48),u8(0x03),u8(0xf6),u8(0x0e)
720 .byte u8(0x61),u8(0x35),u8(0x57),u8(0xb9),u8(0x86),u8(0xc1),u8(0x1d),u8(0x9e)
721 .byte u8(0xe1),u8(0xf8),u8(0x98),u8(0x11),u8(0x69),u8(0xd9),u8(0x8e),u8(0x94)
722 .byte u8(0x9b),u8(0x1e),u8(0x87),u8(0xe9),u8(0xce),u8(0x55),u8(0x28),u8(0xdf)
723 .byte u8(0x8c),u8(0xa1),u8(0x89),u8(0x0d),u8(0xbf),u8(0xe6),u8(0x42),u8(0x68)
724 .byte u8(0x41),u8(0x99),u8(0x2d),u8(0x0f),u8(0xb0),u8(0x54),u8(0xbb),u8(0x16)
730 #define DECRYPTION_TABLE
/*
 * Memory operands that index dec_tab.  x is the index register and the
 * scale factor is 8, i.e. one table entry is 8 bytes wide; the constant
 * added to dec_tab selects the byte offset inside the entry at which the
 * access starts.
 *
 * dtab_0..dtab_3 are the operands used by the normal inverse-round macros
 * (and by inv_mix_col) with 32-bit xorl/movl.  dtab_x is the operand the
 * last inverse-round macros use with movzbl, which reads only the single
 * byte at offset 7 of the entry.
 *
 * NOTE(review): presumably dec_tab labels the table emitted with v8(), so
 * that offset 7 holds the plain inverse S-box byte -- the label itself is
 * not visible in this part of the file; confirm.
 */
732 #define dtab_0(x) dec_tab(,x,8)
733 #define dtab_1(x) dec_tab+3(,x,8)
734 #define dtab_2(x) dec_tab+2(,x,8)
735 #define dtab_3(x) dec_tab+1(,x,8)
736 #define dtab_x(x) dec_tab+7(,x,8)
738 #define irn_fun(m1, m2) \
741 ## m1 ## _zo(esi, cl, 0, ebp); \
742 m1(esi, bh, 1, ebp); \
743 m1(esi, al, 2, ebp); \
744 ## m1 ## _zo(edi, dl, 0, ebp); \
745 m1(edi, ch, 1, ebp); \
746 m1(edi, ah, 3, ebp); \
747 ## m2 ## _zo(ebp, bl, 0, ebp); \
750 andl $0xffff0000, %ebx; \
754 m1(ebp, bh, 1, eax); \
755 m1(ebp, ch, 3, eax); \
756 m2(eax, cl, 2, ecx); \
757 ## m1 ## _zo(eax, bl, 0, ecx); \
758 m1(eax, dh, 1, ecx); \
763 m1(esi, dh, 3, ecx); \
764 m1(ebp, dl, 2, ecx); \
765 m1(eax, bh, 3, ecx); \
769 * Basic MOV and XOR Operations for normal rounds
771 #define ni_xor_zo ni_xor
772 #define ni_xor(r1, r2, r3, r4) \
774 xorl dtab_ ## r3 ## (%r4), %r1;
776 #define ni_mov_zo ni_mov
777 #define ni_mov(r1, r2, r3, r4) \
779 movl dtab_ ## r3 ## (%r4), %r1;
782 * Basic MOV and XOR Operations for last round
785 #define li_xor_zo(r1, r2, r3, r4) \
787 movzbl dtab_x(%r4), %r4; \
790 #define li_xor(r1, r2, r3, r4) \
792 movzbl dtab_x(%r4), %r4; \
796 #define li_mov_zo(r1, r2, r3, r4) \
798 movzbl dtab_x(%r4), %r1;
800 #define li_mov(r1, r2, r3, r4) \
802 movzbl dtab_x(%r4), %r1; \
810 movl 8(%ebp), %esi; \
811 movl 12(%ebp), %edi; \
813 irn_fun(ni_xor, ni_mov); \
822 #define dec_last_round \
825 movl 8(%ebp), %esi; \
826 movl 12(%ebp), %edi; \
828 irn_fun(li_xor, li_mov); \
840 movl 8(%ebp), %esi; \
841 movl 12(%ebp), %edi; \
843 irn_fun(ni_xor, ni_mov); \
852 #define dec_last_round \
855 movl 8(%ebp), %esi; \
856 movl 12(%ebp), %edi; \
858 irn_fun(li_xor, li_mov); \
865 #endif /* AES_REV_DKS */
867 .section __TEXT, __text
870 * AES Decryption Subroutine
881 * input four columns and xor in first round key
883 movl in_blk+stk_spc(%esp), %esi /* input pointer */
890 movl ctx+stk_spc(%esp), %ebp /* key pointer */
891 movzbl 4*KS_LENGTH(%ebp), %edi
892 #ifndef AES_REV_DKS /* if decryption key schedule is not reversed */
893 leal (%ebp,%edi), %ebp /* we have to access it from the top down */
895 xorl (%ebp), %eax /* key schedule */
901 * determine the number of rounds
931 * move final values to the output array.
933 movl out_blk+stk_spc(%esp), %ebp
950 #define inv_mix_col \
952 movzbl etab_b(%ebx), %ebx; \
953 movl dtab_0(%ebx), %eax; \
956 movzbl etab_b(%ebx), %ebx; \
957 xorl dtab_1(%ebx), %eax; \
959 movzbl etab_b(%ebx), %ebx; \
960 xorl dtab_2(%ebx), %eax; \
962 movzbl etab_b(%ebx), %ebx; \
963 xorl dtab_3(%ebx), %eax;
965 #ifdef DECRYPTION_KEY_SCHEDULE
969 #ifndef DECRYPTION_TABLE
970 #define DECRYPTION_TABLE
973 Entry(aes_decrypt_key128)
979 movl 24(%esp), %eax /* context */
980 movl 20(%esp), %edx /* key */
983 do_call(aes_encrypt_key128, 8)
985 movl 24(%esp), %esi /* pointer to first round key */
986 leal (%esi,%eax), %edi /* pointer to last round key */
988 /* the inverse mix column transformation */
989 movl -16(%esi), %edx /* needs to be applied to all round keys */
991 movl %eax, -16(%esi) /* transforming the four sub-keys in the */
992 movl -12(%esi), %edx /* second round key */
994 movl %eax, -12(%esi) /* transformations for subsequent rounds */
995 movl -8(%esi), %edx /* can then be made more efficient by */
997 movl %eax, -8(%esi) /* in the encryption round key ek[r]: */
1000 movl %eax, -4(%esi) /* where n is 1..3. Hence the corresponding */
1002 aes_decrypt_key128.0:
1003 movl (%esi), %edx /* subkeys in the decryption round key dk[r] */
1005 movl %eax, (%esi) /* GF(256): */
1006 xorl -12(%esi), %eax
1007 movl %eax, 4(%esi) /* dk[r][n] = dk[r][n-1] ^ dk[r-1][n] */
1009 movl %eax, 8(%esi) /* So we only need one inverse mix column */
1010 xorl -4(%esi), %eax /* operation (n = 0) for each four word cycle */
1011 movl %eax, 12(%esi) /* in the expanded key. */
1014 jg aes_decrypt_key128.0
1021 #ifndef DECRYPTION_TABLE
1022 #define DECRYPTION_TABLE
1025 Entry(aes_decrypt_key192)
1031 movl 24(%esp), %eax /* context */
1032 movl 20(%esp), %edx /* key */
1035 do_call(aes_encrypt_key192, 8)
1037 movl 24(%esp), %esi /* first round key */
1038 leal (%esi,%eax), %edi /* last round key */
1039 addl $48, %esi /* the first 6 words are the key, of */
1040 /* which the top 2 words are part of */
1041 movl -32(%esi), %edx /* the second round key and hence */
1043 movl %eax, -32(%esi) /* need to do a further six values prior */
1044 movl -28(%esi), %edx /* to using a more efficient technique */
1046 movl %eax, -28(%esi)
1047 /* dk[r][n] = dk[r][n-1] ^ dk[r-1][n] */
1048 movl -24(%esi), %edx
1050 movl %eax, -24(%esi) /* cycle is now 6 words long */
1051 movl -20(%esi), %edx
1053 movl %eax, -20(%esi)
1054 movl -16(%esi), %edx
1056 movl %eax, -16(%esi)
1057 movl -12(%esi), %edx
1059 movl %eax, -12(%esi)
1067 aes_decrypt_key192.0:
1068 movl (%esi), %edx /* expanded key is 13 * 4 = 52 32-bit words */
1070 movl %eax, (%esi) /* using inv_mix_col. We have already done 8 */
1071 xorl -20(%esi), %eax /* of these so 36 are left - hence we need */
1072 movl %eax, 4(%esi) /* exactly 6 loops of six here */
1073 xorl -16(%esi), %eax
1075 xorl -12(%esi), %eax
1083 jg aes_decrypt_key192.0
1090 #ifndef DECRYPTION_TABLE
1091 #define DECRYPTION_TABLE
1094 Entry(aes_decrypt_key256)
1104 do_call(aes_encrypt_key256, 8)
1107 leal (%esi,%eax), %edi
1110 movl -48(%esi), %edx /* the primary key is 8 words, of which */
1112 movl %eax, -48(%esi)
1113 movl -44(%esi), %edx
1115 movl %eax, -44(%esi)
1116 movl -40(%esi), %edx
1118 movl %eax, -40(%esi)
1119 movl -36(%esi), %edx
1121 movl %eax, -36(%esi)
1123 movl -32(%esi), %edx /* the encryption key expansion cycle is */
1125 movl %eax, -32(%esi) /* start by doing one complete block */
1126 movl -28(%esi), %edx
1128 movl %eax, -28(%esi)
1129 movl -24(%esi), %edx
1131 movl %eax, -24(%esi)
1132 movl -20(%esi), %edx
1134 movl %eax, -20(%esi)
1135 movl -16(%esi), %edx
1137 movl %eax, -16(%esi)
1138 movl -12(%esi), %edx
1140 movl %eax, -12(%esi)
1148 aes_decrypt_key256.0:
1149 movl (%esi), %edx /* we can now speed up the remaining */
1151 movl %eax, (%esi) /* outlined earlier. But note that */
1152 xorl -28(%esi), %eax /* there is one extra inverse mix */
1153 movl %eax, 4(%esi) /* column operation as the 256 bit */
1154 xorl -24(%esi), %eax /* key has an extra non-linear step */
1155 movl %eax, 8(%esi) /* for the midway element. */
1156 xorl -20(%esi), %eax
1157 movl %eax, 12(%esi) /* the expanded key is 15 * 4 = 60 */
1158 movl 16(%esi), %edx /* 32-bit words of which 52 need to */
1160 movl %eax, 16(%esi) /* 12 so 40 are left - which means */
1161 xorl -12(%esi), %eax /* that we need exactly 5 loops of 8 */
1169 jg aes_decrypt_key256.0
1177 movl 24(%esp), %esi /* this reverses the order of the */
1179 movl (%esi), %eax /* round keys if required */
1213 Entry(aes_decrypt_key)
1222 je aes_decrypt_key.1
1224 je aes_decrypt_key.1
1227 je aes_decrypt_key.2
1229 je aes_decrypt_key.2
1232 je aes_decrypt_key.3
1234 je aes_decrypt_key.3
1240 do_call(aes_decrypt_key128, 8)
1243 do_call(aes_decrypt_key192, 8)
1246 do_call(aes_decrypt_key256, 8)
1253 #ifdef DECRYPTION_TABLE
1256 * Inverse S-box data - 256 entries
1259 .section __DATA, __data
/*
 * v8(x) expands to the eight comma-separated byte values of one table
 * entry for the inverse S-box value x, for use in a .byte directive:
 *   offset 0: {0e}*x  offset 4: {0e}*x
 *   offset 1: {09}*x  offset 5: {09}*x
 *   offset 2: {0d}*x  offset 6: {0d}*x
 *   offset 3: {0b}*x  offset 7: x
 * The order is the table layout the dtab_ addressing macros depend on;
 * do not reorder.
 */
1262 #define v8(x) fe(x), f9(x), fd(x), fb(x), fe(x), f9(x), fd(x), x
1265 .byte v8(0x52),v8(0x09),v8(0x6a),v8(0xd5),v8(0x30),v8(0x36),v8(0xa5),v8(0x38)
1266 .byte v8(0xbf),v8(0x40),v8(0xa3),v8(0x9e),v8(0x81),v8(0xf3),v8(0xd7),v8(0xfb)
1267 .byte v8(0x7c),v8(0xe3),v8(0x39),v8(0x82),v8(0x9b),v8(0x2f),v8(0xff),v8(0x87)
1268 .byte v8(0x34),v8(0x8e),v8(0x43),v8(0x44),v8(0xc4),v8(0xde),v8(0xe9),v8(0xcb)
1269 .byte v8(0x54),v8(0x7b),v8(0x94),v8(0x32),v8(0xa6),v8(0xc2),v8(0x23),v8(0x3d)
1270 .byte v8(0xee),v8(0x4c),v8(0x95),v8(0x0b),v8(0x42),v8(0xfa),v8(0xc3),v8(0x4e)
1271 .byte v8(0x08),v8(0x2e),v8(0xa1),v8(0x66),v8(0x28),v8(0xd9),v8(0x24),v8(0xb2)
1272 .byte v8(0x76),v8(0x5b),v8(0xa2),v8(0x49),v8(0x6d),v8(0x8b),v8(0xd1),v8(0x25)
1273 .byte v8(0x72),v8(0xf8),v8(0xf6),v8(0x64),v8(0x86),v8(0x68),v8(0x98),v8(0x16)
1274 .byte v8(0xd4),v8(0xa4),v8(0x5c),v8(0xcc),v8(0x5d),v8(0x65),v8(0xb6),v8(0x92)
1275 .byte v8(0x6c),v8(0x70),v8(0x48),v8(0x50),v8(0xfd),v8(0xed),v8(0xb9),v8(0xda)
1276 .byte v8(0x5e),v8(0x15),v8(0x46),v8(0x57),v8(0xa7),v8(0x8d),v8(0x9d),v8(0x84)
1277 .byte v8(0x90),v8(0xd8),v8(0xab),v8(0x00),v8(0x8c),v8(0xbc),v8(0xd3),v8(0x0a)
1278 .byte v8(0xf7),v8(0xe4),v8(0x58),v8(0x05),v8(0xb8),v8(0xb3),v8(0x45),v8(0x06)
1279 .byte v8(0xd0),v8(0x2c),v8(0x1e),v8(0x8f),v8(0xca),v8(0x3f),v8(0x0f),v8(0x02)
1280 .byte v8(0xc1),v8(0xaf),v8(0xbd),v8(0x03),v8(0x01),v8(0x13),v8(0x8a),v8(0x6b)
1281 .byte v8(0x3a),v8(0x91),v8(0x11),v8(0x41),v8(0x4f),v8(0x67),v8(0xdc),v8(0xea)
1282 .byte v8(0x97),v8(0xf2),v8(0xcf),v8(0xce),v8(0xf0),v8(0xb4),v8(0xe6),v8(0x73)
1283 .byte v8(0x96),v8(0xac),v8(0x74),v8(0x22),v8(0xe7),v8(0xad),v8(0x35),v8(0x85)
1284 .byte v8(0xe2),v8(0xf9),v8(0x37),v8(0xe8),v8(0x1c),v8(0x75),v8(0xdf),v8(0x6e)
1285 .byte v8(0x47),v8(0xf1),v8(0x1a),v8(0x71),v8(0x1d),v8(0x29),v8(0xc5),v8(0x89)
1286 .byte v8(0x6f),v8(0xb7),v8(0x62),v8(0x0e),v8(0xaa),v8(0x18),v8(0xbe),v8(0x1b)
1287 .byte v8(0xfc),v8(0x56),v8(0x3e),v8(0x4b),v8(0xc6),v8(0xd2),v8(0x79),v8(0x20)
1288 .byte v8(0x9a),v8(0xdb),v8(0xc0),v8(0xfe),v8(0x78),v8(0xcd),v8(0x5a),v8(0xf4)
1289 .byte v8(0x1f),v8(0xdd),v8(0xa8),v8(0x33),v8(0x88),v8(0x07),v8(0xc7),v8(0x31)
1290 .byte v8(0xb1),v8(0x12),v8(0x10),v8(0x59),v8(0x27),v8(0x80),v8(0xec),v8(0x5f)
1291 .byte v8(0x60),v8(0x51),v8(0x7f),v8(0xa9),v8(0x19),v8(0xb5),v8(0x4a),v8(0x0d)
1292 .byte v8(0x2d),v8(0xe5),v8(0x7a),v8(0x9f),v8(0x93),v8(0xc9),v8(0x9c),v8(0xef)
1293 .byte v8(0xa0),v8(0xe0),v8(0x3b),v8(0x4d),v8(0xae),v8(0x2a),v8(0xf5),v8(0xb0)
1294 .byte v8(0xc8),v8(0xeb),v8(0xbb),v8(0x3c),v8(0x83),v8(0x53),v8(0x99),v8(0x61)
1295 .byte v8(0x17),v8(0x2b),v8(0x04),v8(0x7e),v8(0xba),v8(0x77),v8(0xd6),v8(0x26)
1296 .byte v8(0xe1),v8(0x69),v8(0x14),v8(0x63),v8(0x55),v8(0x21),v8(0x0c),v8(0x7d)