/*
 ---------------------------------------------------------------------------
 Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.

 LICENSE TERMS

 The free distribution and use of this software in both source and binary
 form is allowed (with or without changes) provided that:

   1. distributions of this source code include the above copyright
      notice, this list of conditions and the following disclaimer;

   2. distributions in binary form include the above copyright
      notice, this list of conditions and the following disclaimer
      in the documentation and/or other associated materials;

   3. the copyright holder's name is not used to endorse products
      built using this software without specific written permission.

 ALTERNATIVELY, provided that this notice is retained in full, this product
 may be distributed under the terms of the GNU General Public License (GPL),
 in which case the provisions of the GPL apply INSTEAD OF those given above.

 DISCLAIMER

 This software is provided 'as is' with no explicit or implied warranties
 in respect of its properties, including, but not limited to, correctness
 and/or fitness for purpose.
 ---------------------------------------------------------------------------
 Issue 31/01/2006

 These subroutines implement multiple block AES modes for ECB, CBC, CFB,
 OFB and CTR encryption. The code provides support for the VIA Advanced
 Cryptography Engine (ACE).

 NOTE: In the following subroutines, the AES contexts (ctx) must be
 16 byte aligned if VIA ACE is being used
*/

/* modified 3/5/10 cclee */
/* Cleaned up the code related to VIA ACE and hand-optimized aes_cbc_encrypt and aes_cbc_decrypt */
/* Moved the xmm register save/restore, originally inside the callee functions, into these two caller functions */

/* Added code comments/descriptions, plus HW AES detection and an execution branch   cclee 3-13-10 */

#ifdef KERNEL
#include <i386/cpu_capabilities.h>			// to use __cpu_capabilities & kHasAES to detect Intel Westmere AES HW
#else
#include <System/i386/cpu_capabilities.h>	// to use __cpu_capabilities & kHasAES to detect Intel Westmere AES HW
#endif
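
// For reference, a rough C sketch (not compiled) of the run-time dispatch performed at
// the top of _aes_encrypt_cbc and _aes_decrypt_cbc below; read_capabilities() and the
// _sw suffix are illustrative names only, standing in for the capability load
// (__cpu_capabilities in kernel, _COMM_PAGE_CPU_CAPABILITIES in user space) and for
// the fall-through software path implemented in this file:
#if 0
aes_rval aes_encrypt_cbc(const unsigned char *ibuf, const unsigned char *in_iv, unsigned int num_blk,
					unsigned char *obuf, const aes_encrypt_ctx ctx[1])
{
	if (read_capabilities() & kHasAES)		// hypothetical capability read
		return aes_encrypt_cbc_hw(ibuf, in_iv, num_blk, obuf, ctx);	// AES-NI path (aes_modes_hw.s)
	return aes_encrypt_cbc_sw(ibuf, in_iv, num_blk, obuf, ctx);		// software path below
}
#endif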

#if 0

// TODO:
// aes_ecb_encrypt and aes_ecb_decrypt are not present in gen/aescrypt.c
// would add the implementation if needed
// they are now compiled from aes_modes.c

aes_rval aes_ecb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
					int len, const aes_encrypt_ctx ctx[1])
{	int nb = len >> 4;

	if(len & (AES_BLOCK_SIZE - 1)) return 1;
	while(nb--) {
		aes_encrypt(ibuf, obuf, ctx);
		ibuf += AES_BLOCK_SIZE;
		obuf += AES_BLOCK_SIZE;
	}
	return 0;
}

aes_rval aes_ecb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
					int len, const aes_decrypt_ctx ctx[1])
{	int nb = len >> 4;

	if(len & (AES_BLOCK_SIZE - 1)) return 1;
	while(nb--) {
		aes_decrypt(ibuf, obuf, ctx);
		ibuf += AES_BLOCK_SIZE;
		obuf += AES_BLOCK_SIZE;
	}
	return 0;
}
#endif

#if 0
aes_rval aes_encrypt_cbc(const unsigned char *ibuf, const unsigned char *in_iv, unsigned int num_blk,
					unsigned char *obuf, const aes_encrypt_ctx ctx[1])
{
	unsigned char iv[16];
	int i;

	for (i = 0; i < 16; i++) iv[i] = *(in_iv + i);

	while (num_blk--) {
		for (i = 0; i < 16; i++) iv[i] ^= ibuf[i];	// iv ^= ibuf (128-bit)
		aes_encrypt(iv, iv, ctx);
		memcpy(obuf, iv, AES_BLOCK_SIZE);
		ibuf += AES_BLOCK_SIZE;
		obuf += AES_BLOCK_SIZE;

	}

	return 0;
}
#endif
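
// A minimal usage sketch (not compiled) of the CBC entry point above, assuming an
// aes_encrypt_key128() key-schedule helper and AES_BLOCK_SIZE == 16 from aes.h;
// both are assumptions for illustration only:
#if 0
static aes_rval cbc_encrypt_buffer(const unsigned char key[16], const unsigned char iv[16],
					const unsigned char *ibuf, unsigned char *obuf, unsigned int nbytes)
{
	aes_encrypt_ctx ctx[1];

	if (nbytes & (AES_BLOCK_SIZE - 1)) return 1;		// whole 16-byte blocks only
	aes_encrypt_key128(key, ctx);						// assumed key-schedule helper
	return aes_encrypt_cbc(ibuf, iv, nbytes >> 4, obuf, ctx);
}
#endif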

	.text
	.align 4,0x90
	.globl _aes_encrypt_cbc
_aes_encrypt_cbc:

	// detect AES HW
	// if AES HW detected, branch to AES-HW-specific function _aes_encrypt_cbc_hw (aes_modes_hw.s)
	// o.w., fall through to the original AES-SW function

#if defined __x86_64__
	movq __cpu_capabilities@GOTPCREL(%rip), %rax	// %rax -> __cpu_capabilities
	mov (%rax), %eax								// %eax = __cpu_capabilities
#else
#ifdef KERNEL
	leal __cpu_capabilities, %eax					// %eax -> __cpu_capabilities
	mov (%eax), %eax								// %eax = __cpu_capabilities
#else
	mov _COMM_PAGE_CPU_CAPABILITIES, %eax
#endif
#endif
	test $(kHasAES), %eax							// kHasAES & __cpu_capabilities
	jne _aes_encrypt_cbc_hw							// if AES HW detected, branch to HW-specific code

	// save registers and allocate stack memory for xmm registers and calling arguments (i386 only)
#if defined __i386__
	push %ebp
	mov %esp, %ebp
	push %ebx										// to be used as ibuf
	push %edi										// to be used as obuf
	sub $(16+16+7*16), %esp							// 12 (calling arguments) + 4 (%esi) + 16 (iv) + 7*16 (xmm)
	mov %esi, 12(%esp)								// save %esi in the unused 4 bytes; %esi will be used as num_blk

	#define sp %esp
#else	// __x86_64__
	push %rbp
	mov %rsp, %rbp
	push %rbx
	push %r12
	push %r13
	push %r14
	push %r15
	sub $(8+16+5*16+16), %rsp						// 8 (align) + 16 (dummy iv) + 5*16 (xmm) + 16 (for i386-x86_64 consistency)

	#define sp %rsp
#endif

	// save xmm registers for kernel use
	// xmm6-xmm7 will be used locally
	// xmm0-xmm2 (x86_64) or xmm0-xmm4 (i386) will be used inside _aes_encrypt_xmm_no_save (non-restored)
	// there is a hole not used for xmm, which is 48(sp).
	// it has been used to store iv (16-bytes) in i386 code
	// for consistency between i386 and x86_64, this hole is dummied in x86_64 code
	// also the 1st 16 bytes (sp) is dummied in x86_64 code
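	// resulting frame layout relative to sp (summary derived from the stores below):
	//     0(sp) : calling arguments (i386; saved %esi at 12(sp)) / dummy (x86_64)
	//    16(sp) : xmm7
	//    32(sp) : xmm6
	//    48(sp) : iv[16] (i386) / dummy (x86_64)
	//    64(sp) : xmm0
	//    80(sp) : xmm1
	//    96(sp) : xmm2
	//   112(sp) : xmm3 (i386 only)
	//   128(sp) : xmm4 (i386 only)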

#ifdef KERNEL
	movaps %xmm7, 16(sp)
	movaps %xmm6, 32(sp)
	movaps %xmm0, 64(sp)
	movaps %xmm1, 80(sp)
	movaps %xmm2, 96(sp)
#if defined __i386__
	movaps %xmm3, 112(sp)
	movaps %xmm4, 128(sp)
#endif
#endif

	// set up registers from calling arguments

#if defined __i386__

	mov 12(%ebp), %eax								// in_iv
	mov 24(%ebp), %edx								// ctx
	movups (%eax), %xmm7							// in_iv
	lea 48(%esp), %eax								// &iv[0]
	mov %eax, (%esp)								// 1st iv for aes_encrypt
	mov %eax, 4(%esp)								// 2nd iv for aes_encrypt
	mov %edx, 8(%esp)								// ctx for aes_encrypt
	mov 8(%ebp), %ebx								// ibuf
	mov 16(%ebp), %esi								// num_blk
	mov 20(%ebp), %edi								// obuf

	#define ibuf %ebx
	#define obuf %edi
	#define num_blk %esi

#else	// __x86_64__, calling arguments order : rdi/rsi/rdx/rcx/r8

	mov %rdi, %rbx									// ibuf
	lea 48(sp), %r12								// &iv
	movups (%rsi), %xmm7							// in_iv
	mov %rdx, %r13									// num_blk
	mov %rcx, %r14									// obuf
	mov %r8, %r15									// ctx

	#define ibuf %rbx
	#define iv %r12
	#define num_blk %r13d
	#define obuf %r14
	#define ctx %r15

#endif

	cmp $1, num_blk									// num_blk vs 1
	jl 9f											// if num_blk < 1, branch to bypass the main loop
0:
	movups (ibuf), %xmm6							// ibuf
#if defined __i386__
	lea 48(sp), %eax								// &iv[0]
	pxor %xmm6, %xmm7								// iv ^= ibuf
	movups %xmm7, (%eax)							// save iv
#else
	pxor %xmm6, %xmm7								// iv ^= ibuf
	movups %xmm7, (iv)								// save iv
	mov iv, %rdi									// 1st calling argument for aes_encrypt
	mov iv, %rsi									// 2nd calling argument for aes_encrypt
	mov ctx, %rdx									// 3rd calling argument for aes_encrypt
#endif
	call _aes_encrypt_xmm_no_save					// aes_encrypt(iv, iv, ctx)
#if defined __i386__
	leal 48(%esp), %eax								// &iv[0]
	movups (%eax), %xmm7							// read iv
#else
	movups (iv), %xmm7								// read iv
#endif
	movups %xmm7, (obuf)							// memcpy(obuf, iv, AES_BLOCK_SIZE);
	add $16, ibuf									// ibuf += AES_BLOCK_SIZE;
	add $16, obuf									// obuf += AES_BLOCK_SIZE;
	sub $1, num_blk									// num_blk--
	jg 0b											// if num_blk > 0, repeat the loop
9:

L_crypt_cbc_done:

	// restore xmm registers due to kernel use
#ifdef KERNEL
	movaps 16(sp), %xmm7
	movaps 32(sp), %xmm6
	movaps 64(sp), %xmm0
	movaps 80(sp), %xmm1
	movaps 96(sp), %xmm2
#if defined __i386__
	movaps 112(sp), %xmm3
	movaps 128(sp), %xmm4
#endif
#endif

	xor %eax, %eax									// to return 0 for SUCCESS

#if defined __i386__
	mov 12(%esp), %esi								// restore %esi
	add $(16+16+7*16), %esp							// 12 (calling arguments) + 4 (%esi) + 16 (iv) + 7*16 (xmm)
	pop %edi
	pop %ebx
#else
	add $(8+16+5*16+16), %rsp						// 8 (align) + 16 (dummy iv) + 5*16 (xmm) + 16 (for i386-x86_64 consistency)
	pop %r15
	pop %r14
	pop %r13
	pop %r12
	pop %rbx
#endif
	leave
	ret

#if 0
aes_rval aes_decrypt_cbc(const unsigned char *ibuf, const unsigned char *in_iv, unsigned int num_blk,
					unsigned char *obuf, const aes_decrypt_ctx ctx[1])
{
	unsigned char iv[16], tmp[16];
	int i;

	for (i = 0; i < 16; i++) iv[i] = *(in_iv + i);

	while (num_blk--) {

		memcpy(tmp, ibuf, AES_BLOCK_SIZE);
		aes_decrypt(ibuf, obuf, ctx);
		for (i = 0; i < 16; i++) obuf[i] ^= iv[i];	// obuf ^= iv
		memcpy(iv, tmp, AES_BLOCK_SIZE);
		ibuf += AES_BLOCK_SIZE;
		obuf += AES_BLOCK_SIZE;
	}

	return 0;
}
#endif

	.text
	.align 4,0x90
	.globl _aes_decrypt_cbc
_aes_decrypt_cbc:

	// detect AES HW
	// if AES HW detected, branch to AES-HW-specific function _aes_decrypt_cbc_hw (aes_modes_hw.s)
	// o.w., fall through to the original AES-SW function

#if defined __x86_64__
	movq __cpu_capabilities@GOTPCREL(%rip), %rax	// %rax -> __cpu_capabilities
	mov (%rax), %eax								// %eax = __cpu_capabilities
#else
#ifdef KERNEL
	leal __cpu_capabilities, %eax					// %eax -> __cpu_capabilities
	mov (%eax), %eax								// %eax = __cpu_capabilities
#else
	mov _COMM_PAGE_CPU_CAPABILITIES, %eax
#endif
#endif
	test $(kHasAES), %eax							// kHasAES & __cpu_capabilities
	jne _aes_decrypt_cbc_hw

	// save registers and allocate stack memory for xmm registers and calling arguments (i386 only)
#if defined __i386__
	push %ebp
	mov %esp, %ebp
	push %ebx										// to be used as ibuf
	push %edi										// to be used as obuf
	sub $(16+16+7*16), %esp							// 12 (calling arguments) + 4 (%esi) + 16 (iv) + 7*16 (xmm)
	mov %esi, 12(%esp)								// save %esi in the unused 4 bytes; %esi will be used as num_blk

	#define sp %esp
#else	// __x86_64__
	push %rbp
	mov %rsp, %rbp
	push %rbx
	push %r12
	push %r13
	push %r14
	push %r15
	sub $(8+16+5*16+16), %rsp						// 8 (align) + 16 (dummy iv) + 5*16 (xmm) + 16 (for i386-x86_64 consistency)

	#define sp %rsp
#endif

	// save xmm registers for kernel use
	// xmm6-xmm7 will be used locally
	// xmm0-xmm2 (x86_64) or xmm0-xmm4 (i386) will be used inside _aes_decrypt_xmm_no_save (non-restored)
	// there is a hole not used for xmm, which is 48(sp).
	// it has been used to store iv (16-bytes) in i386 code
	// for consistency between i386 and x86_64, this hole is dummied in x86_64 code
	// also the 1st 16 bytes (sp) is dummied in x86_64 code

#ifdef KERNEL
	movaps %xmm7, 16(sp)
	movaps %xmm6, 32(sp)
	movaps %xmm0, 64(sp)
	movaps %xmm1, 80(sp)
	movaps %xmm2, 96(sp)
#if defined __i386__
	movaps %xmm3, 112(sp)
	movaps %xmm4, 128(sp)
#endif
#endif

	// set up registers from calling arguments

#if defined __i386__
	mov 12(%ebp), %eax								// in_iv
	mov 24(%ebp), %edx								// ctx
	movups (%eax), %xmm7							// in_iv
	mov %edx, 8(%esp)								// ctx for aes_decrypt
	mov 8(%ebp), %ebx								// ibuf
	mov 16(%ebp), %esi								// num_blk
	mov 20(%ebp), %edi								// obuf

	#define ibuf %ebx
	#define obuf %edi
	#define num_blk %esi
#else	// __x86_64__, rdi/rsi/rdx/rcx/r8
	mov %rdi, %rbx									// ibuf
	movups (%rsi), %xmm7							// in_iv
	mov %rdx, %r13									// num_blk
	mov %rcx, %r14									// obuf
	mov %r8, %r15									// ctx

	#define ibuf %rbx
	#define num_blk %r13d
	#define obuf %r14
	#define ctx %r15

#endif
	// memcpy(tmp, ibuf, AES_BLOCK_SIZE);
	// aes_decrypt(ibuf, obuf, ctx);
	// obuf ^= iv;
	// memcpy(iv, tmp, AES_BLOCK_SIZE);
	// ibuf += AES_BLOCK_SIZE;
	// obuf += AES_BLOCK_SIZE;

	cmp $1, num_blk									// num_blk vs 1
	jl L_crypt_cbc_done								// if num_blk < 1, bypass the main loop, jump to finishing code
0:
	movups (ibuf), %xmm6							// tmp
#if defined __i386__
	mov ibuf, (sp)									// ibuf
	mov obuf, 4(sp)									// obuf
#else
	mov ibuf, %rdi									// ibuf
	mov obuf, %rsi									// obuf
	mov ctx, %rdx									// ctx
#endif
	call _aes_decrypt_xmm_no_save					// aes_decrypt(ibuf, obuf, ctx)
	movups (obuf), %xmm0							// obuf
	pxor %xmm7, %xmm0								// obuf ^= iv;
	movaps %xmm6, %xmm7								// memcpy(iv, tmp, AES_BLOCK_SIZE);
	movups %xmm0, (obuf)							// update obuf
	add $16, ibuf									// ibuf += AES_BLOCK_SIZE;
	add $16, obuf									// obuf += AES_BLOCK_SIZE;
	sub $1, num_blk									// num_blk--
	jg 0b											// if num_blk > 0, repeat the loop
9:

	// we are done here, the finishing code is identical to that in aes_encrypt_cbc, so just jump to there
	jmp L_crypt_cbc_done