]> git.saurik.com Git - apple/xnu.git/blob - bsd/crypto/aes/i386/aes_modes_asm.s
3b0f29aa117e3e01d15f90b5939963b5c87587b5
[apple/xnu.git] / bsd / crypto / aes / i386 / aes_modes_asm.s
1 /*
2 ---------------------------------------------------------------------------
3 Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.
4
5 LICENSE TERMS
6
7 The free distribution and use of this software in both source and binary
8 form is allowed (with or without changes) provided that:
9
10 1. distributions of this source code include the above copyright
11 notice, this list of conditions and the following disclaimer;
12
13 2. distributions in binary form include the above copyright
14 notice, this list of conditions and the following disclaimer
15 in the documentation and/or other associated materials;
16
17 3. the copyright holder's name is not used to endorse products
18 built using this software without specific written permission.
19
20 ALTERNATIVELY, provided that this notice is retained in full, this product
21 may be distributed under the terms of the GNU General Public License (GPL),
22 in which case the provisions of the GPL apply INSTEAD OF those given above.
23
24 DISCLAIMER
25
26 This software is provided 'as is' with no explicit or implied warranties
27 in respect of its properties, including, but not limited to, correctness
28 and/or fitness for purpose.
29 ---------------------------------------------------------------------------
30 Issue 31/01/2006
31
32 These subroutines implement multiple block AES modes for ECB, CBC, CFB,
33 OFB and CTR encryption. The code provides support for the VIA Advanced
34 Cryptography Engine (ACE).
35
36 NOTE: In the following subroutines, the AES contexts (ctx) must be
37 16 byte aligned if VIA ACE is being used
38 */
39
40 /* modified 3/5/10 cclee */
41 /* Clean up those related to VIA ACE and hand optimize aes_cbc_encrypt and aes_cbc_decrypt */
42 /* move the xmm registers save/restore originally inside the callee functions into these 2 caller functions */
43
44 /* add code comments/description and HW AES detection and execution branch cclee 3-13-10 */
45
46 #ifdef KERNEL
47 #include <i386/cpu_capabilities.h> // to use __cpu_capabilities&kHasAES to detect Intel Westmere AES HW
48 #else
49 #include <System/i386/cpu_capabilities.h> // to use __cpu_capabilities&kHasAES to detect Intel Westmere AES HW
50 #endif
51
52 #if 0
53
54 // TODO:
55 // aes_ecb_encrypt and aes_ecb_decrypt are not present in gen/aescrypt.c
56 // would add the implementation if needed
57 // they are now compiled from aes_modes.c
58
/*
 * Reference C version (disabled by the enclosing #if 0).
 * ECB-encrypts len bytes from ibuf into obuf, one AES block per aes_encrypt() call.
 * Returns 1 before processing anything when len is not a multiple of AES_BLOCK_SIZE,
 * otherwise returns 0.  The value returned by aes_encrypt() is not checked.
 */
59 aes_rval aes_ecb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
60 int len, const aes_encrypt_ctx ctx[1])
61 { int nb = len >> 4; // number of blocks; the shift by 4 hard-codes a 16-byte block
62
63 if(len & (AES_BLOCK_SIZE - 1)) return 1; // reject a partial trailing block
64 while(nb--) {
65 aes_encrypt(ibuf, obuf, ctx); // encrypt one block
66 ibuf += AES_BLOCK_SIZE; // advance to the next input block
67 obuf += AES_BLOCK_SIZE; // advance to the next output block
68 }
69 return 0;
70 }
71
/*
 * Reference C version (disabled by the enclosing #if 0).
 * ECB-decrypts len bytes from ibuf into obuf, one AES block per aes_decrypt() call.
 * Returns 1 before processing anything when len is not a multiple of AES_BLOCK_SIZE,
 * otherwise returns 0.  The value returned by aes_decrypt() is not checked.
 */
72 aes_rval aes_ecb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
73 int len, const aes_decrypt_ctx ctx[1])
74 { int nb = len >> 4; // number of blocks; the shift by 4 hard-codes a 16-byte block
75
76 if(len & (AES_BLOCK_SIZE - 1)) return 1; // reject a partial trailing block
77 while(nb--) {
78 aes_decrypt(ibuf, obuf, ctx); // decrypt one block
79 ibuf += AES_BLOCK_SIZE; // advance to the next input block
80 obuf += AES_BLOCK_SIZE; // advance to the next output block
81 }
82 return 0;
83 }
84 #endif
85
86 #if 0
/*
 * C-style pseudocode (disabled by the enclosing #if 0) for the assembly routine
 * _aes_encrypt_cbc that follows; the comments in that routine quote these statements.
 * "iv ^= ibuf" is not valid C: it stands for a 16-byte XOR of the current input
 * block into iv.  in_iv is copied to a local iv and is never written back.
 * Always returns 0.
 */
87 aes_rval aes_encrypt_cbc(const unsigned char *ibuf, const unsigned char *in_iv, unsigned int num_blk,
88 unsigned char *obuf, const aes_encrypt_ctx ctx[1])
89 {
90 unsigned char iv[16];
91 int i;
92
93 for (i = 0; i < 16; i++) iv[i] = *(in_iv + i); // local working copy of the iv
94
95 while (num_blk--) {
96 iv ^= ibuf; // 128-bit XOR of the plaintext block into iv (pseudocode)
97 aes_encrypt(iv, iv, ctx); // encrypt iv in place: iv is now the ciphertext block
98 memcpy(obuf, iv, AES_BLOCK_SIZE); // emit the ciphertext block; it also chains into the next iteration
99 ibuf += AES_BLOCK_SIZE;
100 obuf += AES_BLOCK_SIZE;
101
102 }
103
104 return 0;
105 }
106 #endif
107
// ---------------------------------------------------------------------------
// _aes_encrypt_cbc(ibuf, in_iv, num_blk, obuf, ctx)
//
// CBC-encrypts num_blk 16-byte blocks from ibuf into obuf, starting from the
// 16-byte iv read at in_iv.  If kHasAES is set in the cpu capabilities word,
// control is transferred to _aes_encrypt_cbc_hw with the argument registers /
// stack untouched (only %eax/%rax has been modified at that point); otherwise
// every block is encrypted with a call to _aes_encrypt_xmm_no_save.
//
// arguments : i386    8/12/16/20/24(%ebp) = ibuf/in_iv/num_blk/obuf/ctx
//             x86_64  %rdi/%rsi/%rdx/%rcx/%r8 = ibuf/in_iv/num_blk/obuf/ctx
// returns   : %eax = 0 on the software path; the in_iv buffer is only read
// frame     : locals are addressed through the macro sp; the epilogue label
//             L_crypt_cbc_done is also the exit path of _aes_decrypt_cbc, so
//             both routines have to keep the same frame size and layout
// NOTE(review): num_blk is tested with signed branches (jl/jg), so a count with
//             bit 31 set skips the loop and still returns 0, whereas the C
//             prototype declares it unsigned int - confirm this is intended
// NOTE(review): the movaps saves need 16-byte aligned slots, i.e. sp has to be
//             16-byte aligned after the prologue - presumably guaranteed by
//             the caller's stack alignment; confirm
// ---------------------------------------------------------------------------
108 .text
109 .align 4,0x90
110 .globl _aes_encrypt_cbc
111 _aes_encrypt_cbc:
112
113 // detect AES HW
114 // if AES HW detected, branch to AES-HW-specific function _aes_encrypt_cbc_hw (aes_modes_hw.s)
115 // o.w., fall through to the original AES-SW function
116
117 #if defined __x86_64__
118 movq __cpu_capabilities@GOTPCREL(%rip), %rax // %rax -> __cpu_capabilities (address taken from the GOT)
119 mov (%rax), %eax // %eax = __cpu_capabilities
120 #else
121 #ifdef KERNEL
122 leal __cpu_capabilities, %eax // %eax -> __cpu_capabilities
123 mov (%eax), %eax // %eax = __cpu_capabilities
124 #else
125 mov _COMM_PAGE_CPU_CAPABILITIES, %eax // %eax = capabilities word loaded from _COMM_PAGE_CPU_CAPABILITIES
126 #endif
127 #endif
128 test $(kHasAES), %eax // kHasAES & __cpu_capabilities
129 jne _aes_encrypt_cbc_hw // if AES HW detected, branch to HW-specific code
130
131 // save registers and allocate stack memory for xmm registers and calling arguments (i386 only)
132 #if defined __i386__
133 push %ebp
134 mov %esp, %ebp
135 push %ebx // to be used as ibuf
136 push %edi // to be used as obuf
137 sub $(16+16+7*16), %esp // 12 (calling arguments) + 4 (%esi) + 16 (iv) + 7*16 (xmm)
138 mov %esi, 12(%esp) // save %esi in the unused 4-bytes, %esi is to be used as num_blk
139
140 #define sp %esp
141 #else // __x86_64__
142 push %rbp
143 mov %rsp, %rbp
144 push %rbx
145 push %r12
146 push %r13
147 push %r14
148 push %r15
149 sub $(8+16+5*16+16), %rsp // 8 (align) + 16 (iv) + 5*16 (xmm) + 16 (unused, for i386-x86_64 consistency)
150
151 #define sp %rsp
152 #endif
153
154 // save xmm registers for kernel use
155 // xmm6-xmm7 will be used locally
156 // xmm0-xmm2 (x86_64) or xmm0-xmm4 (i386) will be used inside _aes_encrypt_xmm_no_save (non-restored)
157 // 48(sp) is the one 16-byte slot of this area that does not hold a saved xmm register:
158 // it is the working iv block handed to _aes_encrypt_xmm_no_save as both input and output
159 // (addressed as 48(%esp) in the i386 code and through %r12 in the x86_64 code)
160 // the 1st 16 bytes at (sp) hold the i386 call arguments and the saved %esi; they are not referenced in x86_64 code
161
162 #ifdef KERNEL
163 movaps %xmm7, 16(sp)
164 movaps %xmm6, 32(sp)
165 movaps %xmm0, 64(sp)
166 movaps %xmm1, 80(sp)
167 movaps %xmm2, 96(sp)
168 #if defined __i386__
169 movaps %xmm3, 112(sp)
170 movaps %xmm4, 128(sp)
171 #endif
172 #endif
173
174 // set up registers from calling arguments
175
176 #if defined __i386__
177
178 mov 12(%ebp), %eax // in_iv
179 mov 24(%ebp), %edx // ctx
180 movups (%eax), %xmm7 // %xmm7 = *in_iv (unaligned load)
181 lea 48(%esp), %eax // &iv[0]
182 mov %eax, (%esp) // 1st argument of aes_encrypt(iv, iv, ctx); written once, reused by every call in the loop
183 mov %eax, 4(%esp) // 2nd argument of aes_encrypt(iv, iv, ctx); written once, reused by every call in the loop
184 mov %edx, 8(%esp) // 3rd argument (ctx) of aes_encrypt; written once, reused by every call in the loop
185 mov 8(%ebp), %ebx // ibuf
186 mov 16(%ebp), %esi // num_blk
187 mov 20(%ebp), %edi // obuf
188
189 #define ibuf %ebx
190 #define obuf %edi
191 #define num_blk %esi
192
193 #else // __x86_64__, calling arguments order : rdi/rsi/rdx/rcx/r8
194
195 mov %rdi, %rbx // ibuf
196 lea 48(sp), %r12 // &iv
197 movups (%rsi), %xmm7 // %xmm7 = *in_iv (unaligned load)
198 mov %rdx, %r13 // num_blk (only the low 32 bits, %r13d, are used below)
199 mov %rcx, %r14 // obuf
200 mov %r8, %r15 // ctx
201
202 #define ibuf %rbx
203 #define iv %r12
204 #define num_blk %r13d
205 #define obuf %r14
206 #define ctx %r15
207
208 #endif
209
210 cmp $1, num_blk // num_blk vs 1 (signed comparison)
211 jl 9f // if num_blk < 1, branch to bypass the main loop
212 0:
213 movups (ibuf), %xmm6 // ibuf
214 #if defined __i386__
215 lea 48(sp), %eax // &iv[0]
216 pxor %xmm6, %xmm7 // iv ^= ibuf
217 movups %xmm7, (%eax) // save iv
218 #else
219 pxor %xmm6, %xmm7 // iv ^= ibuf
220 movups %xmm7, (iv) // save iv
221 mov iv, %rdi // 1st calling argument for aes_encrypt
222 mov iv, %rsi // 2nd calling argument for aes_encrypt
223 mov ctx, %rdx // 3rd calling argument for aes_encrypt
224 #endif
225 call _aes_encrypt_xmm_no_save // aes_encrypt(iv, iv, ctx)
226 #if defined __i386__
227 leal 48(%esp), %eax // &iv[0]
228 movups (%eax), %xmm7 // read iv back from memory (now the encrypted block)
229 #else
230 movups (iv), %xmm7 // read iv back from memory (now the encrypted block)
231 #endif
232 movups %xmm7, (obuf) // memcpy(obuf, iv, AES_BLOCK_SIZE);
233 add $16, ibuf // ibuf += AES_BLOCK_SIZE;
234 add $16, obuf // obuf += AES_BLOCK_SIZE;
235 sub $1, num_blk // num_blk --
236 jg 0b // if num_blk > 0 (signed), repeat the loop
237 9:
238
239 L_crypt_cbc_done: // common exit: _aes_decrypt_cbc also jumps here
240
241 // restore xmm registers due to kernel use
242 #ifdef KERNEL
243 movaps 16(sp), %xmm7
244 movaps 32(sp), %xmm6
245 movaps 64(sp), %xmm0
246 movaps 80(sp), %xmm1
247 movaps 96(sp), %xmm2
248 #if defined __i386__
249 movaps 112(sp), %xmm3
250 movaps 128(sp), %xmm4
251 #endif
252 #endif
253
254 xor %eax, %eax // to return 0 for SUCCESS
255
256 #if defined __i386__
257 mov 12(%esp), %esi // restore %esi
258 add $(16+16+7*16), %esp // 12 (calling arguments) + 4 (%esi) + 16 (iv) + 7*16 (xmm)
259 pop %edi
260 pop %ebx
261 #else
262 add $(8+16+5*16+16), %rsp // 8 (align) + 16 (iv) + 5*16 (xmm) + 16 (unused, for i386-x86_64 consistency)
263 pop %r15
264 pop %r14
265 pop %r13
266 pop %r12
267 pop %rbx
268 #endif
269 leave // restore the caller's frame pointer pushed in the prologue
270 ret
271
272 #if 0
/*
 * C-style pseudocode (disabled by the enclosing #if 0) for the assembly routine
 * _aes_decrypt_cbc that follows; the comments in that routine quote these statements.
 * "obuf ^= iv" is not valid C: it stands for a 16-byte XOR of iv into the block
 * just written at obuf.  tmp keeps the ciphertext block so that it can become
 * the next iv.  in_iv is copied to a local iv and is never written back.
 * Always returns 0.
 * The context parameter is named ctx to match its use in the body.
 */
273 aes_rval aes_decrypt_cbc(const unsigned char *ibuf, const unsigned char *in_iv, unsigned int num_blk,
274 unsigned char *obuf, const aes_decrypt_ctx ctx[1])
275 {
276 unsigned char iv[16], tmp[16];
277 int i;
278
279 for (i = 0; i < 16; i++) iv[i] = *(in_iv + i); // local working copy of the iv
280
281 while (num_blk--) {
282
283 memcpy(tmp, ibuf, AES_BLOCK_SIZE); // remember the ciphertext block before it is decrypted
284 aes_decrypt(ibuf, obuf, ctx); // raw block decryption into obuf
285 obuf ^= iv; // 128-bit XOR with the previous ciphertext block / initial iv (pseudocode)
286 memcpy(iv, tmp, AES_BLOCK_SIZE); // this ciphertext block chains into the next iteration
287 ibuf += AES_BLOCK_SIZE;
288 obuf += AES_BLOCK_SIZE;
289 }
290
291 return 0;
292 }
293 #endif
294
// ---------------------------------------------------------------------------
// _aes_decrypt_cbc(ibuf, in_iv, num_blk, obuf, ctx)
//
// CBC-decrypts num_blk 16-byte blocks from ibuf into obuf, starting from the
// 16-byte iv read at in_iv.  If kHasAES is set in the cpu capabilities word,
// control is transferred to _aes_decrypt_cbc_hw with the argument registers /
// stack untouched (only %eax/%rax has been modified at that point); otherwise
// every block is decrypted with a call to _aes_decrypt_xmm_no_save and then
// XORed with the previous ciphertext block (in_iv for the first block).
//
// arguments : i386    8/12/16/20/24(%ebp) = ibuf/in_iv/num_blk/obuf/ctx
//             x86_64  %rdi/%rsi/%rdx/%rcx/%r8 = ibuf/in_iv/num_blk/obuf/ctx
// returns   : through the epilogue at L_crypt_cbc_done (defined in
//             _aes_encrypt_cbc), which restores the registers and returns 0;
//             the prologue and frame layout here therefore have to stay
//             identical to those of _aes_encrypt_cbc
// registers : %xmm7 (iv) and %xmm6 (tmp) are kept live across the call to
//             _aes_decrypt_xmm_no_save, so that routine must leave them intact
// NOTE(review): num_blk is tested with signed branches (jl/jg), so a count with
//             bit 31 set skips the loop and still returns 0, whereas the C
//             prototype declares it unsigned int - confirm this is intended
// NOTE(review): the movaps saves need 16-byte aligned slots, i.e. sp has to be
//             16-byte aligned after the prologue - presumably guaranteed by
//             the caller's stack alignment; confirm
// ---------------------------------------------------------------------------
295 .text
296 .align 4,0x90
297 .globl _aes_decrypt_cbc
298 _aes_decrypt_cbc:
299
300 // detect AES HW
301 // if AES HW detected, branch to AES-HW-specific function _aes_decrypt_cbc_hw (aes_modes_hw.s)
302 // o.w., fall through to the original AES-SW function
303
304 #if defined __x86_64__
305 movq __cpu_capabilities@GOTPCREL(%rip), %rax // %rax -> __cpu_capabilities (address taken from the GOT)
306 mov (%rax), %eax // %eax = __cpu_capabilities
307 #else
308 #ifdef KERNEL
309 leal __cpu_capabilities, %eax // %eax -> __cpu_capabilities
310 mov (%eax), %eax // %eax = __cpu_capabilities
311 #else
312 mov _COMM_PAGE_CPU_CAPABILITIES, %eax // %eax = capabilities word loaded from _COMM_PAGE_CPU_CAPABILITIES
313 #endif
314 #endif
315 test $(kHasAES), %eax // kHasAES & __cpu_capabilities
316 jne _aes_decrypt_cbc_hw // if AES HW detected, branch to HW-specific code
317
318 // save registers and allocate stack memory for xmm registers and calling arguments (i386 only)
319 #if defined __i386__
320 push %ebp
321 mov %esp, %ebp
322 push %ebx // to be used as ibuf
323 push %edi // to be used as obuf
324 sub $(16+16+7*16), %esp // 12 (calling arguments) + 4 (%esi) + 16 (unused iv slot) + 7*16 (xmm)
325 mov %esi, 12(%esp) // save %esi in the unused 4-bytes, %esi is to be used as num_blk
326
327 #define sp %esp
328 #else // __x86_64__
329 push %rbp
330 mov %rsp, %rbp
331 push %rbx
332 push %r12 // not used in this routine; pushed because the shared epilogue pops it
333 push %r13
334 push %r14
335 push %r15
336 sub $(8+16+5*16+16), %rsp // 8 (align) + 16 (unused iv slot) + 5*16 (xmm) + 16 (unused, for i386-x86_64 consistency)
337
338 #define sp %rsp
339 #endif
340
341 // save xmm registers for kernel use
342 // xmm6-xmm7 will be used locally, xmm0 is also used locally after each call
343 // xmm0-xmm2 (x86_64) or xmm0-xmm4 (i386) are presumably used inside _aes_decrypt_xmm_no_save (non-restored) - verify against that routine
344 // the 16-byte slot at 48(sp) does not hold a saved xmm register and is never referenced in this routine
345 // (the iv lives in %xmm7 here); the slot is kept so that the frame matches the one of
346 // _aes_encrypt_cbc, whose epilogue at L_crypt_cbc_done is reused
347 // the 1st 16 bytes at (sp) hold the i386 call arguments and the saved %esi; they are not referenced in x86_64 code
348
349 #ifdef KERNEL
350 movaps %xmm7, 16(sp)
351 movaps %xmm6, 32(sp)
352 movaps %xmm0, 64(sp)
353 movaps %xmm1, 80(sp)
354 movaps %xmm2, 96(sp)
355 #if defined __i386__
356 movaps %xmm3, 112(sp)
357 movaps %xmm4, 128(sp)
358 #endif
359 #endif
360
361 // set up registers from calling arguments
362
363 #if defined __i386__
364 mov 12(%ebp), %eax // in_iv
365 mov 24(%ebp), %edx // ctx
366 movups (%eax), %xmm7 // %xmm7 = *in_iv (unaligned load)
367 mov %edx, 8(%esp) // 3rd argument (ctx) of aes_decrypt; written once, reused by every call in the loop
368 mov 8(%ebp), %ebx // ibuf
369 mov 16(%ebp), %esi // num_blk
370 mov 20(%ebp), %edi // obuf
371
372 #define ibuf %ebx
373 #define obuf %edi
374 #define num_blk %esi
375 #else // __x86_64__, rdi/rsi/rdx/rcx/r8
376 mov %rdi, %rbx // ibuf
377 movups (%rsi), %xmm7 // %xmm7 = *in_iv (unaligned load)
378 mov %rdx, %r13 // num_blk (only the low 32 bits, %r13d, are used below)
379 mov %rcx, %r14 // obuf
380 mov %r8, %r15 // ctx
381
382 #define ibuf %rbx
383 #define num_blk %r13d
384 #define obuf %r14
385 #define ctx %r15
386
387 #endif
388 // memcpy(tmp, ibuf, AES_BLOCK_SIZE);
389 // aes_decrypt(ibuf, obuf, ctx);
390 // obuf ^= iv;
391 // memcpy(iv, tmp, AES_BLOCK_SIZE);
392 // ibuf += AES_BLOCK_SIZE;
393 // obuf += AES_BLOCK_SIZE;
394
395 cmp $1, num_blk // num_blk vs 1 (signed comparison)
396 jl L_crypt_cbc_done // if num_blk < 1, bypass the main loop, jump to finishing code
397 0:
398 movups (ibuf), %xmm6 // tmp = ciphertext block, loaded before the call because it becomes the next iv
399 #if defined __i386__
400 mov ibuf, (sp) // 1st argument of aes_decrypt: ibuf
401 mov obuf, 4(sp) // 2nd argument of aes_decrypt: obuf
402 #else
403 mov ibuf, %rdi // ibuf
404 mov obuf, %rsi // obuf
405 mov ctx, %rdx // ctx
406 #endif
407 call _aes_decrypt_xmm_no_save // aes_decrypt(ibuf, obuf, ctx)
408 movups (obuf), %xmm0 // block just written at obuf by the call
409 pxor %xmm7, %xmm0 // obuf ^= iv;
410 movaps %xmm6, %xmm7 // memcpy(iv, tmp, AES_BLOCK_SIZE);
411 movups %xmm0, (obuf) // update obuf
412 add $16, ibuf // ibuf += AES_BLOCK_SIZE;
413 add $16, obuf // obuf += AES_BLOCK_SIZE;
414 sub $1, num_blk // num_blk --
415 jg 0b // if num_blk > 0 (signed), repeat the loop
416 9: // no branch in this routine targets this label
417
418 // we are done here, the finishing code is identical to that in aes_encrypt_cbc, so just jump to there
419 jmp L_crypt_cbc_done
420