/*
 ---------------------------------------------------------------------------
 Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.

 LICENSE TERMS

 The free distribution and use of this software in both source and binary
 form is allowed (with or without changes) provided that:

   1. distributions of this source code include the above copyright
      notice, this list of conditions and the following disclaimer;

   2. distributions in binary form include the above copyright
      notice, this list of conditions and the following disclaimer
      in the documentation and/or other associated materials;

   3. the copyright holder's name is not used to endorse products
      built using this software without specific written permission.

 ALTERNATIVELY, provided that this notice is retained in full, this product
 may be distributed under the terms of the GNU General Public License (GPL),
 in which case the provisions of the GPL apply INSTEAD OF those given above.

 DISCLAIMER

 This software is provided 'as is' with no explicit or implied warranties
 in respect of its properties, including, but not limited to, correctness
 and/or fitness for purpose.
 ---------------------------------------------------------------------------
 Issue 31/01/2006

 These subroutines implement multiple-block AES modes for ECB, CBC, CFB,
 OFB and CTR encryption. The code provides support for the VIA Advanced
 Cryptography Engine (ACE).

 NOTE: In the following subroutines, the AES contexts (ctx) must be
 16-byte aligned if VIA ACE is being used
*/
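
#if 0
	/*
	   Illustrative usage sketch (not compiled).  aes_encrypt_key128() is assumed
	   from the companion Gladman aes.h; only the multi-block mode entry points are
	   implemented in this file.  The aligned attribute reflects the 16-byte
	   context alignment required when VIA ACE is in use (see the NOTE above).
	*/
	static aes_rval cbc_encrypt_example(const unsigned char key[16], const unsigned char iv[16],
						const unsigned char *pt, unsigned char *ct, unsigned int nblk)
	{
		aes_encrypt_ctx ctx[1] __attribute__((aligned(16)));
		aes_encrypt_key128(key, ctx);					/* assumed key-schedule helper */
		return aes_encrypt_cbc(pt, iv, nblk, ct, ctx);	/* implemented below */
	}
#endif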

/* modified 3/5/10 cclee */
/* Clean up the code related to VIA ACE and hand-optimize aes_cbc_encrypt and aes_cbc_decrypt. */
/* Move the xmm register save/restore, originally inside the callee functions, into these 2 caller functions. */

/* add code comments/description and HW AES detection and execution branch, cclee 3-13-10 */

#ifdef KERNEL
#include <i386/cpu_capabilities.h>			// to use __cpu_capabilities & kHasAES to detect Intel Westmere AES HW
#else
#include <System/i386/cpu_capabilities.h>	// to use __cpu_capabilities & kHasAES to detect Intel Westmere AES HW
#endif

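#if 0
	/*
	   Illustrative C sketch (not compiled) of the run-time dispatch performed in
	   assembly at the top of _aes_encrypt_cbc and _aes_decrypt_cbc below.  How the
	   capability word is read differs between kernel and user space; the extern
	   declaration below is an assumption for illustration only.
	*/
	extern int _cpu_capabilities;		/* assumed C-visible alias of __cpu_capabilities */

	if (_cpu_capabilities & kHasAES)
		return aes_encrypt_cbc_hw(ibuf, in_iv, num_blk, obuf, ctx);	/* AES-NI path in aes_modes_hw.s */
	/* otherwise fall through to the software implementation */
#endif
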
#if 0

// TODO:
// aes_ecb_encrypt and aes_ecb_decrypt are not present in gen/aescrypt.c;
// the implementations would be added here if needed.
// For now they are compiled from aes_modes.c.

aes_rval aes_ecb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
					int len, const aes_encrypt_ctx ctx[1])
{	int nb = len >> 4;

	if(len & (AES_BLOCK_SIZE - 1)) return 1;
	while(nb--) {
		aes_encrypt(ibuf, obuf, ctx);
		ibuf += AES_BLOCK_SIZE;
		obuf += AES_BLOCK_SIZE;
	}
	return 0;
}

aes_rval aes_ecb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
					int len, const aes_decrypt_ctx ctx[1])
{	int nb = len >> 4;

	if(len & (AES_BLOCK_SIZE - 1)) return 1;
	while(nb--) {
		aes_decrypt(ibuf, obuf, ctx);
		ibuf += AES_BLOCK_SIZE;
		obuf += AES_BLOCK_SIZE;
	}
	return 0;
}
#endif

#if 0
aes_rval aes_encrypt_cbc(const unsigned char *ibuf, const unsigned char *in_iv, unsigned int num_blk,
					 unsigned char *obuf, const aes_encrypt_ctx ctx[1])
{
	unsigned char iv[16];
	int i;

	for (i = 0; i < 16; i++) iv[i] = *(in_iv + i);

	while (num_blk--) {
		for (i = 0; i < 16; i++) iv[i] ^= ibuf[i];	// iv ^= ibuf (128-bit xor, done per byte in C)
		aes_encrypt(iv, iv, ctx);
		memcpy(obuf, iv, AES_BLOCK_SIZE);
		ibuf += AES_BLOCK_SIZE;
		obuf += AES_BLOCK_SIZE;
	}

	return 0;
}
#endif

	.text
	.align	4,0x90
	.globl	_aes_encrypt_cbc
_aes_encrypt_cbc:

	// detect AES HW
	// if AES HW is detected, branch to the AES-HW-specific function _aes_encrypt_cbc_hw (aes_modes_hw.s)
	// otherwise, fall through to the original AES-SW function

#if defined __x86_64__
	movq	__cpu_capabilities@GOTPCREL(%rip), %rax		// %rax -> __cpu_capabilities
	mov		(%rax), %eax								// %eax = __cpu_capabilities
#else
#ifdef KERNEL
	leal	__cpu_capabilities, %eax					// %eax -> __cpu_capabilities
	mov		(%eax), %eax								// %eax = __cpu_capabilities
#else
	mov		_COMM_PAGE_CPU_CAPABILITIES, %eax			// %eax = __cpu_capabilities (from the comm page)
#endif
#endif
	test	$(kHasAES), %eax					// kHasAES & __cpu_capabilities
	jne		_aes_encrypt_cbc_hw					// if AES HW detected, branch to HW-specific code

	// save registers and allocate stack memory for xmm registers and calling arguments (i386 only)
#if defined	__i386__
	push	%ebp
	mov		%esp, %ebp
	push	%ebx					// to be used as ibuf
	push	%edi					// to be used as obuf
	sub		$(16+16+7*16), %esp		// 12 (calling arguments) + 4 (%esi) + 16 (iv) + 7*16 (xmm)
	mov		%esi, 12(%esp)			// save %esi in the otherwise unused 4 bytes; %esi will be used as num_blk

	#define	sp	%esp
#else	// __x86_64__
	push	%rbp
	mov		%rsp, %rbp
	push	%rbx
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	sub		$(8+16+5*16+16), %rsp	// 8 (align) + 16 (dummy iv) + 5*16 (xmm) + 16 (for i386-x86_64 consistency)

	#define	sp	%rsp
#endif

	// save xmm registers for kernel use
	// xmm6-xmm7 will be used locally
	// xmm0-xmm2 (x86_64) or xmm0-xmm4 (i386) will be used inside _aes_encrypt_xmm_no_save (and are not restored by the callee)
	// the 16 bytes at 48(sp) are not used for xmm state; they hold the working iv
	// (addressed directly on i386, and through the iv register %r12 on x86_64)
	// the 1st 16 bytes at (sp) hold the calling arguments on i386 and are dummied (unused) on x86_64

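	// resulting frame layout (byte offsets from sp, derived from the stores below):
	//     0..15   callee arguments for _aes_encrypt_xmm_no_save (i386; 12(sp) holds the saved %esi) / unused on x86_64
	//    16..31   saved %xmm7
	//    32..47   saved %xmm6
	//    48..63   iv working buffer
	//    64..79   saved %xmm0
	//    80..95   saved %xmm1
	//    96..111  saved %xmm2
	//   112..127  saved %xmm3 (i386 only)
	//   128..143  saved %xmm4 (i386 only)
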
#ifdef	KERNEL
	movaps	%xmm7, 16(sp)
	movaps	%xmm6, 32(sp)
	movaps	%xmm0, 64(sp)
	movaps	%xmm1, 80(sp)
	movaps	%xmm2, 96(sp)
#if defined	__i386__
	movaps	%xmm3, 112(sp)
	movaps	%xmm4, 128(sp)
#endif
#endif

	// set up registers from calling arguments

#if defined	__i386__

	mov		12(%ebp), %eax			// in_iv
	mov		24(%ebp), %edx			// ctx
	movups	(%eax), %xmm7			// in_iv
	lea		48(%esp), %eax			// &iv[0]
	mov		%eax, (%esp)			// 1st argument (input) for aes_encrypt
	mov		%eax, 4(%esp)			// 2nd argument (output) for aes_encrypt
	mov		%edx, 8(%esp)			// ctx for aes_encrypt
	mov		8(%ebp), %ebx			// ibuf
	mov		16(%ebp), %esi			// num_blk
	mov		20(%ebp), %edi			// obuf

	#define	ibuf	%ebx
	#define	obuf	%edi
	#define	num_blk	%esi

#else	// __x86_64__, calling arguments order : rdi/rsi/rdx/rcx/r8

	mov		%rdi, %rbx				// ibuf
	lea		48(sp), %r12			// &iv
	movups	(%rsi), %xmm7			// in_iv
	mov		%rdx, %r13				// num_blk
	mov		%rcx, %r14				// obuf
	mov		%r8, %r15				// ctx

	#define	ibuf	%rbx
	#define	iv		%r12
	#define	num_blk	%r13d
	#define	obuf	%r14
	#define	ctx		%r15

#endif

	cmp		$1, num_blk						// num_blk vs 1
	jl		9f								// if num_blk < 1, branch to bypass the main loop
0:
	movups	(ibuf), %xmm6					// ibuf
#if defined	__i386__
	lea		48(sp), %eax					// &iv[0]
	pxor	%xmm6, %xmm7					// iv ^= ibuf
	movups	%xmm7, (%eax)					// save iv
#else
	pxor	%xmm6, %xmm7					// iv ^= ibuf
	movups	%xmm7, (iv)						// save iv
	mov		iv, %rdi						// 1st calling argument for aes_encrypt
	mov		iv, %rsi						// 2nd calling argument for aes_encrypt
	mov		ctx, %rdx						// 3rd calling argument for aes_encrypt
#endif
	call	_aes_encrypt_xmm_no_save		// aes_encrypt(iv, iv, ctx)
#if defined	__i386__
	leal	48(%esp), %eax					// &iv[0]
	movups	(%eax), %xmm7					// read iv
#else
	movups	(iv), %xmm7						// read iv
#endif
	movups	%xmm7, (obuf)					// memcpy(obuf, iv, AES_BLOCK_SIZE);
	add		$16, ibuf						// ibuf += AES_BLOCK_SIZE;
	add		$16, obuf						// obuf += AES_BLOCK_SIZE;
	sub		$1, num_blk						// num_blk--
	jg		0b								// if num_blk > 0, repeat the loop
9:

L_crypt_cbc_done:

	// restore xmm registers due to kernel use
#ifdef	KERNEL
	movaps	16(sp), %xmm7
	movaps	32(sp), %xmm6
	movaps	64(sp), %xmm0
	movaps	80(sp), %xmm1
	movaps	96(sp), %xmm2
#if defined	__i386__
	movaps	112(sp), %xmm3
	movaps	128(sp), %xmm4
#endif
#endif

	xor		%eax, %eax						// return 0 for SUCCESS

#if defined	__i386__
	mov		12(%esp), %esi					// restore %esi
	add		$(16+16+7*16), %esp				// 12 (calling arguments) + 4 (%esi) + 16 (iv) + 7*16 (xmm)
	pop		%edi
	pop		%ebx
#else
	add		$(8+16+5*16+16), %rsp			// 8 (align) + 16 (dummy iv) + 5*16 (xmm) + 16 (for i386-x86_64 consistency)
	pop		%r15
	pop		%r14
	pop		%r13
	pop		%r12
	pop		%rbx
#endif
	leave
	ret

#if 0
aes_rval aes_decrypt_cbc(const unsigned char *ibuf, const unsigned char *in_iv, unsigned int num_blk,
					 unsigned char *obuf, const aes_decrypt_ctx ctx[1])
{
	unsigned char iv[16], tmp[16];
	int i;

	for (i = 0; i < 16; i++) iv[i] = *(in_iv + i);

	while (num_blk--) {

		memcpy(tmp, ibuf, AES_BLOCK_SIZE);
		aes_decrypt(ibuf, obuf, ctx);
		for (i = 0; i < 16; i++) obuf[i] ^= iv[i];	// obuf ^= iv (128-bit xor, done per byte in C)
		memcpy(iv, tmp, AES_BLOCK_SIZE);
		ibuf += AES_BLOCK_SIZE;
		obuf += AES_BLOCK_SIZE;
	}

	return 0;
}
#endif

	.text
	.align	4,0x90
	.globl	_aes_decrypt_cbc
_aes_decrypt_cbc:

	// detect AES HW
	// if AES HW is detected, branch to the AES-HW-specific function _aes_decrypt_cbc_hw (aes_modes_hw.s)
	// otherwise, fall through to the original AES-SW function

#if defined __x86_64__
	movq	__cpu_capabilities@GOTPCREL(%rip), %rax		// %rax -> __cpu_capabilities
	mov		(%rax), %eax								// %eax = __cpu_capabilities
#else
#ifdef KERNEL
	leal	__cpu_capabilities, %eax					// %eax -> __cpu_capabilities
	mov		(%eax), %eax								// %eax = __cpu_capabilities
#else
	mov		_COMM_PAGE_CPU_CAPABILITIES, %eax			// %eax = __cpu_capabilities (from the comm page)
#endif
#endif
	test	$(kHasAES), %eax					// kHasAES & __cpu_capabilities
	jne		_aes_decrypt_cbc_hw					// if AES HW detected, branch to HW-specific code

	// save registers and allocate stack memory for xmm registers and calling arguments (i386 only)
#if defined	__i386__
	push	%ebp
	mov		%esp, %ebp
	push	%ebx					// to be used as ibuf
	push	%edi					// to be used as obuf
	sub		$(16+16+7*16), %esp		// 12 (calling arguments) + 4 (%esi) + 16 (iv) + 7*16 (xmm)
	mov		%esi, 12(%esp)			// save %esi in the otherwise unused 4 bytes; %esi will be used as num_blk

	#define	sp	%esp
#else	// __x86_64__
	push	%rbp
	mov		%rsp, %rbp
	push	%rbx
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	sub		$(8+16+5*16+16), %rsp	// 8 (align) + 16 (dummy iv) + 5*16 (xmm) + 16 (for i386-x86_64 consistency)

	#define	sp	%rsp
#endif

	// save xmm registers for kernel use
	// xmm6-xmm7 will be used locally
	// xmm0-xmm2 (x86_64) or xmm0-xmm4 (i386) will be used inside _aes_decrypt_xmm_no_save (and are not restored by the callee)
	// the frame layout is kept identical to _aes_encrypt_cbc above (including the 16-byte
	// hole at 48(sp), which is unused here) so that both functions can share the common
	// epilogue at L_crypt_cbc_done

#ifdef	KERNEL
	movaps	%xmm7, 16(sp)
	movaps	%xmm6, 32(sp)
	movaps	%xmm0, 64(sp)
	movaps	%xmm1, 80(sp)
	movaps	%xmm2, 96(sp)
#if defined	__i386__
	movaps	%xmm3, 112(sp)
	movaps	%xmm4, 128(sp)
#endif
#endif

	// set up registers from calling arguments

#if defined	__i386__
	mov		12(%ebp), %eax			// in_iv
	mov		24(%ebp), %edx			// ctx
	movups	(%eax), %xmm7			// in_iv
	mov		%edx, 8(%esp)			// ctx (3rd calling argument) for aes_decrypt
	mov		8(%ebp), %ebx			// ibuf
	mov		16(%ebp), %esi			// num_blk
	mov		20(%ebp), %edi			// obuf

	#define	ibuf	%ebx
	#define	obuf	%edi
	#define	num_blk	%esi
#else	// __x86_64__, calling arguments order : rdi/rsi/rdx/rcx/r8
	mov		%rdi, %rbx				// ibuf
	movups	(%rsi), %xmm7			// in_iv
	mov		%rdx, %r13				// num_blk
	mov		%rcx, %r14				// obuf
	mov		%r8, %r15				// ctx

	#define	ibuf	%rbx
	#define	num_blk	%r13d
	#define	obuf	%r14
	#define	ctx		%r15

#endif

	// the loop below implements the reference C:
	//		memcpy(tmp, ibuf, AES_BLOCK_SIZE);
	//		aes_decrypt(ibuf, obuf, ctx);
	//		obuf ^= iv;
	//		memcpy(iv, tmp, AES_BLOCK_SIZE);
	//		ibuf += AES_BLOCK_SIZE;
	//		obuf += AES_BLOCK_SIZE;

	cmp		$1, num_blk						// num_blk vs 1
	jl		L_crypt_cbc_done				// if num_blk < 1, bypass the main loop and jump to the finishing code
0:
	movups	(ibuf), %xmm6					// tmp = ibuf
#if defined	__i386__
	mov		ibuf, (sp)						// ibuf (1st calling argument for aes_decrypt)
	mov		obuf, 4(sp)						// obuf (2nd calling argument for aes_decrypt)
#else
	mov		ibuf, %rdi						// ibuf
	mov		obuf, %rsi						// obuf
	mov		ctx, %rdx						// ctx
#endif
	call	_aes_decrypt_xmm_no_save		// aes_decrypt(ibuf, obuf, ctx)
	movups	(obuf), %xmm0					// obuf
	pxor	%xmm7, %xmm0					// obuf ^= iv;
	movaps	%xmm6, %xmm7					// memcpy(iv, tmp, AES_BLOCK_SIZE);
	movups	%xmm0, (obuf)					// update obuf
	add		$16, ibuf						// ibuf += AES_BLOCK_SIZE;
	add		$16, obuf						// obuf += AES_BLOCK_SIZE;
	sub		$1, num_blk						// num_blk--
	jg		0b								// if num_blk > 0, repeat the loop
9:

	// we are done here; the finishing code is identical to that in aes_encrypt_cbc, so just jump there
	jmp		L_crypt_cbc_done