]> git.saurik.com Git - apple/libc.git/blame_incremental - x86_64/string/strlen.s
Libc-391.5.22.tar.gz
[apple/libc.git] / x86_64 / string / strlen.s
... / ...
CommitLineData
1/*
2 * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22
23/*
24 * Strlen, for processors with SSE3 (only SSE2 instructions are actually used).
25 *
26 * Note that all memory references must be aligned, in order to avoid spurious
27 * page faults. Thus we have to load the aligned 16-byte chunk containing the
28 * first byte of the operand, then mask out false 0s that may occur before the
29 * first byte.
30 *
31 * We favor the fall-through (ie, short operand) path.
32 */
33
34 .text
35 .globl _strlen
36 .align 4, 0x90
// _strlen: return the number of bytes before the first 0-byte of a string.
//   In:       %rdi = address of the first byte of the string
//   Out:      %rax = length (address of the 0-byte minus address of 1st byte)
//   Modifies: %rax, %rcx, %rdx, %rdi, %xmm0, %xmm1, flags
// Memory is only ever read as whole aligned 16-byte blocks (dq's), starting
// with the block that contains the first byte of the string.
37_strlen: // size_t strlen(const char *b);
38 pxor %xmm0,%xmm0 // zero %xmm0 so it can be compared against for 0-bytes
39 movq %rdi,%rcx // copy ptr (low 4 bits become the shift count below)
40 movq %rdi,%rdx // make another copy: %rdx keeps the original ptr to the end
41 andq $(-16),%rdi // 16-byte align ptr (round down to start of containing dq)
42 orl $(-1),%eax // %eax = all 1-bits, shifted below to form the byte mask
43 pcmpeqb (%rdi),%xmm0 // check whole dq for 0s: 0xFF in each lane whose byte is 0
44 andl $15,%ecx // get #bytes in aligned dq before operand
45 shl %cl,%eax // create mask for the bytes of aligned dq in operand
46 pmovmskb %xmm0,%ecx // collect mask of 0-bytes (bit i set <=> byte i of dq is 0)
47 andl %eax,%ecx // mask out any 0s that occur before 1st byte
48 jz LEnterLoop // no 0-bytes (i.e., no 1-bits) in 1st dq, so enter by-16 loop
49
50// We've found a 0-byte. Reached by fall-through (0-byte in the first block)
51// and by the jmp at the end of the loop.
51// %rdi = aligned address of 16-byte block containing the terminating 0-byte
52// %ecx = compare bit vector (nonzero here; bit i set <=> byte i is a 0-byte)
53
54LFoundIt:
55 bsf %ecx,%eax // find first 1-bit (i.e., offset of first 0-byte within the dq)
56 movq %rdx,%rcx // recover ptr to 1st byte in string
57 addq %rdi,%rax // get address of the 0-byte
58 subq %rcx,%rax // subtract address of 1st byte to get string length
59 ret // %rax = string length
60
61// Loop over aligned 16-byte blocks:
62// %rdi = address of previous (already checked) block on entry to LEnterLoop
63
64LEnterLoop:
65 pxor %xmm0,%xmm0 // re-zero %xmm0: the pcmpeqb above overwrote it with compare results
66 addq $16,%rdi // advance ptr to the first unchecked block
67LLoop:
68 movdqa (%rdi),%xmm1 // get next chunk (aligned 16-byte load)
69 addq $16,%rdi // advance past the chunk just loaded (undone after loop exit)
70 pcmpeqb %xmm0,%xmm1 // check for 0s
71 pmovmskb %xmm1,%ecx // collect mask of 0-bytes
72 test %ecx,%ecx // any 0-bytes?
73 jz LLoop // no 0-bytes, so get next dq
74
75 subq $16,%rdi // back up ptr to the block that contains the 0-byte
76 jmp LFoundIt // %rdi and %ecx now match what LFoundIt expects