]> git.saurik.com Git - apple/libc.git/blob - x86_64/string/strnlen.s
8b9600363b7be48881438cd2f00107f9e32f76d8
[apple/libc.git] / x86_64 / string / strnlen.s
1 /*
2 * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 * This file implements strnlen( ) for the x86_64 architecture.
29 */
30
// Export the entry point defined below so other objects can link against it.
.globl _strnlen
32
33 /*****************************************************************************
34 * Macros *
35 *****************************************************************************/
36
// EstablishFrame: frame-pointer prologue. Saves the caller's %rbp on the stack
// and then points %rbp at the saved copy. It consists only of a push and a
// register move, neither of which writes the condition flags, so flags set
// before the macro are still valid after it.
.macro EstablishFrame
        pushq   %rbp
        movq    %rsp, %rbp
.endm
41
// ClearFrameAndReturn: epilogue matching EstablishFrame. Restores the caller's
// %rbp and returns. The saved %rbp is popped straight off the top of the
// stack, so %rsp must be where EstablishFrame left it when this is used.
.macro ClearFrameAndReturn
        popq    %rbp
        ret
.endm
46
47 /*****************************************************************************
48 * Entrypoint *
49 *****************************************************************************/
50
// ---------------------------------------------------------------------------
// size_t strnlen(const char *s, size_t maxlen)
//
// Returns the number of bytes in s that come before the first NUL, or maxlen
// if there is no NUL among the first maxlen bytes.
//
// Register roles:
//   %rdi   in: s; afterwards the address of the aligned 16-byte block that is
//          currently being scanned
//   %rsi   in: maxlen; afterwards the part of maxlen not yet scanned, counted
//          from the start of the current block
//   %rax   copy of the caller's maxlen; ends up as the return value
//   %rcx   s & 15, later the per-byte NUL bitmask of the current block
//   %rdx   mask for the first block, later the index of the first NUL found
//   %xmm0  scratch for the byte-wise compare against zero
// ---------------------------------------------------------------------------
        .text
        .align  4
_strnlen:
        // A maxlen with its sign bit set is larger than any object that could
        // be allocated, so it can never be what stops the scan. Hand such calls
        // to strlen with a tail jump (no frame has been set up yet); this also
        // spares the code below from dealing with huge limits.
        testq   %rsi, %rsi
        js      _strlen

        // strnlen must not touch memory at all when maxlen == 0. The jz below
        // still sees the flags produced by the testq above: neither the
        // push/mov inside EstablishFrame nor the movq modifies them.
        EstablishFrame
        movq    %rsi, %rax              // return value if the limit is hit
        jz      L_limitReached

        // The scan proceeds in aligned 16-byte blocks, so the first block may
        // start with up to 15 bytes that precede s. With k = s & 15:
        //   - %rdx = ~0 << k has bits 0..k-1 clear and is used to discard NULs
        //     detected in those leading bytes;
        //   - the remaining budget grows by k so that it is measured from the
        //     start of the block rather than from s.
        movq    %rdi, %rcx
        andq    $0xf, %rcx              // k = misalignment of s
        orq     $-1, %rdx
        shlq    %cl, %rdx               // first-block mask
        addq    %rcx, %rsi              // budget relative to block start

        // Round s down to its block and compare all 16 bytes with zero at
        // once. pcmpeqb marks every zero byte and pmovmskb collapses those
        // marks into one bit per byte in %ecx. Only bits that survive the mask
        // correspond to NULs at or after s.
        andq    $-16, %rdi
        pxor    %xmm0, %xmm0
        pcmpeqb (%rdi), %xmm0
        pmovmskb %xmm0, %ecx
        andq    %rdx, %rcx
        jnz     L_nulInBlock

        // No NUL so far; charge 16 bytes against the budget. A borrow means
        // maxlen ran out somewhere inside the block just examined; a result of
        // exactly zero means it ran out at the block's end, so the next block
        // must not be read. Having looked at bytes past maxlen inside the block
        // is harmless: an aligned 16-byte access cannot cross a page (or even a
        // cache line) boundary, so it is indistinguishable from stopping at
        // maxlen.
        subq    $16, %rsi
        jbe     L_limitReached

L_scanBlock:
        // Steady state: every block from here on lies entirely after s, so no
        // mask is needed. Keep going while budget remains and no NUL shows up.
        addq    $16, %rdi
        pxor    %xmm0, %xmm0
        pcmpeqb (%rdi), %xmm0
        pmovmskb %xmm0, %ecx
        testq   %rcx, %rcx
        jnz     L_nulInBlock
        subq    $16, %rsi
        ja      L_scanBlock

L_limitReached:
        // maxlen bytes were covered without meeting a NUL; %rax already holds
        // maxlen.
        ClearFrameAndReturn

L_nulInBlock:
        // The block just scanned holds at least one NUL; bsf yields the index
        // of the first one within the block.
        bsfq    %rcx, %rdx
        // If fewer than that many bytes of budget remain, the maxlen-th byte
        // of the string comes before the NUL: report maxlen. (When the two are
        // equal, the arithmetic below produces maxlen as well.)
        cmpq    %rdx, %rsi
        jb      L_limitReached
        // maxlen minus the remaining budget is the offset of this block
        // relative to s; adding the NUL's index gives the string length.
        subq    %rsi, %rax
        addq    %rdx, %rax
        ClearFrameAndReturn