]> git.saurik.com Git - apple/xnu.git/blob - osfmk/arm64/strnlen.s
xnu-4570.20.62.tar.gz
[apple/xnu.git] / osfmk / arm64 / strnlen.s
1 /*
2 * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 * This file implements the following function for the arm64 architecture:
29 *
30 * size_t strnlen(const char *string, size_t maxlen);
31 *
32 * The strnlen function returns either strlen(string) or maxlen, whichever
33 * is smaller, without reading beyond the first maxlen characters of string.
34 */
35
// Give the two entry points defined in this file external linkage.
36 .globl _strlen
37 .globl _strnlen
38
39 /*****************************************************************************
40 * Macros *
41 *****************************************************************************/
42
.macro EstablishFrame
//	Push the caller's frame pointer (x29) and return address (x30) as a
//	16-byte frame record, then make x29 point at that record.
	stp       x29, x30, [sp, #-16]!
	mov       x29, sp
.endm
47
.macro ClearFrameAndReturn
//	Pop the frame record pushed by EstablishFrame back into x29/x30 and
//	return through the restored link register.
	ldp       x29, x30, [sp], #16
	ret       x30
.endm
52
53 /*****************************************************************************
54 * Constants *
55 *****************************************************************************/
56
//	L_masks is a 32-byte table, aligned to 32 bytes and placed in the text
//	section: the byte values 0 through 15 in ascending address order, followed
//	by 16 zero bytes. Written byte by byte so the in-memory layout is explicit
//	(identical to the packed 64-bit literals on little-endian arm64).
	.text
	.p2align 5
L_masks:
	.byte     0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
	.quad     0, 0
62
63 /*****************************************************************************
64 * strnlen entrypoint *
65 *****************************************************************************/
66
_strnlen:
//	size_t strnlen(const char *string, size_t maxlen)
//
//	  In:   x0 = string, x1 = maxlen
//	  Out:  x0 = strlen(string) or maxlen, whichever is smaller
//	  Uses: x1-x4, v0-v2 and the condition flags as scratch
//
//	Two values of maxlen are peeled off before any string data is touched:
//	zero returns 0 immediately, and a value with the top bit set (larger than
//	any buffer that could be allocated) is handed to _strlen, which carries no
//	length bookkeeping at all.
	cmp       x1, #0
	b.mi      _strlen
	b.eq      L_maxlenIsZero
	EstablishFrame

//	x4 <- offset of the string within its 16-byte aligned vector
//	x2 <- address of that aligned vector, q0 <- its contents
	and       x4, x0, #0xf
	and       x2, x0, #0xfffffffffffffff0
	ldr       q0, [x2]

//	q2 <- {0,1,2, ... ,15}, kept until L_foundNUL where it turns a byte mask
//	into a byte index. The post-increment leaves x3 on the 16 zero bytes that
//	follow the index table.
	adr       x3, L_masks
	ldr       q2, [x3], #16

//	The aligned load may have picked up bytes located before the string; a
//	zero among them must not be mistaken for the terminator. Reading 16 bytes
//	starting x4 bytes before the zero half of L_masks gives a vector whose
//	first x4 bytes are non-zero and whose remaining bytes are zero; ORing it
//	into q0 makes every byte ahead of the string non-zero.
	sub       x3, x3, x4
	ldr       q1, [x3]
	orr       v0.16b, v0.16b, v1.16b

//	Count the x4 leading bytes against the budget so that x1 is measured from
//	the start of the aligned vector, then enter the loop at the scan step.
	add       x1, x1, x4
	b         L_strnlenScan

//	Scanning loop. Each pass looks for a zero byte in the current vector and,
//	failing that, charges 16 bytes to the remaining budget in x1. Another
//	vector is fetched only if more than 16 bytes of budget were left before
//	that charge.
	.p2align 4
L_strnlenFetch:
	ldr       q0, [x2, #16]!
L_strnlenScan:
	uminv     b1, v0.16b         // b1 <- smallest byte in the vector
	fmov      w3, s1
	cbz       w3, L_foundNUL
	subs      x1, x1, #16
	b.hi      L_strnlenFetch

//	Budget exhausted with no NUL seen: the answer is maxlen. Undo the last
//	subtraction to recover the budget that remained at the current vector and
//	add it to the vector's offset from the start of the string.
	add       x1, x1, #16
	sub       x0, x2, x0
	add       x0, x0, x1
	ClearFrameAndReturn

L_maxlenIsZero:
	mov       x0, #0
	ret                          // EstablishFrame has not run on this path.

L_foundNUL:
//	The current vector holds a zero byte. Bytes that are non-zero become 0xff
//	and zero bytes take their lane number from q2, so the minimum across the
//	vector is the index of the first NUL. The result is the vector's offset
//	from the string plus that index, capped by the budget left in x1.
	movi      v1.16b, #0
	cmhi      v0.16b, v0.16b, v1.16b
	orr       v0.16b, v0.16b, v2.16b
	uminv     b1, v0.16b
	fmov      w3, s1             // x3 <- index of the NUL within the vector
	sub       x0, x2, x0         // x0 <- offset of the vector from the string
	cmp       x1, x3
	csel      x1, x1, x3, lo     // x1 <- min(x1, x3), unsigned
	add       x0, x0, x1
	ClearFrameAndReturn
136
137 /*****************************************************************************
138 * strlen entrypoint *
139 *****************************************************************************/
140
//	size_t strlen(const char *string)
//
//	  In:   x0 = string
//	  Out:  x0 = number of bytes before the first NUL
//	  Uses: x1-x3, v0-v2 as scratch
	.p2align 4
_strlen:
	EstablishFrame

//	x1 <- address of the 16-byte aligned vector that contains the first byte
//	of the string, q0 <- its contents.
	and       x1, x0, #0xfffffffffffffff0
	ldr       q0, [x1]

//	q2 <- {0,1,2, ... ,15}; only needed once a NUL has been located. The
//	post-increment leaves x3 on the 16 zero bytes that follow the index table.
	adr       x3, L_masks
	ldr       q2, [x3], #16

//	Bytes of the aligned vector that lie before the string could contain a
//	zero and trigger a false match. x2 is the string's offset within the
//	vector; a 16-byte read starting x2 bytes before the zero half of L_masks
//	is non-zero in exactly its first x2 bytes, so ORing it into q0 forces
//	every byte ahead of the string to be non-zero.
	and       x2, x0, #0xf
	sub       x3, x3, x2
	ldr       q1, [x3]
	orr       v0.16b, v0.16b, v1.16b
	b         L_strlenScan

//	Scanning loop:
//
//	  q0 <-- next 16 aligned bytes of the string
//	  b1 <-- unsigned minimum over the bytes of q0
//	  repeat while b1 != 0
//
//	The loop is left as soon as the current vector contains a zero byte.
	.p2align 4
L_strlenFetch:
	ldr       q0, [x1, #16]!
L_strlenScan:
	uminv     b1, v0.16b
	fmov      w2, s1             // umov.b would be more natural, but requires 2 µops.
	cbnz      w2, L_strlenFetch

//	State on leaving the loop:
//
//	  x0  start of the string
//	  x1  address of the vector holding the terminating NUL
//	  v0  contents of that vector
//	  v2  {0, 1, 2, ... , 15}
//
//	Non-zero bytes of v0 are turned into 0xff and zero bytes take their lane
//	number from v2; the unsigned minimum is then the index of the first NUL
//	inside the vector, and the length is (x1 - x0) + that index.
	movi      v1.16b, #0
	cmhi      v0.16b, v0.16b, v1.16b
	orr       v0.16b, v0.16b, v2.16b
	uminv     b1, v0.16b
	fmov      w2, s1
	sub       x0, x1, x0
	add       x0, x0, x2
	ClearFrameAndReturn