]>
Commit | Line | Data |
---|---|---|
5ba3f43e A |
1 | /* |
2 | * Copyright (c) 2012 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | * | |
28 | * This file implements the following function for the arm64 architecture: | |
29 | * | |
30 | * size_t strnlen(const char *string, size_t maxlen); | |
31 | * | |
32 | * The strnlen function returns either strlen(string) or maxlen, whichever | |
33 | * is amller, without reading beyond the first maxlen characters of string. | |
34 | */ | |
35 | ||
d9a64523 A |
36 | #include <arm64/asm.h> |
37 | ||
5ba3f43e A |
38 | .globl _strlen |
39 | .globl _strnlen | |
40 | ||
41 | /***************************************************************************** | |
42 | * Macros * | |
43 | *****************************************************************************/ | |
44 | ||
45 | .macro EstablishFrame | |
d9a64523 | 46 | ARM64_STACK_PROLOG |
5ba3f43e A |
47 | stp fp, lr, [sp, #-16]! |
48 | mov fp, sp | |
49 | .endm | |
50 | ||
51 | .macro ClearFrameAndReturn | |
52 | ldp fp, lr, [sp], #16 | |
d9a64523 | 53 | ARM64_STACK_EPILOG |
5ba3f43e A |
54 | .endm |
55 | ||
56 | /***************************************************************************** | |
57 | * Constants * | |
58 | *****************************************************************************/ | |
59 | ||
60 | .text | |
61 | .align 5 | |
62 | L_masks: | |
63 | .quad 0x0706050403020100, 0x0f0e0d0c0b0a0908 | |
64 | .quad 0x0000000000000000, 0x0000000000000000 | |
65 | ||
66 | /***************************************************************************** | |
67 | * strnlen entrypoint * | |
68 | *****************************************************************************/ | |
69 | ||
70 | _strnlen: | |
71 | // If n == 0, return NULL without loading any data from s. If n is so large | |
72 | // that it exceeds the size of any buffer that can be allocted, jump into a | |
73 | // simpler implementation that omits all length checks. This is both faster | |
74 | // and lets us avoid some messy edgecases in the mainline. | |
75 | tst x1, x1 | |
76 | b.mi _strlen | |
77 | b.eq L_maxlenIsZero | |
78 | EstablishFrame | |
79 | // Load the 16-byte aligned vector containing the start of the string. | |
80 | and x2, x0, #-16 | |
81 | ldr q0, [x2] | |
82 | // Load a vector {0,1,2, ... ,15} for use in finding the index of the NUL | |
83 | // byte once we identify one. We don't use this vector until the very end | |
84 | // of the routine; it simply falls out naturally to load it now. | |
85 | adr x3, L_masks | |
86 | ldr q2, [x3],#16 | |
87 | // The aligned vector that we loaded to q0 contains the start of the string, | |
88 | // but if the string was not originally aligned, it also contains bytes | |
89 | // which preceed the start of the string, and which may cause false positives | |
90 | // when we search for the terminating NUL. We generate a mask to OR into the | |
91 | // vector using an unaligned load to prevent this. The mask has non-zero | |
92 | // values only in those bytes which correspond to bytes preceeding the start | |
93 | // of the string in the aligned vector load. | |
94 | and x4, x0, #0xf | |
95 | sub x3, x3, x4 | |
96 | ldr q1, [x3] | |
97 | orr.16b v0, v0, v1 | |
98 | // Adjust maxlen to account for bytes which preceed the start of the string, | |
99 | // and jump into the main scanning loop. | |
100 | add x1, x1, x4 | |
101 | b 1f | |
102 | ||
103 | // Main loop. Identical to strlen, except that we also need to check that we | |
104 | // don't read more than maxlen bytes. To that end, we decrement maxlen by 16 | |
105 | // on each iteration, and exit the loop if the result is zero or negative. | |
106 | .align 4 | |
107 | 0: ldr q0, [x2, #16]! | |
108 | 1: uminv.16b b1, v0 | |
109 | fmov w3, s1 | |
110 | cbz w3, L_foundNUL | |
111 | subs x1, x1, #16 | |
112 | b.hi 0b | |
113 | ||
114 | // We exhausted maxlen bytes without finding a terminating NUL character, so | |
115 | // we need to return maxlen. | |
116 | sub x0, x2, x0 | |
117 | add x1, x1, #16 | |
118 | add x0, x0, x1 | |
119 | ClearFrameAndReturn | |
120 | ||
121 | L_maxlenIsZero: | |
d9a64523 | 122 | mov x0, #0 |
5ba3f43e A |
123 | ret // No stack frame, so don't clear it. |
124 | ||
125 | L_foundNUL: | |
126 | // Compute the index of the NUL byte, and check if it occurs before maxlen | |
127 | // bytes into the vector. If not, return maxlen. Otherwise, return the | |
128 | // length of the string. | |
129 | eor.16b v1, v1, v1 | |
130 | cmhi.16b v0, v0, v1 | |
131 | orr.16b v0, v0, v2 | |
132 | uminv.16b b1, v0 | |
133 | fmov w3, s1 // index of NUL byte in vector | |
134 | sub x0, x2, x0 // index of vector in string | |
135 | cmp x1, x3 // if NUL occurs before maxlen bytes | |
136 | csel x1, x1, x3, cc // return strlen, else maxlen | |
137 | add x0, x0, x1 | |
138 | ClearFrameAndReturn | |
139 | ||
140 | /***************************************************************************** | |
141 | * strlen entrypoint * | |
142 | *****************************************************************************/ | |
143 | ||
144 | .align 4 | |
145 | _strlen: | |
146 | EstablishFrame | |
147 | // Load the 16-byte aligned vector containing the start of the string. | |
148 | and x1, x0, #-16 | |
149 | ldr q0, [x1] | |
150 | // Load a vector {0,1,2, ... ,15} for use in finding the index of the NUL | |
151 | // byte once we identify one. We don't use this vector until the very end | |
152 | // of the routine; it simply falls out naturally to load it now. | |
153 | adr x3, L_masks | |
154 | ldr q2, [x3],#16 | |
155 | // The aligned vector that we loaded to q0 contains the start of the string, | |
156 | // but if the string was not originally aligned, it also contains bytes | |
157 | // which preceed the start of the string, and which may cause false positives | |
158 | // when we search for the terminating NUL. We generate a mask to OR into the | |
159 | // vector using an unaligned load to prevent this. The mask has non-zero | |
160 | // values only in those bytes which correspond to bytes preceeding the start | |
161 | // of the string in the aligned vector load. | |
162 | and x2, x0, #0xf | |
163 | sub x3, x3, x2 | |
164 | ldr q1, [x3] | |
165 | orr.16b v0, v0, v1 | |
166 | b 1f | |
167 | ||
168 | // Main loop. On each iteration we do the following: | |
169 | // | |
170 | // q0 <-- next 16 aligned bytes of string | |
171 | // b1 <-- unsigned minimum byte in q0 | |
172 | // if (b1 != 0) continue | |
173 | // | |
174 | // Thus, we continue the loop until the 16 bytes we load contain a zero byte. | |
175 | .align 4 | |
176 | 0: ldr q0, [x1, #16]! | |
177 | 1: uminv.16b b1, v0 | |
178 | fmov w2, s1 // umov.b would be more natural, but requries 2 µops. | |
179 | cbnz w2, 0b | |
180 | ||
181 | // A zero byte has been found. The following registers contain values that | |
182 | // we need to compute the string's length: | |
183 | // | |
184 | // x0 pointer to start of string | |
185 | // x1 pointer to vector containing terminating NUL byte | |
186 | // v0 vector containing terminating NUL byte | |
187 | // v2 {0, 1, 2, ... , 15} | |
188 | // | |
189 | // We compute the index of the terminating NUL byte in the string (which is | |
190 | // precisely the length of the string) as follows: | |
191 | // | |
192 | // vec <-- mask(v0 != 0) | v2 | |
193 | // index <-- x1 - x0 + unsignedMinimum(vec) | |
194 | eor.16b v1, v1, v1 | |
195 | cmhi.16b v0, v0, v1 | |
196 | orr.16b v0, v0, v2 | |
197 | uminv.16b b1, v0 | |
198 | fmov w2, s1 | |
199 | sub x0, x1, x0 | |
200 | add x0, x0, x2 | |
201 | ClearFrameAndReturn |