osfmk/arm64/strnlen.s

   1 /*
   2  * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  *
  28  * This file implements the following function for the arm64 architecture:
  29  *
  30  *  size_t strnlen(const char *string, size_t maxlen);
  31  *
  32  * The strnlen function returns either strlen(string) or maxlen, whichever
  33  * is smaller, without reading beyond the first maxlen characters of string.
  34  */
  35
  36 .globl _strlen
  37 .globl _strnlen
  38
  39 /*****************************************************************************
  40  *  Macros                                                                   *
  41  *****************************************************************************/
  42
  43 .macro EstablishFrame
     //      Open a stack frame: store the caller's fp and lr as a pair at
     //      [sp - 16], writing the decremented address back to sp, then point
     //      fp at the saved pair.  ClearFrameAndReturn is the matching teardown.
  44         stp       fp, lr, [sp, #-16]!
  45         mov       fp,      sp
  46 .endm
  47
  48 .macro ClearFrameAndReturn
     //      Close the frame opened by EstablishFrame: reload fp and lr from
     //      [sp], advance sp by 16, and return through the restored lr.
  49         ldp       fp, lr, [sp], #16
  50         ret
  51 .endm
  52
  53 /*****************************************************************************
  54  *  Constants                                                                *
  55  *****************************************************************************/
  56
  57 .text
  58 .align 5
     //      L_masks is a 32-byte table: the byte indices {0,1,2, ... ,15} followed
     //      by 16 zero bytes.  Both _strnlen and _strlen use it in two ways:
     //
     //        1. The first 16 bytes are loaded as the index vector that is later
     //           used to locate the NUL byte inside a 16-byte vector.
     //        2. A 16-byte load from (L_masks + 16 - k), where k is the low four
     //           bits of the string pointer, yields a vector whose first k bytes
     //           come from the tail of the index table (values 16-k ... 15, all
     //           non-zero) and whose remaining bytes are zero.  ORing it into
     //           the first aligned load hides the k bytes preceding the string.
     //
     //      The two .quad pairs must stay adjacent and in this order; use (2)
     //      reads across the boundary between them.  The table sits in .text
     //      alongside the code, and both routines form its address with adr.
     //      NOTE(review): .align 5 is presumably a power-of-two (32-byte)
     //      alignment on this assembler, and the .quad values presumably are
     //      stored little-endian -- confirm for the target toolchain.
  59 L_masks:
  60 .quad 0x0706050403020100, 0x0f0e0d0c0b0a0908
  61 .quad 0x0000000000000000, 0x0000000000000000
  62
  63 /*****************************************************************************
  64  *  strnlen entrypoint                                                       *
  65  *****************************************************************************/
  66
  67 _strnlen:
     //      size_t strnlen(const char *string, size_t maxlen)
     //
     //      On entry:    x0 = string, x1 = maxlen
     //      On return:   x0 = strlen(string) or maxlen, whichever is smaller
     //      Writes:      x0-x4, v0-v2, condition flags (fp and lr are saved and
     //                   restored by EstablishFrame / ClearFrameAndReturn)
     //
     //      Register roles once the frame is established:
     //        x0   original string pointer (overwritten only to form the result)
     //        x1   bytes of maxlen still allowed, counted from the start of the
     //             aligned vector currently held in v0
     //        x2   address of the aligned vector currently held in v0
     //        x3   scratch (mask address, then minimum byte / NUL index)
     //        x4   string & 0xf, i.e. the number of bytes in the first aligned
     //             vector that precede the string
     //        v2   {0,1,2, ... ,15}
     //
  68 //      If n == 0, return 0 without loading any data from s.  If n is so large
  69 //      that it exceeds the size of any buffer that can be allocated, jump into a
  70 //      simpler implementation that omits all length checks.  This is both faster
  71 //      and lets us avoid some messy edge cases in the mainline.
     //      Both early exits are taken before EstablishFrame, so there is no frame
     //      to tear down on either path; _strlen establishes its own.
  72         tst       x1,      x1
  73         b.mi      _strlen             // maxlen has bit 63 set: unbounded scan
  74         b.eq      L_maxlenIsZero
  75         EstablishFrame
  76 //      Load the 16-byte aligned vector containing the start of the string.
  77         and       x2,      x0, #-16
  78         ldr       q0,     [x2]
  79 //      Load a vector {0,1,2, ... ,15} for use in finding the index of the NUL
  80 //      byte once we identify one.  We don't use this vector until the very end
  81 //      of the routine; it simply falls out naturally to load it now.
     //      The post-increment leaves x3 pointing at the 16 zero bytes that follow
     //      the index vector in L_masks.
  82         adr       x3,          L_masks
  83         ldr       q2,     [x3],#16
  84 //      The aligned vector that we loaded to q0 contains the start of the string,
  85 //      but if the string was not originally aligned, it also contains bytes
  86 //      which precede the start of the string, and which may cause false positives
  87 //      when we search for the terminating NUL.  We generate a mask to OR into the
  88 //      vector using an unaligned load to prevent this.  The mask has non-zero
  89 //      values only in those bytes which correspond to bytes preceding the start
  90 //      of the string in the aligned vector load.
  91         and       x4,      x0, #0xf
  92         sub       x3,      x3, x4
  93         ldr       q1,     [x3]
  94         orr.16b   v0,      v0, v1
  95 //      Adjust maxlen to account for bytes which precede the start of the string,
  96 //      and jump into the main scanning loop.  From here on, whenever label 1 is
     //      reached, (x2 - x0) + x1 == maxlen.
  97         add       x1,      x1, x4
  98         b         1f
  99

 100 //      Main loop.  Identical to strlen, except that we also need to check that we
 101 //      don't read more than maxlen bytes.  To that end, we decrement maxlen by 16
 102 //      on each iteration, and exit the loop if the result is zero or negative.
     //      (b.hi is an unsigned test on the subs: the loop continues only while
     //      more than 16 bytes of maxlen remained before the decrement.)
 103 .align 4
 104 0:      ldr       q0,     [x2, #16]!
 105 1:  uminv.16b b1,      v0
 106         fmov      w3,      s1
 107         cbz       w3,      L_foundNUL
 108         subs      x1,      x1, #16
 109         b.hi      0b
 110

 111 //      We exhausted maxlen bytes without finding a terminating NUL character, so
 112 //      we need to return maxlen.
 113         sub       x0,      x2, x0      // offset of the current vector from string
 114         add       x1,      x1, #16     // undo the final decrement
 115         add       x0,      x0, x1      // (x2 - x0) + x1 == maxlen
 116         ClearFrameAndReturn
 117

 118 L_maxlenIsZero:
 119         mov       x0,      xzr
 120         ret                         // No stack frame, so don't clear it.
 121

 122 L_foundNUL:
 123 //      Compute the index of the NUL byte, and check if it occurs before maxlen
 124 //      bytes into the vector.  If not, return maxlen.  Otherwise, return the
 125 //      length of the string.
     //
     //      cmhi against zero turns every non-zero byte of v0 into 0xff and leaves
     //      zero bytes as 0x00; ORing in v2 then replaces each 0x00 with its own
     //      lane index, so the unsigned minimum is the index of the first NUL.
 126         eor.16b   v1,      v1, v1
 127         cmhi.16b  v0,      v0, v1
 128         orr.16b   v0,      v0, v2
 129         uminv.16b b1,      v0
 130         fmov      w3,      s1      // index of NUL byte in vector
 131         sub       x0,      x2, x0  // index of vector in string
 132         cmp       x1,      x3      // if NUL occurs before maxlen bytes
 133         csel      x1,      x1, x3, cc // return strlen, else maxlen
 134         add       x0,      x0, x1
 135         ClearFrameAndReturn
 136
 137 /*****************************************************************************
 138  *  strlen entrypoint                                                        *
 139  *****************************************************************************/
 140
 141 .align 4
     //      Exported as _strlen (presumably the C strlen -- the file header only
     //      names strnlen).  Also the branch target of _strnlen when maxlen has
     //      bit 63 set.
     //
     //      On entry:    x0 = pointer to the string
     //      On return:   x0 = index of the first NUL byte at or after that pointer
     //      Writes:      x0-x3, v0-v2 (fp and lr are saved and restored by
     //                   EstablishFrame / ClearFrameAndReturn)
     //
     //      Only 16-byte aligned vector loads are issued against the string.
 142 _strlen:
 143         EstablishFrame
 144 //      Load the 16-byte aligned vector containing the start of the string.
 145         and       x1,      x0, #-16
 146         ldr       q0,     [x1]
 147 //      Load a vector {0,1,2, ... ,15} for use in finding the index of the NUL
 148 //      byte once we identify one.  We don't use this vector until the very end
 149 //      of the routine; it simply falls out naturally to load it now.
     //      The post-increment leaves x3 pointing at the 16 zero bytes that follow
     //      the index vector in L_masks.
 150         adr       x3,          L_masks
 151         ldr       q2,     [x3],#16
 152 //      The aligned vector that we loaded to q0 contains the start of the string,
 153 //      but if the string was not originally aligned, it also contains bytes
 154 //      which precede the start of the string, and which may cause false positives
 155 //      when we search for the terminating NUL.  We generate a mask to OR into the
 156 //      vector using an unaligned load to prevent this.  The mask has non-zero
 157 //      values only in those bytes which correspond to bytes preceding the start
 158 //      of the string in the aligned vector load.
 159         and       x2,      x0, #0xf
 160         sub       x3,      x3, x2
 161         ldr       q1,     [x3]
 162         orr.16b   v0,      v0, v1
 163         b         1f
 164

 165 //      Main loop.  On each iteration we do the following:
 166 //
 167 //              q0 <-- next 16 aligned bytes of string
 168 //              b1 <-- unsigned minimum byte in q0
 169 //              if (b1 != 0) continue
 170 //
 171 //      Thus, we continue the loop until the 16 bytes we load contain a zero byte.
 172 .align 4
 173 0:      ldr       q0,     [x1, #16]!
 174 1:      uminv.16b b1,      v0
 175         fmov      w2,      s1 // umov.b would be more natural, but requires 2 µops.
 176         cbnz      w2,      0b
 177

 178 //      A zero byte has been found.  The following registers contain values that
 179 //      we need to compute the string's length:
 180 //
 181 //              x0              pointer to start of string
 182 //              x1              pointer to vector containing terminating NUL byte
 183 //              v0              vector containing terminating NUL byte
 184 //              v2              {0, 1, 2, ... , 15}
 185 //
 186 //      We compute the index of the terminating NUL byte in the string (which is
 187 //      precisely the length of the string) as follows:
 188 //
 189 //              vec <-- mask(v0 != 0) | v2
 190 //              index <-- x1 - x0 + unsignedMinimum(vec)
     //
     //      mask(v0 != 0) is 0xff in every non-zero lane and 0x00 in every zero
     //      lane, so after the OR each zero lane holds its own index and every
     //      other lane holds 0xff; the minimum is the index of the first NUL.
 191         eor.16b   v1,      v1, v1
 192         cmhi.16b  v0,      v0, v1
 193         orr.16b   v0,      v0, v2
 194         uminv.16b b1,      v0
 195         fmov      w2,      s1
 196         sub       x0,      x1, x0
 197         add       x0,      x0, x2
 198         ClearFrameAndReturn