arm/string/memcmp.s

   1 /*
   2  * Copyright (c) 2009 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. Please obtain a copy of the License at
  10  * http://www.opensource.apple.com/apsl/ and read it before using this
  11  * file.
  12  *
  13  * The Original Code and all software distributed under the License are
  14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  18  * Please see the License for the specific language governing rights and
  19  * limitations under the License.
  20  *
  21  * @APPLE_LICENSE_HEADER_END@
  22  */
  23
  24 // ARM Assembly implementation of memcmp( ) from <string.h>
  25 // Uses Thumb2 if it is available, otherwise generates ARM code.
  26 //
  27 // -- Stephen Canon, August 2009
  28 //
  29 // The basic idea is to use word compares instead of byte compares as long as
  30 // at least four bytes remain to be compared.  However, because memcmp( )
  31 // compares the buffers as though they were big-endian unsigned integers, we
  32 // need to byte-reverse each word before comparing them.
  33 //
  34 // If the buffers are not word aligned, or they are shorter than four bytes,
  35 // we just use a simple byte comparison loop instead.
  36 //
  37 // int   bcmp(const void *src1, const void *src2, size_t length);
  38 // int memcmp(const void *src1, const void *src2, size_t length);
  39
  40 #include <arm/arch.h>
  41
  42     .text
  43     .syntax unified
  44 #if defined __thumb2__
  45     .code 16
  46     .thumb_func _bcmp
  47     .thumb_func _memcmp
  48 #else
  49     .code 32
  50 #endif
  51     .globl _bcmp
  52     .globl _memcmp
  53     .align 3
     // bcmp and memcmp share one body: both labels below mark the same address.
     //   In:   r0 = src1, r1 = src2, r2 = length in bytes
     //   Out:  r0 = 0 when no difference is found; otherwise +1 / -1 when the
     //         difference is found by the word loop, or (src1 byte - src2 byte)
     //         when it is found by the byte loop
     //   Uses: r1, r2, r3, ip and the condition flags; the stack is not touched
  54 _bcmp:
  55 _memcmp:
  56
  57 #ifdef _ARM_ARCH_6
  58     subs    ip,     r2,  #4     // ip = length - 4; if that is negative
  59     bmi     L_useByteCompares   // (length < 4) use the byte comparison loop
  60
  61     orr     r3,     r0,  r1     // if either address has one of its low two
  62     tst     r3,          #3     // bits set, the buffers are not both word
  63     bne     L_useByteCompares   // aligned: use the byte comparison loop
  64
  65 .align 3
  66 L_wordCompare:                  // Invariant: both pointers are word aligned
  67     ldr     r2,    [r0], #4     // and ip = (bytes remaining) - 4 >= 0, so a
  68     ldr     r3,    [r1], #4     // full word can be loaded from each buffer.
  69     subs    ip,          #4     // Account for the four bytes just loaded; a
  70     bmi     L_lastWord          // negative ip means no further full word.
  71     rev     r2,     r2          // Byte-reverse both words (memcmp orders the
  72     rev     r3,     r3          // buffers as big-endian unsigned integers),
  73     cmp     r2,     r3          // compare them, and continue the loop only
  74     beq     L_wordCompare       // if they are equal.
  75 L_wordsUnequal:
  76     ite     hi                  // The flags come from an unsigned cmp of the
  77     movhi   r0,     #1          // reversed words: return +1 if the src1 word
  78     movls   r0,     #-1         // is higher, otherwise -1.
  79     bx      lr                  //
  80 L_lastWord:
  81     rev     r2,     r2          // The words just loaded are the last complete
  82     rev     r3,     r3          // ones: byte-reverse and compare them, and
  83     cmp     r2,     r3          // take the unequal return path if they
  84     bne     L_wordsUnequal      // differ.
  85     add     r2,     ip,  #4     // r2 = ip + 4 = leftover byte count (0 to 3);
     // fall into the byte comparison code below to finish.
  86 #endif // _ARM_ARCH_6
  87
  88 L_useByteCompares:
  89     tst     r2,     r2          // If the length is exactly zero
  90     beq     L_returnZero        // avoid doing any loads and return zero.
  91     mov     r3,     r0          // Walk src1 via r3; r0 receives its bytes.
  92 .align 3
  93 L_byteCompareLoop:
  94     ldrb    r0,    [r3], #1     // Load a byte from each buffer, and decrement
  95     ldrb    ip,    [r1], #1     // the length by one.  If the decremented
  96     subs    r2,     #1          // length is zero, exit the loop.  Otherwise
  97     beq     L_lastByte          // subtract the loaded bytes; if their
  98     subs    r0,     ip          // difference is zero, continue the comparison
  99     beq     L_byteCompareLoop   // loop.  Otherwise, return their difference.
 100     bx      lr                  // r0 already holds the nonzero difference
 101 L_returnZero:
 102     mov     r0,     ip          // Copy ip into r0 so the shared subtraction
 103 L_lastByte:                     // below produces exactly zero.
 104     sub     r0,     ip          // Return the difference of the final bytes
 105     bx      lr