X-Git-Url: https://git.saurik.com/apple/libc.git/blobdiff_plain/34e8f8296870d0e8695f90e1a54240a589d41312..51282358e8fdbfc483c0c34e7eae9b89b51f2570:/arm/string/memcmp.s diff --git a/arm/string/memcmp.s b/arm/string/memcmp.s new file mode 100644 index 0000000..83e0f87 --- /dev/null +++ b/arm/string/memcmp.s @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2009 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +// ARM Assembly implementation of memcmp( ) from +// Uses Thumb2 if it is available, otherwise generates ARM code. +// +// -- Stephen Canon, August 2009 +// +// The basic idea is to use word compares instead of byte compares as long as +// at least four bytes remain to be compared. However, because memcmp( ) +// compares the buffers as though they were big-endian unsigned integers, we +// need to byte-reverse each word before comparing them. +// +// If the buffers are not word aligned, or they are shorter than four bytes, +// we just use a simple byte comparison loop instead. +// +// int bcmp(void *src1, void *src2, size_t length); +// int memcmp(void *src1, void *src2, size_t length); + +#include + + .text + .syntax unified +#if defined __thumb2__ + .code 16 + .thumb_func _bcmp + .thumb_func _memcmp +#else + .code 32 +#endif + .globl _bcmp + .globl _memcmp + .align 3 +_bcmp: +_memcmp: + +#ifdef _ARM_ARCH_6 + subs ip, r2, #4 // if length < 4 + bmi L_useByteCompares // jump to the byte comparison loop + + orr r3, r0, r1 // if the buffers are + tst r3, #3 // not word aligned + bne L_useByteCompares // jump to the byte comparison loop + +.align 3 +L_wordCompare: // Here we know that both buffers are word + ldr r2, [r0], #4 // aligned, and (length - 4) > 0, so at least + ldr r3, [r1], #4 // four bytes remain to be compared. We load + subs ip, #4 // a word from each buffer, and byte reverse + bmi L_lastWord // the loaded words. We also decrement the + rev r2, r2 // length by four and jump out of this loop if + rev r3, r3 // the result is negative. Then we compare the + cmp r2, r3 // reversed words, and continue the loop only + beq L_wordCompare // if they are equal. +L_wordsUnequal: + ite hi // If the words compared unequal, return +/- 1 + movhi r0, #1 // according to the result of the comparison. + movls r0, #-1 // + bx lr // +L_lastWord: + rev r2, r2 // If we just loaded the last complete words + rev r3, r3 // from the buffers, byte-reverse them and + cmp r2, r3 // compare. If they are unequal, jump to the + bne L_wordsUnequal // return path. + add r2, ip, #4 // Otherwise, fall into the cleanup code. +#endif // _ARM_ARCH_6 + +L_useByteCompares: + tst r2, r2 // If the length is exactly zero + beq L_returnZero // avoid doing any loads and return zero. + mov r3, r0 +.align 3 +L_byteCompareLoop: + ldrb r0, [r3], #1 // Load a byte from each buffer, and decrement + ldrb ip, [r1], #1 // the length by one. If the decremented + subs r2, #1 // length is zero, exit the loop. Otherwise + beq L_lastByte // subtract the loaded bytes; if their + subs r0, ip // difference is zero, continue the comparison + beq L_byteCompareLoop // loop. Otherwise, return their difference. + bx lr +L_returnZero: + mov r0, ip +L_lastByte: + sub r0, ip // Return the difference of the final bytes + bx lr