]>
Commit | Line | Data |
---|---|---|
9385eb3d A |
1 | /* |
2 | * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
9385eb3d A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. Please obtain a copy of the License at | |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
11 | * file. | |
12 | * | |
13 | * The Original Code and all software distributed under the License are | |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
18 | * Please see the License for the specific language governing rights and | |
19 | * limitations under the License. | |
20 | * | |
21 | * @APPLE_LICENSE_HEADER_END@ | |
22 | */ | |
23 | #define ASSEMBLER // we need the defs for cr7_eq etc | |
24 | #include <mach/ppc/asm.h> | |
25 | #undef ASSEMBLER | |
26 | ||
59e0d9fe A |
27 | /* We use mode-independent "g" opcodes such as "srgi". These expand |
28 | * into word operations when targeting __ppc__, and into doubleword | |
29 | * operations when targeting __ppc64__. | |
30 | */ | |
31 | #include <architecture/ppc/mode_independent_asm.h> | |
32 | ||
33 | ||
9385eb3d A |
34 | // *************** *********** |
35 | // * M E M C M P * and * B C M P * | |
36 | // *************** *********** | |
37 | // | |
38 | // int memcmp(const char *s1, const char *s2, size_t len); | |
39 | // int bcmp(const char *s1, const char *s2, size_t len); | |
40 | // | |
41 | // Bcmp returns (+,0,-), whereas memcmp returns the true difference | |
42 | // between the first differing bytes, but we treat them identically. | |
43 | // | |
44 | // We optimize the compare by doing it word parallel. This introduces | |
45 | // a complication: if we blindly did word loads from both sides until | |
46 | // finding a difference, we might get a spurious page fault by | |
47 | // reading bytes past the difference. To avoid this, we never do a word or | |
48 | // doubleword load that crosses a page boundary: LHS loads are aligned, and RHS loads stop at the end of the RHS page. | |
59e0d9fe A |
49 | // |
50 | // In 64-bit mode, this routine is doubleword parallel. | |
9385eb3d A |
51 | |
52 | .text | |
53 | .globl EXT(memcmp) | |
54 | .globl EXT(bcmp) | |
55 | ||
56 | .align 5 | |
59e0d9fe | 57 | LEXT(memcmp) // int memcmp(const char *s1,const char *s2,size_t len); shared entry point: r3 = s1 (LHS), r4 = s2 (RHS), r5 = len |
9385eb3d | 58 | LEXT(bcmp) // int bcmp(const char *s1,const char *s2,size_t len); same code as memcmp, result returned in r3 |
59e0d9fe A |
59 | cmplgi cr1,r5,2*GPR_BYTES // cr1 <- unsigned compare of len with 2*GPR_BYTES: is buffer too short to bother with parallel compares? |
60 | andi. r0,r3,GPR_BYTES-1 // r0 <- low address bits of LHS, cr0 eq if they are zero: is LHS aligned? | |
9385eb3d A |
61 | blt cr1,Lshort // short buffer (tested in cr1), so just compare byte-by-byte |
62 | beq Laligned // skip the alignment byte loop if LHS is already aligned (cr0 from the andi.) | |
59e0d9fe | 63 | subfic r0,r0,GPR_BYTES // r0 <- GPR_BYTES - misalignment = #bytes to align LHS |
9385eb3d A |
64 | mtctr r0 // ctr <- #bytes to compare: set up for byte loop |
65 | b Lbyteloop | |
66 | ||
67 | // Handle short buffer or end-of-buffer: compare the remaining bytes one at a time, then return. | |
68 | // r3 = LHS ptr (unaligned) | |
69 | // r4 = RHS ptr (unaligned) | |
59e0d9fe | 70 | // r5 = length remaining in buffer (0..2*GPR_BYTES-1) |
9385eb3d A |
71 | |
72 | Lshort: | |
59e0d9fe | 73 | cmpgi r5,0 // null (zero-length) buffer? |
9385eb3d | 74 | mtctr r5 // ctr <- byte count: assume not null, and set up for loop |
59e0d9fe | 75 | bne Lshortloop // buffer not null, go compare its bytes |
9385eb3d A |
76 | li r3,0 // say "equal": nothing left to compare, so return 0 |
77 | blr | |
78 | ||
79 | .align 5 | |
80 | Lshortloop: | |
81 | lbz r7,0(r3) // r7 <- next LHS byte | |
82 | addi r3,r3,1 | |
83 | lbz r8,0(r4) // r8 <- next RHS byte | |
84 | addi r4,r4,1 | |
85 | cmpw r7,r8 // compare the bytes (sets cr0) | |
86 | bdnzt eq,Lshortloop // decrement ctr, then loop if more to go and bytes are equal | |
87 | ||
88 | sub r3,r7,r8 // generate return value: r3 <- last LHS byte - last RHS byte (0 if every byte matched) | |
89 | blr | |
90 | ||
59e0d9fe A |
91 | // We're at a RHS page boundary (or the parallel loop used up its count). Compare GPR_BYTES bytes in order to cross the |
92 | // page but still keep the LHS ptr aligned. If fewer than 2*GPR_BYTES bytes remain, finish in Lshort instead. | |
9385eb3d A |
93 | |
94 | Lcrosspage: | |
59e0d9fe A |
95 | cmplgi r5,2*GPR_BYTES // unsigned compare of length remaining: enough bytes left to use parallel compares? |
96 | li r0,GPR_BYTES // r0 <- #bytes to cross RHS page | |
9385eb3d | 97 | blt Lshort // buffer is about to end, so compare what is left byte-by-byte |
59e0d9fe | 98 | mtctr r0 // ctr <- GPR_BYTES bytes for the byte loop
9385eb3d A |
99 | b Lbyteloop |
100 | ||
101 | // Compare byte-by-byte, then carry on with the parallel compare if no difference was found. | |
102 | // r3 = LHS ptr (unaligned) | |
103 | // r4 = RHS ptr (unaligned) | |
104 | // r5 = length remaining in buffer (must be >0) | |
105 | // ctr = bytes to compare (set by the caller: #bytes to align LHS, or GPR_BYTES to cross a RHS page) | |
106 | ||
107 | .align 5 | |
108 | Lbyteloop: | |
109 | lbz r7,0(r3) // r7 <- next LHS byte | |
110 | addi r3,r3,1 | |
111 | lbz r8,0(r4) // r8 <- next RHS byte | |
112 | addi r4,r4,1 | |
113 | subi r5,r5,1 // decrement bytes remaining in buffer | |
114 | cmpw r7,r8 // compare the bytes (sets cr0) | |
115 | bdnzt eq,Lbyteloop // decrement ctr, then loop if more to go and bytes are equal | |
116 | ||
59e0d9fe | 117 | bne Ldifferent // done if we found differing bytes (they are in r7 and r8); otherwise fall through to Laligned |
9385eb3d | 118 | |
59e0d9fe A |
119 | // LHS is now aligned. Loop over words/doublewords until end of RHS page or buffer. |
120 | // When we get to the end of the page, we compare 4/8 bytes one byte at a time (Lcrosspage), so that we keep | |
121 | // the LHS aligned. | |
9385eb3d A |
122 | // r3 = LHS ptr (aligned) |
123 | // r4 = RHS ptr (unaligned) | |
59e0d9fe | 124 | // r5 = length remaining in buffer (>= GPR_BYTES bytes) |
9385eb3d A |
125 | |
126 | Laligned: | |
127 | rlwinm r9,r4,0,0xFFF // r9 <- RHS offset in page (low 12 bits of r4, ie 4096-byte pages) | |
128 | subfic r0,r9,4096 // r0 <- 4096 - offset = #bytes left in RHS page | |
129 | subfc r7,r0,r5 // *** r7 <- r5 - r0, and CA <- 1 if r5 >= r0 (unsigned) else 0 | |
130 | subfe r8,r5,r5 // * r8 <- 0 if CA was set, else -1; overall r9 <- min(r0,r5), | |
131 | and r7,r7,r8 // * r7 <- r5 - r0 if r5 < r0, else 0; using algorithm in Compiler Writer's Guide | |
132 | add r9,r0,r7 // *** r9 <- r0 + r7 = min(r0,r5) | |
59e0d9fe A |
133 | srgi. r8,r9,LOG2_GPR_BYTES// r8 <- #words/doublewords we can compare, cr0 eq if that is zero |
134 | clrrgi r9,r9,LOG2_GPR_BYTES// r9 <- #bytes we will compare word-parallel | |
9385eb3d A |
135 | beq-- Lcrosspage // not even one word/doubleword fits (cr0 from srgi.): we're at a RHS page boundary |
136 | mtctr r8 // ctr <- loop count | |
137 | sub r5,r5,r9 // decrement length remaining by the bytes the word loop will cover | |
138 | b Lwordloop | |
139 | ||
59e0d9fe | 140 | // Compare a word or doubleword at a time, until one of two conditions: |
9385eb3d A |
141 | // - a difference is found |
142 | // - end of count (ie, end of buffer or RHS page, whichever is first) | |
143 | // At this point, registers are as follows: | |
144 | // r3 = LHS ptr (aligned) | |
145 | // r4 = RHS ptr (unaligned) | |
146 | // r5 = length remaining in buffer, not counting the bytes this loop covers (may be 0) | |
59e0d9fe | 147 | // ctr = count of word/doublewords until end of buffer or RHS page |
9385eb3d | 148 |
59e0d9fe | 149 | .align 5 // align inner loop |
9385eb3d | 150 | Lwordloop: |
59e0d9fe A |
151 | lg r7,0(r3) // r7 <- next aligned LHS word or doubleword |
152 | addi r3,r3,GPR_BYTES | |
153 | lg r8,0(r4) // r8 <- next unaligned RHS word or doubleword | |
154 | addi r4,r4,GPR_BYTES | |
155 | xor. r11,r7,r8 // compare them: r11 <- r7 ^ r8, cr0 eq if they are identical | |
9385eb3d A |
156 | bdnzt eq,Lwordloop // decrement ctr, then loop if ctr!=0 and cr0_eq |
157 | ||
59e0d9fe | 158 | beq Lcrosspage // skip if buffer or page end reached without a difference; otherwise fall through with r11 nonzero |
9385eb3d A |
159 | |
160 | // Found differing bytes: r7 and r8 hold the LHS and RHS words/doublewords, r11 holds their xor (nonzero). | |
161 | ||
59e0d9fe A |
162 | cntlzg r0,r11 // count leading zeros of the xor to find 1st difference (r0 = 0..31 or 63) |
163 | rlwinm r9,r0,0,0x38 // byte align bit offset: r9 <- r0 & 0x38 (r9 = 0,8,16, or 24 etc) | |
164 | addi r0,r9,8 // rotate count that moves that byte to the low end: now, r0 = 8, 16, 24, or 32 etc | |
165 | #if defined(__ppc__) | |
9385eb3d A |
166 | rlwnm r7,r7,r0,24,31 // rotate left by r0 and keep bits 24..31: right justify differing bytes and mask off rest |
167 | rlwnm r8,r8,r0,24,31 | |
59e0d9fe A |
168 | #else |
169 | rldcl r7,r7,r0,56 // rotate left by r0 and keep the low 8 bits: right justify differing bytes and mask off rest | |
170 | rldcl r8,r8,r0,56 | |
171 | #endif | |
9385eb3d A |
172 | |
173 | Ldifferent: // bytes in r7 and r8 differ (also entered directly from the byte loop) | |
174 | sub r3,r7,r8 // compute return value: r3 <- LHS byte - RHS byte | |
175 | blr | |
175 | blr | |
176 |