]>
Commit | Line | Data |
---|---|---|
0b4e3aa0 A |
1 | /* |
2 | * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. | |
3 | * | |
8ad349bb | 4 | * @APPLE_LICENSE_OSREFERENCE_HEADER_START@ |
0b4e3aa0 | 5 | * |
8ad349bb A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the | |
10 | * License may not be used to create, or enable the creation or | |
11 | * redistribution of, unlawful or unlicensed copies of an Apple operating | |
12 | * system, or to circumvent, violate, or enable the circumvention or | |
13 | * violation of, any terms of an Apple operating system software license | |
14 | * agreement. | |
15 | * | |
16 | * Please obtain a copy of the License at | |
17 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
18 | * file. | |
19 | * | |
20 | * The Original Code and all software distributed under the License are | |
21 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
22 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
23 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
24 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
25 | * Please see the License for the specific language governing rights and | |
26 | * limitations under the License. | |
27 | * | |
28 | * @APPLE_LICENSE_OSREFERENCE_HEADER_END@ | |
0b4e3aa0 A |
29 | */ |
30 | ; | |
31 | ; | |
32 | ; Strlen, optimized for PPC. The routine we use is 2-3x faster | |
33 | ; than the simple loop which checks each byte for zero. | |
34 | ; For 0- and 1-byte strings, the simple routine is faster, but | |
35 | ; only by a few cycles. The algorithm used was adapted from the | |
36 | ; Mac OS 9 stdCLib strcopy routine, which was originally | |
37 | ; written by Gary Davidian. It relies on the following rather | |
38 | ; non-obvious but very efficient test: | |
39 | ; | |
40 | ; y = dataWord + 0xFEFEFEFF | |
41 | ; z = ~dataWord & 0x80808080 | |
42 | ; if ( y & z ) = 0 then all bytes in dataWord are non-zero | |
43 | ; | |
44 | ; The test maps any non-zero byte to zeros and any zero byte to 0x80, | |
45 | ; with one exception: 0x01 bytes preceding the first zero are also | |
46 | ; mapped to 0x80. | |
47 | ; | |
48 | #include <ppc/asm.h> | |
49 | #include <ppc/proc_reg.h> | |
50 | ; | |
51 | ; int strlen(ptr) | |
52 | ; | |
53 | ; | |
54 | ||
; strlen: count the bytes that precede the first zero byte of a string.
;   In:   r3 = address of the string
;   Out:  r3 = length in bytes
;   Uses: r4-r9, cr0, cr1; makes no calls and performs no stores
; r9 keeps the original address for the whole routine. A word-aligned
; address falls straight into Llentry; any other address goes through
; LalignSource, which checks the 1-3 leading bytes one at a time and
; then joins the word-at-a-time loop at Llentry.
55 | .align 5 | |
56 | .globl EXT(strlen) | |
57 | LEXT(strlen) | |
58 | ||
59 | andi. r4,r3,0x03 ; r4 = low 2 bits of address; nonzero means not word-aligned | |
60 | mr r9,r3 ; r9 = original address, kept for the final length computation | |
61 | bne LalignSource ; align the source addr if not already aligned | |
; Word loop setup. r3 is word-aligned here; r5 and r6 are (re)loaded with
; the two constants of the zero-byte test, whatever they held before.
62 | Llentry: | |
63 | lis r5,hi16(0xFEFEFEFF) | |
64 | lis r6,hi16(0x80808080) | |
65 | subi r3,r3,0x04 ; pre-decrement r3: lwzu loads from r3+4 and updates r3 | |
66 | ori r5,r5,lo16(0xFEFEFEFF) ; r5=0xFEFEFEFF | |
67 | ori r6,r6,lo16(0x80808080) ; r6=0x80808080 | |
68 | ||
69 | LLoop: | |
70 | lwzu r8,4(r3) ; r8 = next 4 bytes; r3 now holds the address of that word | |
71 | add r4,r5,r8 ; r4= data + 0xFEFEFEFF | |
72 | andc r7,r6,r8 ; r7= ~data & 0x80808080 | |
73 | and. r4,r4,r7 ; r4= r4 & r7 (record form: sets cr0 for the beq) | |
74 | beq LLoop ; if r4 is zero, then all bytes are non-zero: keep scanning | |
75 | ||
76 | ; Now we know one of the bytes in r8 is zero, | |
77 | ; we just have to figure out which one. | |
78 | ; We have mapped 0 bytes to 0x80, and nonzero bytes to 0x00, | |
79 | ; with one exception: | |
80 | ; 0x01 bytes preceding the first zero are also mapped to 0x80. | |
81 | ; So we have to mask out the 0x80s caused by 0x01s before | |
82 | ; counting leading zeroes to get the bytes in last word. | |
83 | ||
84 | rlwinm r5,r8,7,0,31 ; rotate data left by 7: move 0x01 bits to 0x80 position | |
85 | subf r3,r9,r3 ; r3 = address of this word - original address | |
86 | andc r4,r4,r5 ; turn off false hits from 0x0100 worst case | |
87 | cntlzw r7,r4 ; now we can count leading 0s (bits before the first real hit) | |
88 | srwi r7,r7,3 ; convert 0,8,16,24 to 0,1,2,3 | |
89 | add r3,r3,r7 ; add in nonzero bytes in last word | |
90 | blr | |
91 | ||
92 | ; We must align the source address for two reasons: to avoid spurious page | |
93 | ; faults, and for speed. | |
94 | ; r4 = low 2 bits of address (1,2, or 3) | |
95 | ; r3 = address | |
96 | ; r9 = original address (still same as r3) | |
; The subic. below records the alignment case in cr0, and cr0 must survive
; until the bgt/beq that test it, so every byte compare here uses cr1.
; r6 is used as the base for the short-string result at Lreturn; it is
; overwritten once control reaches Llentry.
97 | ||
98 | LalignSource: | |
99 | lbz r5,0(r3) ; get the first byte... | |
100 | subic. r4,r4,2 ; cr0 = lt/eq/gt for low bits of 1/2/3 | |
101 | addi r3,r3,1 ; increment address | |
102 | addi r6,r9,1 ; now r6==r3 (original address + 1) | |
103 | cmpwi cr1,r5,0 ; zero? | |
104 | beq cr1,Lreturn ; if it's zero return zero | |
105 | bgt Llentry ; address is aligned now if low bits were 3 | |
106 | ||
107 | lbz r5,0(r3) ; get the next byte... | |
108 | addi r3,r3,1 ; increment address | |
109 | cmpwi cr1,r5,0 ; zero? | |
110 | beq cr1,Lreturn ; if it's zero return one | |
111 | beq Llentry ; addr is aligned now if low bits were 2 (cr0 still from subic.) | |
112 | ||
113 | lbz r5,0(r3) ; get the next byte... | |
114 | addi r3,r3,1 ; increment address | |
115 | cmpwi cr1,r5,0 ; zero? | |
116 | bne cr1,Llentry ; not zero, continue check (now aligned) | |
; Falling through: the third byte was zero, so the length is two.
117 | Lreturn: | |
118 | sub r3,r3,r6 ; r3 = r3 - r6: get string length (0, 1, or 2) | |
119 | blr | |
120 |