]>
Commit | Line | Data |
---|---|---|
0b4e3aa0 A |
1 | /* |
2 | * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. | |
3 | * | |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
0b4e3aa0 | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
8f6c56a5 | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
8f6c56a5 | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
0b4e3aa0 A |
27 | */ |
28 | ; | |
29 | ; | |
30 | ; Strlen, optimized for PPC. The routine we use is 2-3x faster | |
31 | ; than the simple loop which checks each byte for zero. | |
32 | ; For 0- and 1-byte strings, the simple routine is faster, but | |
33 | ; only by a few cycles. The algorithm used was adapted from the | |
34 | ; Mac OS 9 stdCLib strcopy routine, which was originally | |
35 | ; written by Gary Davidian. It relies on the following rather | |
36 | ; non-obvious but very efficient test: | |
37 | ; | |
38 | ; y = dataWord + 0xFEFEFEFF | |
39 | ; z = ~dataWord & 0x80808080 | |
40 | ; if ( y & z ) = 0 then all bytes in dataWord are non-zero | |
41 | ; | |
42 | ; The test maps any non-zero byte to zeros and any zero byte to 0x80, | |
43 | ; with one exception: 0x01 bytes preceding the first zero are also | |
44 | ; mapped to 0x80. | |
45 | ; | |
46 | #include <ppc/asm.h> | |
47 | #include <ppc/proc_reg.h> | |
48 | ; | |
49 | ; int strlen(ptr) | |
50 | ; In: r3 = ptr, address of the string. Out: r3 = number of bytes before the first zero byte. | |
51 | ; Modifies r4-r9, cr0 and cr1; makes no calls and does not touch the stack. | |
52 | ||
53 | .align 5 | |
54 | .globl EXT(strlen) | |
55 | LEXT(strlen) | |
56 | ||
57 | andi. r4,r3,0x03 ; r4 = low 2 bits of ptr; cr0 is ne when ptr is not word aligned | |
58 | mr r9,r3 ; r9 = original address, kept to compute the length later | |
59 | bne LalignSource ; unaligned: check the leading bytes one at a time first | |
60 | Llentry: | |
61 | lis r5,hi16(0xFEFEFEFF) | |
62 | lis r6,hi16(0x80808080) | |
63 | subi r3,r3,0x04 ; pre-decrement r3 because lwzu adds 4 before each load | |
64 | ori r5,r5,lo16(0xFEFEFEFF) ; r5=0xFEFEFEFF | |
65 | ori r6,r6,lo16(0x80808080) ; r6=0x80808080 | |
66 | ||
67 | LLoop: | |
68 | lwzu r8,4(r3) ; advance r3 by 4 and load the word at the new address into r8 | |
69 | add r4,r5,r8 ; r4 = data + 0xFEFEFEFF | |
70 | andc r7,r6,r8 ; r7 = ~data & 0x80808080 | |
71 | and. r4,r4,r7 ; r4 = r4 & r7, and set cr0 from the result | |
72 | beq LLoop ; r4 == 0 means all four bytes are non-zero, so keep scanning | |
73 | ||
74 | ; Now we know at least one of the bytes in r8 is zero, | |
75 | ; we just have to figure out which one comes first. | |
76 | ; We have mapped 0 bytes to 0x80, and nonzero bytes to 0x00, | |
77 | ; with one exception: | |
78 | ; 0x01 bytes preceding the first zero are also mapped to 0x80. | |
79 | ; So we have to mask out the 0x80s caused by 0x01s before | |
80 | ; counting leading zeroes to get the bytes in the last word. | |
81 | ||
82 | rlwinm r5,r8,7,0,31 ; rotate left by 7: each byte's 0x01 bit lands on its 0x80 position | |
83 | subf r3,r9,r3 ; r3 = byte offset of the last loaded word from the original address | |
84 | andc r4,r4,r5 ; clear the false hits caused by 0x01 bytes (0x0100 worst case) | |
85 | cntlzw r7,r4 ; count the zero bits ahead of the first remaining 0x80 marker | |
86 | srwi r7,r7,3 ; convert 0,8,16,24 to 0,1,2,3 | |
87 | add r3,r3,r7 ; add in nonzero bytes in last word | |
88 | blr | |
89 | ||
90 | ; We must align the source address for two reasons: to avoid spurious page | |
91 | ; faults, and for speed. | |
92 | ; r4 = low 2 bits of address (1, 2, or 3), so 3, 2, or 1 bytes are checked here | |
93 | ; r3 = address | |
94 | ; r9 = original address (still same as r3) | |
95 | ||
96 | LalignSource: | |
97 | lbz r5,0(r3) ; get the first byte... | |
98 | subic. r4,r4,2 ; cr0 = (low bits - 2) compared with 0: gt if 3, eq if 2, lt if 1 | |
99 | addi r3,r3,1 ; increment address | |
100 | addi r6,r9,1 ; r6 = original address + 1 (== r3 here); Lreturn subtracts it | |
101 | cmpwi cr1,r5,0 ; zero? (cr1 is used so cr0 from subic. stays valid) | |
102 | beq cr1,Lreturn ; if it's zero return zero | |
103 | bgt Llentry ; address is aligned now if low bits were 3 | |
104 | ||
105 | lbz r5,0(r3) ; get the next byte... | |
106 | addi r3,r3,1 ; increment address | |
107 | cmpwi cr1,r5,0 ; zero? | |
108 | beq cr1,Lreturn ; if it's zero return one | |
109 | beq Llentry ; addr is aligned now if low bits were 2 | |
110 | ||
111 | lbz r5,0(r3) ; get the next byte... | |
112 | addi r3,r3,1 ; increment address | |
113 | cmpwi cr1,r5,0 ; zero? | |
114 | bne cr1,Llentry ; not zero, continue with the word loop (now aligned) | |
115 | Lreturn: | |
116 | sub r3,r3,r6 ; r3 - r6 = string length (0, 1, or 2) | |
117 | blr | |
118 |