]> git.saurik.com Git - apple/libc.git/blob - gen.subproj/ppc.subproj/bcopy.s
38ffd428d80451284ac7756686e4316078f3d823
[apple/libc.git] / gen.subproj / ppc.subproj / bcopy.s
1 /*
2 * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 ;
23 ; Copy bytes of data around. Overlapping source and sink ranges are handled.
24 ;
25 ; TODO: change this to use Altivec later on
26 ;
27
28 ; void bcopy(from, to, nbytes)
29 ; On entry: r3 = from (source), r4 = to (sink), r5 = nbytes.
30 ; Returns at once when from == to or nbytes == 0; otherwise branches to Lcopyit.
31
32 ; CR bit 20 (CR5_lt) is the "noncache" flag; when set, the line loop skips DCBZ/DCBT
33 #define noncache 20
34 .text
35 .align 2
36 #if !defined(MEMCPY) && !defined(MEMMOVE)
37 .globl _bcopy
38 _bcopy:
39 crclr noncache ; Clear the noncache flag (treat the memory as cached)
40 cmplw cr1,r4,r3 ; Compare "to" and "from" (result in CR1)
41 mr. r5,r5 ; Check if we have a 0 length (result in CR0)
42 mr r6,r3 ; Set source (r6 = from); r4 already holds the sink
43 beqlr- cr1 ; Bail if "to" and "from" are the same
44 beqlr- ; Bail if length is 0
45 b Lcopyit ; Go copy it...
46
47 ;
48 ; When we move the memory, forward overlays must be handled. We
49 ; also can not use the cache instructions if we are from bcopy_nc.
50 ; We need to preserve R3 because it needs to be returned for memcpy.
51 ; We can be interrupted and lose control here.
52 ;
53 ; There is no stack, so in order to use floating point, we would
54 ; need to take the FP exception. Any potential gains by using FP
55 ; would be more than eaten up by this.
56 ;
57 ; Later, we should use Altivec for large moves.
58 ;
59
60 #else
61 #if defined(MEMCPY)
62 .globl _memcpy
63 _memcpy:
64 #endif
65
66 #if defined(MEMMOVE)
67 .globl _memmove
68 _memmove:
69 #endif
70 cmplw cr1,r3,r4 ; memcpy/memmove entry (r3 = "to", r4 = "from", r5 = length): "to" and "from" the same?
71 mr r6,r4 ; Set the "from" (r6 = source)
72 mr. r5,r5 ; Length zero? (result in CR0)
73 crclr noncache ; Clear the noncache flag (treat the memory as cached)
74 mr r4,r3 ; Set the "to" (r4 = sink); r3 itself is left untouched
75 beqlr- cr1 ; "to" and "from" are the same
76 beqlr- ; Length is 0
77 #endif
78 Lcopyit: sub r12,r4,r6 ; r12 = sink - source (negative if the sink is below the source)
79 lis r8,0x7FFF ; Start up a mask
80 srawi r11,r12,31 ; Propagate the sign bit (r11 = 0 or -1)
81 dcbt 0,r6 ; Touch in the first source line
82 cntlzw r7,r5 ; Count leading zeros of the length (locates its highest set bit)
83 ori r8,r8,0xFFFF ; Make limit 0x7FFFFFFF
84 xor r9,r12,r11 ; If sink - source was negative, invert bits
85 srw r8,r8,r7 ; Get move length limitation: mask of all bits below the length's highest set bit
86 sub r9,r9,r11 ; If sink - source was negative, add 1 and get absolute value
87 cmplw r12,r5 ; Unsigned compare in CR0: is (sink - source) < length, i.e. do we forward overlap?
88 cmplwi cr7,r9,32 ; See if at least a line between source and sink
89 dcbtst 0,r4 ; Touch in the first sink line
90 cmplwi cr1,r5,32 ; Are we moving more than a line? NOTE(review): nothing visible in this file reads CR1 after this point
91 cror noncache,noncache,28 ; noncache |= CR7_lt: do not DCBZ output line if source and sink are less than 32 bytes apart
92 blt- Lfwdovrlap ; This is a forward overlapping area, handle it... (tests CR0 from the cmplw above)
93
94 ; Register state for the ascending (non-overlapping) copy that follows:
95 ; R4 = sink
96 ; R5 = length
97 ; R6 = source
98 ; R8 = move length limit mask; CR bit noncache set = skip DCBZ/DCBT
99
100 ;
101 ; Here we figure out how much we have to move to get the sink onto a
102 ; cache boundary. If we can, and there are still more than 32 bytes
103 ; left to move, we can really speed things up by DCBZing the sink line.
104 ; We can not do this if noncache is set because we will take an
105 ; alignment exception.
106
107 neg r0,r4 ; Get the number of bytes to move to align to a line boundary
108 rlwinm. r0,r0,0,27,31 ; Keep only the low 5 bits and test them (CR0 feeds the beq below)
109 and r0,r0,r8 ; limit to the maximum front end move (no record form, CR0 is unchanged)
110 mtcrf 3,r0 ; Make branch mask for partial moves (low 8 bits of r0 go to CR6/CR7)
111 sub r5,r5,r0 ; Set the length left to move
112 beq Lalline ; Already on a line...
113
114 bf 31,Lalhalf ; No single byte to do... (CR bit 31 = the 1s bit of the count)
115 lbz r7,0(r6) ; Get the byte
116 addi r6,r6,1 ; Point to the next
117 stb r7,0(r4) ; Save the single
118 addi r4,r4,1 ; Bump sink
119
120 ; Sink is halfword aligned here
121
122 Lalhalf: bf 30,Lalword ; No halfword to do... (CR bit 30 = the 2s bit)
123 lhz r7,0(r6) ; Get the halfword
124 addi r6,r6,2 ; Point to the next
125 sth r7,0(r4) ; Save the halfword
126 addi r4,r4,2 ; Bump sink
127
128 ; Sink is word aligned here
129
130 Lalword: bf 29,Laldouble ; No word to do... (CR bit 29 = the 4s bit)
131 lwz r7,0(r6) ; Get the word
132 addi r6,r6,4 ; Point to the next
133 stw r7,0(r4) ; Save the word
134 addi r4,r4,4 ; Bump sink
135
136 ; Sink is double aligned here
137
138 Laldouble: bf 28,Lalquad ; No double to do... (CR bit 28 = the 8s bit)
139 lwz r7,0(r6) ; Get the first word
140 lwz r8,4(r6) ; Get the second word (r8 is reused as scratch; the move limit was already applied)
141 addi r6,r6,8 ; Point to the next
142 stw r7,0(r4) ; Save the first word
143 stw r8,4(r4) ; Save the second word
144 addi r4,r4,8 ; Bump sink
145
146 ; Sink is quadword aligned here
147
148 Lalquad: bf 27,Lalline ; No quad to do... (CR bit 27 = the 16s bit)
149 lwz r7,0(r6) ; Get the first word
150 lwz r8,4(r6) ; Get the second word
151 lwz r9,8(r6) ; Get the third word
152 stw r7,0(r4) ; Save the first word
153 lwz r11,12(r6) ; Get the fourth word
154 addi r6,r6,16 ; Point to the next
155 stw r8,4(r4) ; Save the second word
156 stw r9,8(r4) ; Save the third word
157 stw r11,12(r4) ; Save the fourth word
158 addi r4,r4,16 ; Bump sink
159
160 ; Sink is line aligned here. Copy whole 32-byte lines, ascending.
161
162 Lalline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move (r0 = r5 >> 5)
163 mtcrf 3,r5 ; Make branch mask for backend partial moves (low bits of r5 go to CR6/CR7)
164 rlwinm r11,r5,0,0,26 ; Get number of bytes to move in full lines (r5 with the low 5 bits cleared)
165 beq- Lbackend ; No full lines to move
166
167 sub r5,r5,r11 ; Calculate the residual
168 li r10,96 ; Stride for touch ahead
169
170 Lnxtline: subic. r0,r0,1 ; Account for the line now (CR0 is tested by the bgt at the bottom of the loop)
171
172 bt- noncache,Lskipz ; Skip the cache operations if the noncache flag is set...
173 dcbz 0,r4 ; Blow away the whole line because we are replacing it
174 dcbt r6,r10 ; Touch ahead a bit (source + 96)
175
176 Lskipz: lwz r7,0(r6) ; Get the first word
177 lwz r8,4(r6) ; Get the second word
178 lwz r9,8(r6) ; Get the third word
179 stw r7,0(r4) ; Save the first word
180 lwz r11,12(r6) ; Get the fourth word
181 stw r8,4(r4) ; Save the second word
182 lwz r7,16(r6) ; Get the fifth word
183 stw r9,8(r4) ; Save the third word
184 lwz r8,20(r6) ; Get the sixth word
185 stw r11,12(r4) ; Save the fourth word
186 lwz r9,24(r6) ; Get the seventh word
187 stw r7,16(r4) ; Save the fifth word
188 lwz r11,28(r6) ; Get the eighth word
189 addi r6,r6,32 ; Point to the next
190 stw r8,20(r4) ; Save the sixth word
191 stw r9,24(r4) ; Save the seventh word
192 stw r11,28(r4) ; Save the eighth word
193 addi r4,r4,32 ; Bump sink
194 bgt+ Lnxtline ; Do the next line, if any... (line count in r0 still positive)
195
196
197 ; Move backend quadword (residual bytes are selected by the CR bits set with mtcrf at Lalline)
198
199 Lbackend: bf 27,Lnoquad ; No quad to do... (CR bit 27 = the 16s bit of the residual)
200 lwz r7,0(r6) ; Get the first word
201 lwz r8,4(r6) ; Get the second word
202 lwz r9,8(r6) ; Get the third word
203 lwz r11,12(r6) ; Get the fourth word
204 stw r7,0(r4) ; Save the first word
205 addi r6,r6,16 ; Point to the next
206 stw r8,4(r4) ; Save the second word
207 stw r9,8(r4) ; Save the third word
208 stw r11,12(r4) ; Save the fourth word
209 addi r4,r4,16 ; Bump sink
210
211 ; Move backend double
212
213 Lnoquad: bf 28,Lnodouble ; No double to do... (CR bit 28 = the 8s bit)
214 lwz r7,0(r6) ; Get the first word
215 lwz r8,4(r6) ; Get the second word
216 addi r6,r6,8 ; Point to the next
217 stw r7,0(r4) ; Save the first word
218 stw r8,4(r4) ; Save the second word
219 addi r4,r4,8 ; Bump sink
220
221 ; Move backend word
222
223 Lnodouble: bf 29,Lnoword ; No word to do... (CR bit 29 = the 4s bit)
224 lwz r7,0(r6) ; Get the word
225 addi r6,r6,4 ; Point to the next
226 stw r7,0(r4) ; Save the word
227 addi r4,r4,4 ; Bump sink
228
229 ; Move backend halfword
230
231 Lnoword: bf 30,Lnohalf ; No halfword to do... (CR bit 30 = the 2s bit)
232 lhz r7,0(r6) ; Get the halfword
233 addi r6,r6,2 ; Point to the next
234 sth r7,0(r4) ; Save the halfword
235 addi r4,r4,2 ; Bump sink
236
237 ; Move backend byte
238
239 Lnohalf: bflr 31 ; Return if there is no odd byte left, we are all done...
240 lbz r7,0(r6) ; Get the byte
241 stb r7,0(r4) ; Save the single
242
243 blr ; Leave because we are all done...
244
245 ;
246 ; 0123456789ABCDEF0123456789ABCDEF
247 ; 0123456789ABCDEF0123456789ABCDEF
248 ; F
249 ; DE
250 ; 9ABC
251 ; 12345678
252 ; 123456789ABCDEF0
253 ; 0
254
255 ;
256 ; Here is where we handle a forward overlapping move. These will be slow
257 ; because we can not kill the cache of the destination until after we have
258 ; loaded/saved the source area. Also, reading memory backwards is slower
259 ; when the cache line needs to be loaded, because the critical
260 ; doubleword is loaded first, i.e., the last, then it goes back to the first,
261 ; and on in order. That means that when we are at the second to last DW we
262 ; have to wait until the whole line is in cache before we can proceed.
263 ;
264
265 Lfwdovrlap: add r4,r5,r4 ; Point past the last sink byte
266 add r6,r5,r6 ; Point past the last source byte
267 and r0,r4,r8 ; Apply movement limit to the sink end address
268 li r12,-1 ; Offset of -1 to make sure we touch in the actual last line
269 mtcrf 3,r0 ; Figure out the best way to move backwards (low 8 bits of r0 go to CR6/CR7)
270 dcbt r12,r6 ; Touch in the last line of source (address of the last source byte)
271 rlwinm. r0,r0,0,27,31 ; Calculate the length to adjust to cache boundary (CR0 feeds the beq below)
272 dcbtst r12,r4 ; Touch in the last line of the sink
273 beq- Lballine ; Already on cache line boundary
274
275 sub r5,r5,r0 ; Precalculate move length left after alignment
276
277 bf 31,Lbalhalf ; No single byte to do...
278 lbz r7,-1(r6) ; Get the byte
279 subi r6,r6,1 ; Back up the source pointer
280 stb r7,-1(r4) ; Save the single
281 subi r4,r4,1 ; Back up the sink pointer
282
283 ; Sink is halfword aligned here
284
285 Lbalhalf: bf 30,Lbalword ; No halfword to do...
286 lhz r7,-2(r6) ; Get the halfword
287 subi r6,r6,2 ; Back up the source pointer
288 sth r7,-2(r4) ; Save the halfword
289 subi r4,r4,2 ; Back up the sink pointer
290
291 ; Sink is word aligned here
292
293 Lbalword: bf 29,Lbaldouble ; No word to do...
294 lwz r7,-4(r6) ; Get the word
295 subi r6,r6,4 ; Back up the source pointer
296 stw r7,-4(r4) ; Save the word
297 subi r4,r4,4 ; Back up the sink pointer
298
299 ; Sink is double aligned here
300
301 Lbaldouble: bf 28,Lbalquad ; No double to do...
302 lwz r7,-8(r6) ; Get the first word
303 lwz r8,-4(r6) ; Get the second word (r8 is reused as scratch; the move limit was already applied)
304 subi r6,r6,8 ; Back up the source pointer
305 stw r7,-8(r4) ; Save the first word
306 stw r8,-4(r4) ; Save the second word
307 subi r4,r4,8 ; Back up the sink pointer
308
309 ; Sink is quadword aligned here
310
311 Lbalquad: bf 27,Lballine ; No quad to do...
312 lwz r7,-16(r6) ; Get the first word
313 lwz r8,-12(r6) ; Get the second word
314 lwz r9,-8(r6) ; Get the third word
315 lwz r11,-4(r6) ; Get the fourth word
316 stw r7,-16(r4) ; Save the first word
317 subi r6,r6,16 ; Back up the source pointer
318 stw r8,-12(r4) ; Save the second word
319 stw r9,-8(r4) ; Save the third word
320 stw r11,-4(r4) ; Save the fourth word
321 subi r4,r4,16 ; Back up the sink pointer
322
323 ; Sink is line aligned here. Copy whole 32-byte lines, descending.
324
325 Lballine: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move (r0 = r5 >> 5)
326 mtcrf 3,r5 ; Make branch mask for backend partial moves (low bits of r5 go to CR6/CR7)
327 beq- Lbbackend ; No full lines to move
328
329
330 ; Registers in use: R0 (line count), R1, R3, R4 (sink), R5, R6 (source)
331 ; Registers not in use: R2, R7, R8, R9, R10, R11, R12 - R5 is also free now that mtcrf has captured its low bits, which gives the 8 data registers
332
333 Lbnxtline: subic. r0,r0,1 ; Account for the line now (CR0 is tested by the ble and bgt below)
334
335 lwz r7,-32(r6) ; Get the first word
336 lwz r5,-28(r6) ; Get the second word
337 lwz r2,-24(r6) ; Get the third word
338 lwz r12,-20(r6) ; Get the fourth word
339 lwz r11,-16(r6) ; Get the fifth word
340 lwz r10,-12(r6) ; Get the sixth word
341 lwz r9,-8(r6) ; Get the seventh word
342 lwz r8,-4(r6) ; Get the eighth word
343 subi r6,r6,32 ; Back up the source pointer by one line
344
345 stw r7,-32(r4) ; Save the first word
346 ble- Lbnotouch ; Last time, skip touch of source...
347 dcbt 0,r6 ; Touch in next source line
348
349 Lbnotouch: stw r5,-28(r4) ; Save the second word
350 stw r2,-24(r4) ; Save the third word
351 stw r12,-20(r4) ; Save the fourth word
352 stw r11,-16(r4) ; Save the fifth word
353 stw r10,-12(r4) ; Save the sixth word
354 stw r9,-8(r4) ; Save the seventh word
355 stw r8,-4(r4) ; Save the eighth word
356 subi r4,r4,32 ; Back up the sink pointer by one line
357
358 bgt+ Lbnxtline ; Do the next line, if any... (line count in r0 still positive)
359
360 ;
361 ; Note: We touched these lines in at the beginning
362 ;
363
364 ; Move backend quadword (residual bytes are selected by the CR bits set with mtcrf at Lballine)
365
366 Lbbackend: bf 27,Lbnoquad ; No quad to do... (CR bit 27 = the 16s bit of the residual)
367 lwz r7,-16(r6) ; Get the first word
368 lwz r8,-12(r6) ; Get the second word
369 lwz r9,-8(r6) ; Get the third word
370 lwz r11,-4(r6) ; Get the fourth word
371 stw r7,-16(r4) ; Save the first word
372 subi r6,r6,16 ; Back up the source pointer
373 stw r8,-12(r4) ; Save the second word
374 stw r9,-8(r4) ; Save the third word
375 stw r11,-4(r4) ; Save the fourth word
376 subi r4,r4,16 ; Back up the sink pointer
377
378 ; Move backend double
379
380 Lbnoquad: bf 28,Lbnodouble ; No double to do... (CR bit 28 = the 8s bit)
381 lwz r7,-8(r6) ; Get the first word
382 lwz r8,-4(r6) ; Get the second word
383 subi r6,r6,8 ; Back up the source pointer
384 stw r7,-8(r4) ; Save the first word
385 stw r8,-4(r4) ; Save the second word
386 subi r4,r4,8 ; Back up the sink pointer
387
388 ; Move backend word
389
390 Lbnodouble: bf 29,Lbnoword ; No word to do... (CR bit 29 = the 4s bit)
391 lwz r7,-4(r6) ; Get the word
392 subi r6,r6,4 ; Back up the source pointer
393 stw r7,-4(r4) ; Save the word
394 subi r4,r4,4 ; Back up the sink pointer
395
396 ; Move backend halfword
397
398 Lbnoword: bf 30,Lbnohalf ; No halfword to do... (CR bit 30 = the 2s bit)
399 lhz r7,-2(r6) ; Get the halfword
400 subi r6,r6,2 ; Back up the source pointer
401 sth r7,-2(r4) ; Save the halfword
402 subi r4,r4,2 ; Back up the sink pointer
403
404 ; Move backend byte
405
406 Lbnohalf: bflr 31 ; Return if there is no odd byte left, we are all done...
407 lbz r7,-1(r6) ; Get the byte
408 stb r7,-1(r4) ; Save the single
409
410 blr ; Leave because we are all done...