]> git.saurik.com Git - apple/xnu.git/blob - osfmk/ppc/bcopy.s
95ad3ea6bdd6147d4d8e1c69de09eb441a092942
[apple/xnu.git] / osfmk / ppc / bcopy.s
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 ;
23 ; Copy bytes of data around. handles overlapped data.
24 ;
25 ; Change this to use Altivec later on, and maybe floating point.
26 ;
27 ; NOTE: This file compiles and executes on both MacOS 8.x (Codewarrior)
28 ; and MacOS X. The "#if 0"s are treated as comments by CW so the
29 ; stuff between them is included by CW and excluded on MacOS X.
30 ; Same with the "#include"s.
31 ;
32 #include <ppc/asm.h>
33 #include <ppc/proc_reg.h>
34
35 ; Use CR5_lt to indicate non-cached
36 #define noncache 20
37 ; Use CR5_gt to indicate that we need to turn data translation back on
38 #define fixxlate 21
39 #if 0
40 noncache: equ 20
41 fixxlate: equ 21
42 #endif
43 #if 0
44 br0: equ 0
45 #endif
46
47 ;
48 ; bcopy_nc(from, to, nbytes) - in: r3 = from, r4 = to, r5 = nbytes
49 ;
50 ; bcopy_nc operates on non-cached memory so we can not use any kind
51 ; of cache instructions. This entry only raises the non-cached flag and
52 ; then joins the common bcopy path at bcpswap.
53 ; NOTE(review): in the copy code visible in this file the flag only skips
54 ; the dcbz/dcbt pair inside the line loop; the initial dcbt/dcbtst touches
55 ; are issued regardless - confirm that is acceptable for non-cached memory.
56 #if 0
57 IF 0
58 #endif
59 ENTRY(bcopy_nc, TAG_NO_FRAME_USED)
60 #if 0
61 ENDIF
62 export xbcopy_nc[DS]
63 tc xbcopy_nc[TC],xbcopy_nc[DS]
64 csect xbcopy_nc[DS]
65 dc.l .xbcopy_nc
66 dc.l TOC[tc0]
67 export .xbcopy_nc
68 csect xbcopy_nc[PR]
69 .xbcopy_nc:
70 #endif
71
72 crset noncache ; Set the non-cached flag (CR bit 20, CR5_lt)
73 b bcpswap ; Join bcopy past its own flag clear, so the flag stays set
74
75 ;
76 ; void bcopy_phys(from, to, nbytes) - in: r3 = from, r4 = to, r5 = nbytes
77 ; Turns off data translation (the MSR DR bit) before the copy. Note, this one will
78 ; not work in user state because it reads and writes the MSR.
79 ; Returns at once, before the MSR is changed, if from == to or nbytes == 0.
80
81 #if 0
82 IF 0
83 #endif
84 ENTRY(bcopy_phys, TAG_NO_FRAME_USED)
85 #if 0
86 ENDIF
87 export xbcopy_phys[DS]
88 tc bcopy_physc[TC],bcopy_phys[DS]
89 csect bcopy_phys[DS]
90 dc.l .bcopy_phys
91 dc.l TOC[tc0]
92 export .bcopy_phys
93 csect bcopy_phys[PR]
94 .bcopy_phys:
95 #endif
96
97 mfmsr r9 ; Get the MSR
98 crclr noncache ; Clear the non-cached flag (treat the memory as cached)
99 rlwinm. r8,r9,0,MSR_DR_BIT,MSR_DR_BIT ; Isolate the DR bit in r8; CR0_eq is set if translation was already off
100
101 cmplw cr1,r4,r3 ; Compare "to" and "from"
102 cmplwi cr7,r5,0 ; Check if we have a 0 length
103 mr r6,r3 ; Set source
104 beqlr- cr1 ; Bail if "to" and "from" are the same
105 xor r9,r9,r8 ; Turn off translation if it is on (should be): clears DR in the MSR image
106 beqlr- cr7 ; Bail if length is 0
107
108 mtmsr r9 ; Set DR translation off
109 isync ; Wait for it
110
111 crnot fixxlate,cr0_eq ; Remember to turn on translation if it was (CR0 is still from the rlwinm. above)
112 b copyit ; Go copy it with r4 = sink, r5 = length, r6 = source...
113
114 ;
115 ; void bcopy(from, to, nbytes) - in: r3 = from, r4 = to, r5 = nbytes
116 ; Cached copy; bcopy_nc joins at bcpswap with the non-cached flag already set.
117 ; Goes to copyit with r4 = sink, r5 = length, r6 = source; r3 is not changed here.
118 #if 0
119 IF 0
120 #endif
121 ENTRY(bcopy, TAG_NO_FRAME_USED)
122 #if 0
123 ENDIF
124 export xbcopy[DS]
125 tc xbcopyc[TC],xbcopy[DS]
126 csect xbcopy[DS]
127 dc.l .xbcopy
128 dc.l TOC[tc0]
129 export .xbcopy
130 csect xbcopy[PR]
131 .xbcopy:
132 #endif
133
134 crclr noncache ; Clear the non-cached flag (treat the memory as cached)
135
136 bcpswap: cmplw cr1,r4,r3 ; Compare "to" and "from"
137 mr. r5,r5 ; Check if we have a 0 length (sets CR0 from the length)
138 mr r6,r3 ; Set source
139 beqlr- cr1 ; Bail if "to" and "from" are the same
140 beqlr- ; Bail if length is 0 (CR0 from the mr. above)
141 crclr fixxlate ; Set translation already ok, nothing to restore on exit
142 b copyit ; Go copy it...
143
144 ;
145 ; When we move the memory, forward overlays must be handled. We
146 ; also can not use the cache instructions if we are from bcopy_nc.
147 ; We need to preserve R3 because it needs to be returned for memcpy.
148 ; We can be interrupted and lose control here.
149 ;
150 ; There is no stack, so in order to use floating point, we would
151 ; need to take the FP exception. Any potential gains by using FP
152 ; would be more than eaten up by this.
153 ;
154 ; Later, we should use Altivec for large moves.
155 ;
156 ; memcpy entry - in: r3 = "to", r4 = "from", r5 = length. r3 is not written, so it is still "to" on return.
157 #if 0
158 IF 0
159 #endif
160 ENTRY(memcpy, TAG_NO_FRAME_USED)
161 #if 0
162 ENDIF
163 export xmemcpy[DS]
164 tc xmemcpy[TC],xmemcpy[DS]
165 csect xmemcpy[DS]
166 dc.l .xmemcpy
167 dc.l TOC[tc0]
168 export .xmemcpy
169 csect xmemcpy[PR]
170 .xmemcpy:
171 #endif
172 cmplw cr1,r3,r4 ; "to" and "from" the same?
173 mr r6,r4 ; Set the "from" (source goes in r6)
174 mr. r5,r5 ; Length zero? (sets CR0 from the length)
175 crclr noncache ; Clear the non-cached flag (treat the memory as cached)
176 mr r4,r3 ; Set the "to" (sink goes in r4)
177 crclr fixxlate ; Set translation already ok, nothing to restore on exit
178 beqlr- cr1 ; "to" and "from" are the same
179 beqlr- ; Length is 0 (CR0 from the mr. above); otherwise fall through into copyit
180
181 copyit: sub r12,r4,r6 ; r12 = sink - source (negative if backward move)
182 lis r8,0x7FFF ; Start up a mask
183 srawi r11,r12,31 ; Propagate the sign bit: r11 = 0 or -1
184 dcbt br0,r6 ; Touch in the first source line
185 cntlzw r7,r5 ; Count leading zeros of the length (locates its highest set bit)
186 ori r8,r8,0xFFFF ; Make limit 0x7FFFFFFF
187 xor r9,r12,r11 ; If sink - source was negative, invert bits
188 srw r8,r8,r7 ; Get move length limitation: mask of the bits below the highest set bit of the length
189 sub r9,r9,r11 ; If sink - source was negative, add 1 and get absolute value
190 cmplw r12,r5 ; See if we actually forward overlap: unsigned (sink - source) < length
191 cmplwi cr7,r9,32 ; See if at least a line between source and sink
192 dcbtst br0,r4 ; Touch in the first sink line
193 cmplwi cr1,r5,32 ; Are we moving more than a line? NOTE(review): CR1 is not tested again in the code shown here
194 cror noncache,noncache,28 ; Set to not DCBZ output line if not enough space (CR bit 28 is CR7_lt from the compare with 32)
195 blt- fwdovrlap ; This is a forward overlapping area (CR0 from the cmplw above), handle it...
196
197 ;
198 ; R4 = sink
199 ; R5 = length
200 ; R6 = source
201 ; R3 is not written on this path; R0 and R7-R12 are used as scratch
202
203 ;
204 ; Here we figure out how much we have to move to get the sink onto a
205 ; cache boundary. If we can, and there are still more than 32 bytes
206 ; left to move, we can really speed things up by DCBZing the sink line.
207 ; We can not do this if noncache is set because we will take an
208 ; alignment exception.
209
210 neg r0,r4 ; Get the number of bytes to move to align to a line boundary
211 rlwinm. r0,r0,0,27,31 ; Clean it up (keep the low 5 bits) and test it; CR0_eq if the sink is already on a line
212 and r0,r0,r8 ; limit to the maximum front end move
213 mtcrf 3,r0 ; Make branch mask for partial moves: CR bits 27-31 select the 16, 8, 4, 2 and 1 byte moves
214 sub r5,r5,r0 ; Set the length left to move
215 beq alline ; Already on a line (CR0 is still from the rlwinm. above)...
216 ; The limit can trim the front end move only when the length is under 32, and then no full lines remain.
217 bf 31,alhalf ; No single byte to do...
218 lbz r7,0(r6) ; Get the byte
219 addi r6,r6,1 ; Point to the next
220 stb r7,0(r4) ; Save the single
221 addi r4,r4,1 ; Bump sink
222
223 ; Sink is halfword aligned here
224
225 alhalf: bf 30,alword ; No halfword to do...
226 lhz r7,0(r6) ; Get the halfword
227 addi r6,r6,2 ; Point to the next
228 sth r7,0(r4) ; Save the halfword
229 addi r4,r4,2 ; Bump sink
230
231 ; Sink is word aligned here
232
233 alword: bf 29,aldouble ; No word to do...
234 lwz r7,0(r6) ; Get the word
235 addi r6,r6,4 ; Point to the next
236 stw r7,0(r4) ; Save the word
237 addi r4,r4,4 ; Bump sink
238
239 ; Sink is double aligned here
240
241 aldouble: bf 28,alquad ; No double to do...
242 lwz r7,0(r6) ; Get the first word
243 lwz r8,4(r6) ; Get the second word (r8 is free now, the limit was already applied)
244 addi r6,r6,8 ; Point to the next
245 stw r7,0(r4) ; Save the first word
246 stw r8,4(r4) ; Save the second word
247 addi r4,r4,8 ; Bump sink
248
249 ; Sink is quadword aligned here
250
251 alquad: bf 27,alline ; No quad to do...
252 lwz r7,0(r6) ; Get the first word
253 lwz r8,4(r6) ; Get the second word
254 lwz r9,8(r6) ; Get the third word
255 stw r7,0(r4) ; Save the first word
256 lwz r11,12(r6) ; Get the fourth word
257 addi r6,r6,16 ; Point to the next
258 stw r8,4(r4) ; Save the second word
259 stw r9,8(r4) ; Save the third word
260 stw r11,12(r4) ; Save the fourth word
261 addi r4,r4,16 ; Bump sink
262
263 ; Sink is line aligned here
264
265 alline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move (length shifted right by 5)
266 mtcrf 3,r5 ; Make branch mask for backend partial moves (low bits of the remaining length)
267 rlwinm r11,r5,0,0,26 ; Get number of bytes we are going to move (length with the low 5 bits cleared)
268 beq- backend ; No full lines to move
269
270 sub r5,r5,r11 ; Calculate the residual
271 li r10,96 ; Stride for touch ahead
272
273 nxtline: subic. r0,r0,1 ; Account for the line now; this CR0 is what the bgt at the bottom of the loop tests
274
275 bt- noncache,skipz ; Skip the DCBZ and the touch ahead if we are not cached...
276 dcbz br0,r4 ; Blow away the whole line because we are replacing it
277 dcbt r6,r10 ; Touch ahead a bit (source + 96)
278
279 skipz: lwz r7,0(r6) ; Get the first word
280 lwz r8,4(r6) ; Get the second word
281 lwz r9,8(r6) ; Get the third word
282 stw r7,0(r4) ; Save the first word
283 lwz r11,12(r6) ; Get the fourth word
284 stw r8,4(r4) ; Save the second word
285 lwz r7,16(r6) ; Get the fifth word
286 stw r9,8(r4) ; Save the third word
287 lwz r8,20(r6) ; Get the sixth word
288 stw r11,12(r4) ; Save the fourth word
289 lwz r9,24(r6) ; Get the seventh word
290 stw r7,16(r4) ; Save the fifth word
291 lwz r11,28(r6) ; Get the eighth word
292 addi r6,r6,32 ; Point to the next
293 stw r8,20(r4) ; Save the sixth word
294 stw r9,24(r4) ; Save the seventh word
295 stw r11,28(r4) ; Save the eighth word
296 addi r4,r4,32 ; Bump sink
297 bgt+ nxtline ; Do the next line, if any...
298
299
300 ; Move backend quadword
301
302 backend: bf 27,noquad ; No quad to do...
303 lwz r7,0(r6) ; Get the first word
304 lwz r8,4(r6) ; Get the second word
305 lwz r9,8(r6) ; Get the third word
306 lwz r11,12(r6) ; Get the fourth word
307 stw r7,0(r4) ; Save the first word
308 addi r6,r6,16 ; Point to the next
309 stw r8,4(r4) ; Save the second word
310 stw r9,8(r4) ; Save the third word
311 stw r11,12(r4) ; Save the fourth word
312 addi r4,r4,16 ; Bump sink
313
314 ; Move backend double
315
316 noquad: bf 28,nodouble ; No double to do...
317 lwz r7,0(r6) ; Get the first word
318 lwz r8,4(r6) ; Get the second word
319 addi r6,r6,8 ; Point to the next
320 stw r7,0(r4) ; Save the first word
321 stw r8,4(r4) ; Save the second word
322 addi r4,r4,8 ; Bump sink
323
324 ; Move backend word
325
326 nodouble: bf 29,noword ; No word to do...
327 lwz r7,0(r6) ; Get the word
328 addi r6,r6,4 ; Point to the next
329 stw r7,0(r4) ; Save the word
330 addi r4,r4,4 ; Bump sink
331
332 ; Move backend halfword
333
334 noword: bf 30,nohalf ; No halfword to do...
335 lhz r7,0(r6) ; Get the halfword
336 addi r6,r6,2 ; Point to the next
337 sth r7,0(r4) ; Save the halfword
338 addi r4,r4,2 ; Bump sink
339
340 ; Move backend byte
341
342 nohalf: bf 31,bcpydone ; Leave cuz we are all done...
343 lbz r7,0(r6) ; Get the byte
344 stb r7,0(r4) ; Save the single
345
346 bcpydone: bflr fixxlate ; Leave now if we do not need to fix translation (only bcopy_phys sets this flag)...
347 mfmsr r9 ; Get the MSR
348 ori r9,r9,lo16(MASK(MSR_DR)) ; Turn data translation on
349 mtmsr r9 ; Just do it
350 isync ; Hang in there
351 blr ; Leave cuz we are all done...
352
353 ;
354 ; 0123456789ABCDEF0123456789ABCDEF
355 ; 0123456789ABCDEF0123456789ABCDEF
356 ; F
357 ; DE
358 ; 9ABC
359 ; 12345678
360 ; 123456789ABCDEF0
361 ; 0
362
363 ;
364 ; Here is where we handle a forward overlapping move. These will be slow
365 ; because we can not kill the cache of the destination until after we have
366 ; loaded/saved the source area. Also, reading memory backwards is
367 ; slower when the cache line needs to be loaded because the critical
368 ; doubleword is loaded first, i.e., the last, then it goes back to the first,
369 ; and on in order. That means that when we are at the second to last DW we
370 ; have to wait until the whole line is in cache before we can proceed.
371 ; In: r4 = sink, r5 = length, r6 = source, r8 = move length limit from copyit. The copy runs from the last byte down.
372 ; Both exits go through bcpydone so that data translation is turned back on when bcopy_phys turned it off.
373 fwdovrlap: add r4,r5,r4 ; Point past the last sink byte
374 add r6,r5,r6 ; Point past the last source byte
375 and r0,r4,r8 ; Apply movement limit to the low bits of the sink end address
376 li r12,-1 ; Make sure we touch in the actual line (offset back to the last byte)
377 mtcrf 3,r0 ; Figure out the best way to move backwards: CR bits 27-31 select the 16, 8, 4, 2 and 1 byte moves
378 dcbt r12,r6 ; Touch in the last line of source
379 rlwinm. r0,r0,0,27,31 ; Calculate the length to adjust to cache boundary
380 dcbtst r12,r4 ; Touch in the last line of the sink
381 beq- balline ; Already on cache line boundary
382
383 sub r5,r5,r0 ; Precalculate move length left after alignment
384
385 bf 31,balhalf ; No single byte to do...
386 lbz r7,-1(r6) ; Get the byte
387 subi r6,r6,1 ; Back up the source
388 stb r7,-1(r4) ; Save the single
389 subi r4,r4,1 ; Back up the sink
390
391 ; Sink is halfword aligned here
392
393 balhalf: bf 30,balword ; No halfword to do...
394 lhz r7,-2(r6) ; Get the halfword
395 subi r6,r6,2 ; Back up the source
396 sth r7,-2(r4) ; Save the halfword
397 subi r4,r4,2 ; Back up the sink
398
399 ; Sink is word aligned here
400
401 balword: bf 29,baldouble ; No word to do...
402 lwz r7,-4(r6) ; Get the word
403 subi r6,r6,4 ; Back up the source
404 stw r7,-4(r4) ; Save the word
405 subi r4,r4,4 ; Back up the sink
406
407 ; Sink is double aligned here
408
409 baldouble: bf 28,balquad ; No double to do...
410 lwz r7,-8(r6) ; Get the first word
411 lwz r8,-4(r6) ; Get the second word (r8 is free now, the limit was already applied)
412 subi r6,r6,8 ; Back up the source
413 stw r7,-8(r4) ; Save the first word
414 stw r8,-4(r4) ; Save the second word
415 subi r4,r4,8 ; Back up the sink
416
417 ; Sink is quadword aligned here
418
419 balquad: bf 27,balline ; No quad to do...
420 lwz r7,-16(r6) ; Get the first word
421 lwz r8,-12(r6) ; Get the second word
422 lwz r9,-8(r6) ; Get the third word
423 lwz r11,-4(r6) ; Get the fourth word
424 stw r7,-16(r4) ; Save the first word
425 subi r6,r6,16 ; Back up the source
426 stw r8,-12(r4) ; Save the second word
427 stw r9,-8(r4) ; Save the third word
428 stw r11,-4(r4) ; Save the fourth word
429 subi r4,r4,16 ; Back up the sink
430
431 ; Sink is line aligned here
432
433 balline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move (length shifted right by 5)
434 mtcrf 3,r5 ; Make branch mask for backend partial moves; r5 itself is not needed after this
435 beq- bbackend ; No full lines to move
436 #if 0
437 stwu r1,-8(r1) ; Dummy stack for MacOS
438 stw r2,4(r1) ; Save RTOC
439 #endif
440
441
442 ; Registers in use: R0, R1, R3, R4, R5, R6
443 ; Registers not in use: R2, R7, R8, R9, R10, R11, R12 - Ok, we can make another free for 8 of them (R5 is reused, its low bits are already in the CR mask)
444
445 bnxtline: subic. r0,r0,1 ; Account for the line now; this CR0 is tested by both the ble and the bgt below
446
447 lwz r7,-32(r6) ; Get the first word
448 lwz r5,-28(r6) ; Get the second word
449 lwz r2,-24(r6) ; Get the third word
450 lwz r12,-20(r6) ; Get the fourth word
451 lwz r11,-16(r6) ; Get the fifth word
452 lwz r10,-12(r6) ; Get the sixth word
453 lwz r9,-8(r6) ; Get the seventh word
454 lwz r8,-4(r6) ; Get the eighth word
455 subi r6,r6,32 ; Back up the source by a line
456
457 stw r7,-32(r4) ; Save the first word
458 ble- bnotouch ; Last time, skip touch of source...
459 dcbt br0,r6 ; Touch in next source line
460
461 bnotouch: stw r5,-28(r4) ; Save the second word
462 stw r2,-24(r4) ; Save the third word
463 stw r12,-20(r4) ; Save the fourth word
464 stw r11,-16(r4) ; Save the fifth word
465 stw r10,-12(r4) ; Save the sixth word
466 stw r9,-8(r4) ; Save the seventh word
467 stw r8,-4(r4) ; Save the eighth word
468 subi r4,r4,32 ; Back up the sink by a line
469
470 bgt+ bnxtline ; Do the next line, if any...
471 #if 0
472 lwz r2,4(r1) ; Restore RTOC
473 lwz r1,0(r1) ; Pop dummy stack
474 #endif
475
476 ;
477 ; Note: We touched these lines in at the beginning
478 ;
479
480 ; Move backend quadword
481
482 bbackend: bf 27,bnoquad ; No quad to do...
483 lwz r7,-16(r6) ; Get the first word
484 lwz r8,-12(r6) ; Get the second word
485 lwz r9,-8(r6) ; Get the third word
486 lwz r11,-4(r6) ; Get the fourth word
487 stw r7,-16(r4) ; Save the first word
488 subi r6,r6,16 ; Back up the source
489 stw r8,-12(r4) ; Save the second word
490 stw r9,-8(r4) ; Save the third word
491 stw r11,-4(r4) ; Save the fourth word
492 subi r4,r4,16 ; Back up the sink
493
494 ; Move backend double
495
496 bnoquad: bf 28,bnodouble ; No double to do...
497 lwz r7,-8(r6) ; Get the first word
498 lwz r8,-4(r6) ; Get the second word
499 subi r6,r6,8 ; Back up the source
500 stw r7,-8(r4) ; Save the first word
501 stw r8,-4(r4) ; Save the second word
502 subi r4,r4,8 ; Back up the sink
503
504 ; Move backend word
505
506 bnodouble: bf 29,bnoword ; No word to do...
507 lwz r7,-4(r6) ; Get the word
508 subi r6,r6,4 ; Back up the source
509 stw r7,-4(r4) ; Save the word
510 subi r4,r4,4 ; Back up the sink
511
512 ; Move backend halfword
513
514 bnoword: bf 30,bnohalf ; No halfword to do...
515 lhz r7,-2(r6) ; Get the halfword
516 subi r6,r6,2 ; Back up the source
517 sth r7,-2(r4) ; Save the halfword
518 subi r4,r4,2 ; Back up the sink
519
520 ; Move backend byte
521
522 bnohalf: bf 31,bcpydone ; No last byte: leave through the common exit so translation can be fixed
523 lbz r7,-1(r6) ; Get the byte
524 stb r7,-1(r4) ; Save the single
525
526 b bcpydone ; All done: the common exit turns data translation back on if it was turned off