]> git.saurik.com Git - apple/xnu.git/blob - osfmk/ppc/bcopy.s
176b9571509db70c1ddf095a5937553b9d75bbe8
[apple/xnu.git] / osfmk / ppc / bcopy.s
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 ;
26 ; Copy bytes of data around. handles overlapped data.
27 ;
28 ; Change this to use Altivec later on, and maybe floating point.
29 ;
30 ;
31 #include <ppc/asm.h>
32 #include <ppc/proc_reg.h>
33
34 ; Use CR5_lt to indicate non-cached
35 #define noncache 20
36 ; Use CR5_gt to indicate that we need to turn data translation back on
37 #define fixxlate 21
38 ; Use CR5_eq to indicate that we need to invalidate bats
39 #define killbats 22
40
41 ;
42 ; bcopy_nc(from, to, nbytes)
43 ;
44 ; bcopy_nc operates on non-cached memory, so the copy must not DCBZ the sink.
45 ; It sets the noncache flag (tested at nxtline) and then joins bcopy at bcpswap.
46 ;
47
48 .align 5
49 .globl EXT(bcopy_nc)
50
51 LEXT(bcopy_nc)
52
53 crset noncache ; Set non-cached
54 b bcpswap ; Join bcopy past its "crclr noncache"
55
56 ;
57 ; void bcopy_physvir(from, to, nbytes)
58 ; Attempt to copy physically addressed memory with translation on if conditions are met.
59 ; Otherwise do a normal bcopy_phys.
60 ;
61 ; Rules are: neither source nor destination can cross a page (checked below).
62 ; No accesses above the 2GB line (I/O or ROM); this is not checked here.
63 ;
64 ; Interrupts must be disabled throughout the copy when this is called.
65
66 ; To do this, we build a
67 ; 128KB DBAT for both the source (DBAT 0) and sink (DBAT 1). If both are in the same block, only DBAT 0 is
68 ; loaded. We do not touch the IBATs, so there is no issue if either physical page
69 ; address is the same as the virtual address of the instructions we are executing.
70 ;
71 ; At the end (bcclrbat), we invalidate both DBATs. Interrupts are not re-enabled here; presumably the caller does that.
72 ;
73 ; Note, this one will not work in user state
74 ;
75
76 .align 5
77 .globl EXT(bcopy_physvir)
78
79 LEXT(bcopy_physvir)
80
81 addic. r0,r5,-1 ; Get length - 1 (CR0 is negative if length is 0 or above 0x80000000)
82 add r11,r3,r0 ; Point to last byte of source ("from")
83 cmplw cr1,r3,r4 ; Does source == sink? (CR1 is recomputed below before it is ever tested)
84 add r12,r4,r0 ; Point to last byte of sink ("to")
85 bltlr- ; Bail if length is 0 or way too big
86 xor r7,r11,r3 ; Bits that differ between first and last source byte
87 xor r8,r12,r4 ; Bits that differ between first and last sink byte
88 or r0,r7,r8 ; Combine wrap
89
90 li r9,((PTE_WIMG_CB_CACHED_COHERENT<<3)|2) ; Set default attributes
91 rlwinm. r0,r0,0,0,19 ; Did we overflow a page? (any difference in the upper 20 address bits)
92 li r7,2 ; Set validity flags
93 li r8,2 ; Set validity flags
94 bne- EXT(bcopy_phys) ; Overflowed page, do normal physical copy...
95
96 crset killbats ; Remember to trash BATs on the way out
97 rlwimi r11,r9,0,15,31 ; Set source lower DBAT value (keep address bits 0-14, insert attributes)
98 rlwimi r12,r9,0,15,31 ; Set sink lower DBAT value (keep address bits 0-14, insert attributes)
99 rlwimi r7,r11,0,0,14 ; Set source upper DBAT value (block address + validity flags)
100 rlwimi r8,r12,0,0,14 ; Set sink upper DBAT value (block address + validity flags)
101 cmplw cr1,r11,r12 ; See if sink and source are same block
102
103 sync
104
105 mtdbatl 0,r11 ; Set source lower DBAT
106 mtdbatu 0,r7 ; Set source upper DBAT
107
108 beq- cr1,bcpvsame ; Source and sink are in same block
109
110 mtdbatl 1,r12 ; Set sink lower DBAT
111 mtdbatu 1,r8 ; Set sink upper DBAT
112
113 bcpvsame: mr r6,r3 ; Set source
114 crclr noncache ; Set cached
115
116 b copyit ; Go copy it...
117
118
119 ;
120 ; void bcopy_phys(from, to, nbytes)
121 ; Turns off data translation before the copy; bcpydone turns it back on
122 ; when fixxlate is set. Note, this one will not work in user state
123 ;
124
125 .align 5
126 .globl EXT(bcopy_phys)
127
128 LEXT(bcopy_phys)
129
130 mfmsr r9 ; Get the MSR
131
132 crclr noncache ; Set cached
133 rlwinm. r8,r9,0,MSR_DR_BIT,MSR_DR_BIT ; Is data translation on? (CR0_eq set if it is off)
134
135 cmplw cr1,r4,r3 ; Compare "to" and "from"
136 cmplwi cr7,r5,0 ; Check if we have a 0 length
137 mr r6,r3 ; Set source
138 beqlr- cr1 ; Bail if "to" and "from" are the same
139 xor r9,r9,r8 ; Turn off translation if it is on (should be)
140 beqlr- cr7 ; Bail if length is 0
141
142 rlwinm r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off
143 crclr killbats ; Make sure we do not trash BATs on the way out
144 rlwinm r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off
145 mtmsr r9 ; Set DR translation off
146 isync ; Wait for it
147
148 crnot fixxlate,cr0_eq ; Remember to turn on translation if it was (CR0 still holds the MSR_DR test)
149 b copyit ; Go copy it...
150
151 ;
152 ; void bcopy(from, to, nbytes)
153 ; R3 = from, R4 = to, R5 = nbytes. bcopy_nc joins at bcpswap with noncache already set.
154
155 .align 5
156 .globl EXT(bcopy)
157
158 LEXT(bcopy)
159
160 crclr noncache ; Set cached
161
162 bcpswap: cmplw cr1,r4,r3 ; Compare "to" and "from"
163 mr. r5,r5 ; Check if we have a 0 length (sets CR0)
164 mr r6,r3 ; Set source
165 crclr killbats ; Make sure we do not trash BATs on the way out
166 beqlr- cr1 ; Bail if "to" and "from" are the same
167 beqlr- ; Bail if length is 0
168 crclr fixxlate ; Set translation already ok
169 b copyit ; Go copy it...
170
171 ;
172 ; When we move the memory, forward overlays must be handled. We
173 ; also can not DCBZ the sink if we are from bcopy_nc.
174 ; We need to preserve R3 because it needs to be returned for memcpy.
175 ; We can be interrupted and lose control here.
176 ;
177 ; There is no stack, so in order to use floating point, we would
178 ; need to take the FP exception. Any potential gains by using FP
179 ; would be more than eaten up by this.
180 ;
181 ; Later, we should use Altivec for large moves.
182 ;
183
184 .align 5
185 .globl EXT(memcpy)
186
187 LEXT(memcpy)
188
189 cmplw cr1,r3,r4 ; "to" and "from" the same?
190 mr r6,r4 ; Set the "from"
191 mr. r5,r5 ; Length zero?
192 crclr noncache ; Set cached
193 mr r4,r3 ; Set the "to" (R3 itself is left untouched as the return value)
194 crclr fixxlate ; Set translation already ok
195 beqlr- cr1 ; "to" and "from" are the same
196 beqlr- ; Length is 0
197 crclr killbats ; Make sure we do not trash BATs on the way out
198
199 copyit: sub r12,r4,r6 ; Get potential overlap (negative if backward move)
200 lis r8,0x7FFF ; Start up a mask
201 srawi r11,r12,31 ; Propagate the sign bit
202 dcbt br0,r6 ; Touch in the first source line
203 cntlzw r7,r5 ; Count leading zeros of the length (locates its highest set bit)
204 ori r8,r8,0xFFFF ; Make limit 0x7FFFFFFF
205 xor r9,r12,r11 ; If sink - source was negative, invert bits
206 srw r8,r8,r7 ; Get move length limitation (mask of the bits below the length's highest set bit)
207 sub r9,r9,r11 ; If sink - source was negative, add 1 and get absolute value
208 cmplw r12,r5 ; See if we actually forward overlap (unsigned sink - source < length)
209 cmplwi cr7,r9,32 ; See if at least a line between source and sink
210 dcbtst br0,r4 ; Touch in the first sink line
211 cmplwi cr1,r5,32 ; Are we moving more than a line?
212 cror noncache,noncache,28 ; Set to not DCBZ output line if not enough space (28 = CR7_lt from the compare above)
213 blt- fwdovrlap ; This is a forward overlapping area, handle it...
214
215 ;
216 ; R4 = sink
217 ; R5 = length
218 ; R6 = source
219 ; R8 = limit mask for the front end alignment move (built in copyit)
220
221 ;
222 ; Here we figure out how much we have to move to get the sink onto a
223 ; cache boundary. If we can, and there are still more than 32 bytes
224 ; left to move, we can really speed things up by DCBZing the sink line.
225 ; We can not do this if noncache is set because we will take an
226 ; alignment exception.
227
228 neg r0,r4 ; Get the number of bytes to move to align to a line boundary
229 rlwinm. r0,r0,0,27,31 ; Keep the low 5 bits (0 to 31 bytes) and test for zero
230 and r0,r0,r8 ; limit to the maximum front end move
231 mtcrf 3,r0 ; Make branch mask for partial moves (CR bits 27-31 select 16,8,4,2,1 bytes)
232 sub r5,r5,r0 ; Set the length left to move
233 beq alline ; Already on a line (CR0 from the rlwinm. above)...
234
235 bf 31,alhalf ; No single byte to do...
236 lbz r7,0(r6) ; Get the byte
237 addi r6,r6,1 ; Point to the next
238 stb r7,0(r4) ; Save the single
239 addi r4,r4,1 ; Bump sink
240
241 ; Sink is halfword aligned here
242
243 alhalf: bf 30,alword ; No halfword to do...
244 lhz r7,0(r6) ; Get the halfword
245 addi r6,r6,2 ; Point to the next
246 sth r7,0(r4) ; Save the halfword
247 addi r4,r4,2 ; Bump sink
248
249 ; Sink is word aligned here
250
251 alword: bf 29,aldouble ; No word to do...
252 lwz r7,0(r6) ; Get the word
253 addi r6,r6,4 ; Point to the next
254 stw r7,0(r4) ; Save the word
255 addi r4,r4,4 ; Bump sink
256
257 ; Sink is double aligned here
258
259 aldouble: bf 28,alquad ; No double to do...
260 lwz r7,0(r6) ; Get the first word
261 lwz r8,4(r6) ; Get the second word (R8 is free now, the limit mask has been applied)
262 addi r6,r6,8 ; Point to the next
263 stw r7,0(r4) ; Save the first word
264 stw r8,4(r4) ; Save the second word
265 addi r4,r4,8 ; Bump sink
266
267 ; Sink is quadword aligned here
268
269 alquad: bf 27,alline ; No quad to do...
270 lwz r7,0(r6) ; Get the first word
271 lwz r8,4(r6) ; Get the second word
272 lwz r9,8(r6) ; Get the third word
273 stw r7,0(r4) ; Save the first word
274 lwz r11,12(r6) ; Get the fourth word
275 addi r6,r6,16 ; Point to the next
276 stw r8,4(r4) ; Save the second word
277 stw r9,8(r4) ; Save the third word
278 stw r11,12(r4) ; Save the fourth word
279 addi r4,r4,16 ; Bump sink
280
281 ; Sink is line aligned here
282
283 alline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move (length / 32)
284 mtcrf 3,r5 ; Make branch mask for backend partial moves
285 rlwinm r11,r5,0,0,26 ; Get number of bytes we are going to move
286 beq- backend ; No full lines to move
287
288 sub r5,r5,r11 ; Calculate the residual
289 li r10,96 ; Stride for touch ahead
290
291 nxtline: subic. r0,r0,1 ; Account for the line now (CR0 drives the loop branch below)
292
293 bt- noncache,skipz ; Skip if we are not cached...
294 dcbz br0,r4 ; Blow away the whole line because we are replacing it
295 dcbt r6,r10 ; Touch ahead a bit
296
297 skipz: lwz r7,0(r6) ; Get the first word
298 lwz r8,4(r6) ; Get the second word
299 lwz r9,8(r6) ; Get the third word
300 stw r7,0(r4) ; Save the first word
301 lwz r11,12(r6) ; Get the fourth word
302 stw r8,4(r4) ; Save the second word
303 lwz r7,16(r6) ; Get the fifth word
304 stw r9,8(r4) ; Save the third word
305 lwz r8,20(r6) ; Get the sixth word
306 stw r11,12(r4) ; Save the fourth word
307 lwz r9,24(r6) ; Get the seventh word
308 stw r7,16(r4) ; Save the fifth word
309 lwz r11,28(r6) ; Get the eighth word
310 addi r6,r6,32 ; Point to the next
311 stw r8,20(r4) ; Save the sixth word
312 stw r9,24(r4) ; Save the seventh word
313 stw r11,28(r4) ; Save the eighth word
314 addi r4,r4,32 ; Bump sink
315 bgt+ nxtline ; Do the next line, if any (CR0 from the subic. at nxtline)...
316
317
318 ; Move backend quadword (CR bits 27-31 hold the residual length bits set at alline)
319
320 backend: bf 27,noquad ; No quad to do...
321 lwz r7,0(r6) ; Get the first word
322 lwz r8,4(r6) ; Get the second word
323 lwz r9,8(r6) ; Get the third word
324 lwz r11,12(r6) ; Get the fourth word
325 stw r7,0(r4) ; Save the first word
326 addi r6,r6,16 ; Point to the next
327 stw r8,4(r4) ; Save the second word
328 stw r9,8(r4) ; Save the third word
329 stw r11,12(r4) ; Save the fourth word
330 addi r4,r4,16 ; Bump sink
331
332 ; Move backend double
333
334 noquad: bf 28,nodouble ; No double to do...
335 lwz r7,0(r6) ; Get the first word
336 lwz r8,4(r6) ; Get the second word
337 addi r6,r6,8 ; Point to the next
338 stw r7,0(r4) ; Save the first word
339 stw r8,4(r4) ; Save the second word
340 addi r4,r4,8 ; Bump sink
341
342 ; Move backend word
343
344 nodouble: bf 29,noword ; No word to do...
345 lwz r7,0(r6) ; Get the word
346 addi r6,r6,4 ; Point to the next
347 stw r7,0(r4) ; Save the word
348 addi r4,r4,4 ; Bump sink
349
350 ; Move backend halfword
351
352 noword: bf 30,nohalf ; No halfword to do...
353 lhz r7,0(r6) ; Get the halfword
354 addi r6,r6,2 ; Point to the next
355 sth r7,0(r4) ; Save the halfword
356 addi r4,r4,2 ; Bump sink
357
358 ; Move backend byte
359
360 nohalf: bf 31,bcpydone ; No byte to do, go to the common exit...
361 lbz r7,0(r6) ; Get the byte
362 stb r7,0(r4) ; Save the single (then fall into bcpydone)
363
364 bcpydone: bt- killbats,bcclrbat ; Common exit: jump if we need to clear bats (bcopy_physvir)...
365 bflr fixxlate ; Leave now if we do not need to fix translation...
366 mfmsr r9 ; Get the MSR
367 ori r9,r9,lo16(MASK(MSR_DR)) ; Turn data translation on
368 rlwinm r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off
369 rlwinm r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off
370 mtmsr r9 ; Just do it
371 isync ; Hang in there
372 blr ; Leave cuz we are all done...
373
374 bcclrbat: li r0,0 ; Get set to invalidate upper half
375 sync ; Make sure all is well
376 mtdbatu 0,r0 ; Clear upper DBAT 0 (loaded from the "from" address in bcopy_physvir)
377 mtdbatu 1,r0 ; Clear upper DBAT 1 (loaded from the "to" address, if it was loaded at all)
378 sync
379 isync
380 blr
381
382
383 ;
384 ; 0123456789ABCDEF0123456789ABCDEF
385 ; 0123456789ABCDEF0123456789ABCDEF
386 ; F
387 ; DE
388 ; 9ABC
389 ; 12345678
390 ; 123456789ABCDEF0
391 ; 0
392
393 ;
394 ; Here is where we handle a forward overlapping move. These will be slow
395 ; because we can not kill the cache of the destination until after we have
396 ; loaded/saved the source area. Also, because reading memory backwards is
397 ; slower when the cache line needs to be loaded because the critical
398 ; doubleword is loaded first, i.e., the last, then it goes back to the first,
399 ; and on in order. That means that when we are at the second to last DW we
400 ; have to wait until the whole line is in cache before we can proceed.
401 ; The copy runs from the end of both areas down toward the start.
402
403 fwdovrlap: add r4,r5,r4 ; Point past the last sink byte
404 add r6,r5,r6 ; Point past the last source byte
405 and r0,r4,r8 ; Apply movement limit
406 li r12,-1 ; Make sure we touch in the actual line (offset back to the last byte)
407 mtcrf 3,r0 ; Figure out the best way to move backwards
408 dcbt r12,r6 ; Touch in the last line of source
409 rlwinm. r0,r0,0,27,31 ; Calculate the length to adjust to cache boundary
410 dcbtst r12,r4 ; Touch in the last line of the sink
411 beq- balline ; Already on cache line boundary
412
413 sub r5,r5,r0 ; Precalculate move length left after alignment
414
415 bf 31,balhalf ; No single byte to do...
416 lbz r7,-1(r6) ; Get the byte
417 subi r6,r6,1 ; Point to the next
418 stb r7,-1(r4) ; Save the single
419 subi r4,r4,1 ; Bump sink
420
421 ; Sink is halfword aligned here
422
423 balhalf: bf 30,balword ; No halfword to do...
424 lhz r7,-2(r6) ; Get the halfword
425 subi r6,r6,2 ; Point to the next
426 sth r7,-2(r4) ; Save the halfword
427 subi r4,r4,2 ; Bump sink
428
429 ; Sink is word aligned here
430
431 balword: bf 29,baldouble ; No word to do...
432 lwz r7,-4(r6) ; Get the word
433 subi r6,r6,4 ; Point to the next
434 stw r7,-4(r4) ; Save the word
435 subi r4,r4,4 ; Bump sink
436
437 ; Sink is double aligned here
438
439 baldouble: bf 28,balquad ; No double to do...
440 lwz r7,-8(r6) ; Get the first word
441 lwz r8,-4(r6) ; Get the second word (R8 is free now, the limit mask has been applied)
442 subi r6,r6,8 ; Point to the next
443 stw r7,-8(r4) ; Save the first word
444 stw r8,-4(r4) ; Save the second word
445 subi r4,r4,8 ; Bump sink
446
447 ; Sink is quadword aligned here
448
449 balquad: bf 27,balline ; No quad to do...
450 lwz r7,-16(r6) ; Get the first word
451 lwz r8,-12(r6) ; Get the second word
452 lwz r9,-8(r6) ; Get the third word
453 lwz r11,-4(r6) ; Get the fourth word
454 stw r7,-16(r4) ; Save the first word
455 subi r6,r6,16 ; Point to the next
456 stw r8,-12(r4) ; Save the second word
457 stw r9,-8(r4) ; Save the third word
458 stw r11,-4(r4) ; Save the fourth word
459 subi r4,r4,16 ; Bump sink
460
461 ; Sink is line aligned here
462
463 balline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move (length / 32)
464 mtcrf 3,r5 ; Make branch mask for backend partial moves
465 beq- bbackend ; No full lines to move
466
467
468 ; Registers in use: R0, R1, R3, R4, R6 (R5 is reused as data below; its low bits are already in the CR)
469 ; Registers not in use: R2, R7, R8, R9, R10, R11, R12 - Ok, we can make another free for 8 of them
470
471 bnxtline: subic. r0,r0,1 ; Account for the line now (CR0 drives both branches below)
472
473 lwz r7,-32(r6) ; Get the first word
474 lwz r5,-28(r6) ; Get the second word
475 lwz r2,-24(r6) ; Get the third word
476 lwz r12,-20(r6) ; Get the fourth word
477 lwz r11,-16(r6) ; Get the fifth word
478 lwz r10,-12(r6) ; Get the sixth word
479 lwz r9,-8(r6) ; Get the seventh word
480 lwz r8,-4(r6) ; Get the eighth word
481 subi r6,r6,32 ; Point to the next
482
483 stw r7,-32(r4) ; Save the first word
484 ble- bnotouch ; Last time, skip touch of source...
485 dcbt br0,r6 ; Touch in next source line
486
487 bnotouch: stw r5,-28(r4) ; Save the second word
488 stw r2,-24(r4) ; Save the third word
489 stw r12,-20(r4) ; Save the fourth word
490 stw r11,-16(r4) ; Save the fifth word
491 stw r10,-12(r4) ; Save the sixth word
492 stw r9,-8(r4) ; Save the seventh word
493 stw r8,-4(r4) ; Save the eighth word
494 subi r4,r4,32 ; Bump sink
495
496 bgt+ bnxtline ; Do the next line, if any...
497
498 ;
499 ; Note: We touched these lines in at the beginning
500 ; Tail of the backward (forward-overlap) copy: CR bits 27-31 select 16,8,4,2,1 byte moves. Every path must leave through bcpydone.
501
502 ; Move backend quadword
503
504 bbackend: bf 27,bnoquad ; No quad to do...
505 lwz r7,-16(r6) ; Get the first word
506 lwz r8,-12(r6) ; Get the second word
507 lwz r9,-8(r6) ; Get the third word
508 lwz r11,-4(r6) ; Get the fourth word
509 stw r7,-16(r4) ; Save the first word
510 subi r6,r6,16 ; Point to the next
511 stw r8,-12(r4) ; Save the second word
512 stw r9,-8(r4) ; Save the third word
513 stw r11,-4(r4) ; Save the fourth word
514 subi r4,r4,16 ; Bump sink
515
516 ; Move backend double
517
518 bnoquad: bf 28,bnodouble ; No double to do...
519 lwz r7,-8(r6) ; Get the first word
520 lwz r8,-4(r6) ; Get the second word
521 subi r6,r6,8 ; Point to the next
522 stw r7,-8(r4) ; Save the first word
523 stw r8,-4(r4) ; Save the second word
524 subi r4,r4,8 ; Bump sink
525
526 ; Move backend word
527
528 bnodouble: bf 29,bnoword ; No word to do...
529 lwz r7,-4(r6) ; Get the word
530 subi r6,r6,4 ; Point to the next
531 stw r7,-4(r4) ; Save the word
532 subi r4,r4,4 ; Bump sink
533
534 ; Move backend halfword
535
536 bnoword: bf 30,bnohalf ; No halfword to do...
537 lhz r7,-2(r6) ; Get the halfword
538 subi r6,r6,2 ; Point to the next
539 sth r7,-2(r4) ; Save the halfword
540 subi r4,r4,2 ; Bump sink
541
542 ; Move backend byte
543
544 bnohalf: bf 31,bcpydone ; No byte to do: exit via bcpydone (not a bare return) so DBATs/translation get restored...
545 lbz r7,-1(r6) ; Get the byte
546 stb r7,-1(r4) ; Save the single
547
548 b bcpydone ; Go exit cuz we are all done...