]> git.saurik.com Git - apple/xnu.git/blame - osfmk/ppc/bcopy.s
xnu-344.49.tar.gz
[apple/xnu.git] / osfmk / ppc / bcopy.s
CommitLineData
1c79356b 1/*
de355530 2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
1c79356b
A
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
43866e37 6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
1c79356b 7 *
43866e37
A
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
1c79356b
A
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
43866e37
A
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
1c79356b
A
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25;
26; Copy bytes of data around. handles overlapped data.
27;
28; Change this to use Altivec later on, and maybe floating point.
29;
1c79356b
A
30;
31#include <ppc/asm.h>
32#include <ppc/proc_reg.h>
33
34; Use CR5_lt (CR bit 20) to indicate non-cached; when set, the line-copy loop skips dcbz/dcbt
35#define noncache 20
36; Use CR5_gt (CR bit 21) to indicate that we need to turn data translation back on at exit
37#define fixxlate 21
de355530
A
38; Use CR5_eq (CR bit 22) to indicate that we need to invalidate DBATs 0 and 1 at exit
39#define killbats 22
1c79356b
A
40
41;
42; bcopy_nc(from, to, nbytes)
43;
44; bcopy_nc operates on non-cached memory so we can not use any kind
45; of cache instructions.
46;
47
9bccf70c
A
48 .align 5
49 .globl EXT(bcopy_nc)
1c79356b 50
9bccf70c 51LEXT(bcopy_nc)
1c79356b
A
52
53 crset noncache ; Set non-cached so the line loop never issues dcbz on the sink
54 b bcpswap ; Join bcopy just past its "crclr noncache" so the flag stays set
55
9bccf70c
A
56;
57; void bcopy_physvir(from, to, nbytes)
58; Attempt to copy physically addressed memory with translation on if conditions are met.
de355530 59; Otherwise do a normal bcopy_phys.
9bccf70c
A
60;
61; Rules are: neither source nor destination can cross a page.
de355530 62; No accesses above the 2GB line (I/O or ROM).
9bccf70c 63;
de355530
A
64; Interrupts must be disabled throughout the copy when this is called
65
9bccf70c
A
66; To do this, we build a
67; 128KB DBAT for both the source and sink. If both are the same, only one is
68; loaded. We do not touch the IBATs, so there is no issue if either physical page
69; address is the same as the virtual address of the instructions we are executing.
70;
de355530 71; At the end, we invalidate DBATs 0 and 1. This routine does not change the interrupt state itself.
9bccf70c
A
72;
73; Note, this one will not work in user state
74;
75
76 .align 5
77 .globl EXT(bcopy_physvir)
78
79LEXT(bcopy_physvir)
80
de355530 81 addic. r0,r5,-1 ; Get length - 1 (cr0 is tested by the bltlr below)
9bccf70c 82 add r11,r3,r0 ; Point to last byte of "from" (r3 becomes the copy source at bcpvsame)
de355530 83 cmplw cr1,r3,r4 ; Does source == sink? (cr1 is recomputed below before any branch reads it)
9bccf70c
A
84 add r12,r4,r0 ; Point to last byte of "to" (r4 is the sink used by copyit)
85 bltlr- ; Bail if length is 0 or way too big (length - 1 is negative)
86 xor r7,r11,r3 ; Bits that differ between first and last "from" byte
87 xor r8,r12,r4 ; Bits that differ between first and last "to" byte
88 or r0,r7,r8 ; Combine wrap
89
de355530 90 li r9,((PTE_WIMG_CB_CACHED_COHERENT<<3)|2) ; Set default attributes
9bccf70c
A
91 rlwinm. r0,r0,0,0,19 ; Did we overflow a page? (any difference above the low 12 bits)
92 li r7,2 ; Set validity flags
93 li r8,2 ; Set validity flags
de355530 94 bne- EXT(bcopy_phys) ; Overflowed page, do normal physical copy...
9bccf70c 95
de355530 96 crset killbats ; Remember to trash BATs on the way out
9bccf70c
A
97 rlwimi r11,r9,0,15,31 ; "from" lower DBAT value: keep address bits 0-14, attributes in bits 15-31
98 rlwimi r12,r9,0,15,31 ; "to" lower DBAT value: keep address bits 0-14, attributes in bits 15-31
99 rlwimi r7,r11,0,0,14 ; "from" upper DBAT value: address bits 0-14 plus the validity flags
100 rlwimi r8,r12,0,0,14 ; "to" upper DBAT value: address bits 0-14 plus the validity flags
101 cmplw cr1,r11,r12 ; See if sink and source are same block
102
103 sync
104
105 mtdbatl 0,r11 ; DBAT 0 lower <- "from" block
106 mtdbatu 0,r7 ; DBAT 0 upper <- "from" block
107
108 beq- cr1,bcpvsame ; Source and sink are in same block, DBAT 0 covers both
109
110 mtdbatl 1,r12 ; DBAT 1 lower <- "to" block
111 mtdbatu 1,r8 ; DBAT 1 upper <- "to" block
112
113bcpvsame: mr r6,r3 ; Set source
114 crclr noncache ; Set cached
115
de355530
A
116 b copyit ; Go copy it... (fixxlate is not set here; the killbats exit path never tests it)
117
9bccf70c 118
1c79356b
A
119;
120; void bcopy_phys(from, to, nbytes)
121; Turns off data translation before the copy. Note, this one will
de355530 122; not work in user state
1c79356b
A
123;
124
9bccf70c
A
125 .align 5
126 .globl EXT(bcopy_phys)
127
128LEXT(bcopy_phys)
de355530 129
1c79356b 130 mfmsr r9 ; Get the MSR
de355530 131
1c79356b 132 crclr noncache ; Set cached
de355530
A
133 rlwinm. r8,r9,0,MSR_DR_BIT,MSR_DR_BIT ; Is data translation on? (cr0 is kept live until the crnot below)
134
135 cmplw cr1,r4,r3 ; Compare "to" and "from"
1c79356b
A
136 cmplwi cr7,r5,0 ; Check if we have a 0 length
137 mr r6,r3 ; Set source
de355530
A
138 beqlr- cr1 ; Bail if "to" and "from" are the same (MSR not yet modified)
139 xor r9,r9,r8 ; Turn off translation if it is on (should be)
1c79356b
A
140 beqlr- cr7 ; Bail if length is 0 (MSR not yet modified)
141
de355530
A
142 rlwinm r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off
143 crclr killbats ; Make sure we do not trash BATs on the way out
144 rlwinm r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off
1c79356b
A
145 mtmsr r9 ; Set DR translation off
146 isync ; Wait for it
147
de355530
A
148 crnot fixxlate,cr0_eq ; Remember to turn on translation if it was (cr0 from the rlwinm. above)
149 b copyit ; Go copy it...
1c79356b
A
150
151;
152; void bcopy(from, to, nbytes)
153;
154
9bccf70c
A
155 .align 5
156 .globl EXT(bcopy)
157
158LEXT(bcopy)
1c79356b
A
159
160 crclr noncache ; Set cached
161
de355530
A
162bcpswap: cmplw cr1,r4,r3 ; Compare "to" and "from" (bcopy_nc enters here with noncache set)
163 mr. r5,r5 ; Check if we have a 0 length (sets cr0)
1c79356b 164 mr r6,r3 ; Set source
de355530
A
165 crclr killbats ; Make sure we do not trash BATs on the way out
166 beqlr- cr1 ; Bail if "to" and "from" are the same
167 beqlr- ; Bail if length is 0
1c79356b 168 crclr fixxlate ; Set translation already ok
de355530 169 b copyit ; Go copy it... (r4 = sink, r5 = length, r6 = source)
1c79356b
A
170
171;
172; When we move the memory, forward overlays must be handled. We
173; also can not use the cache instructions if we are from bcopy_nc.
174; We need to preserve R3 because it needs to be returned for memcpy.
175; We can be interrupted and lose control here.
176;
de355530
A
177; There is no stack, so in order to use floating point, we would
178; need to take the FP exception. Any potential gains by using FP
1c79356b
A
179; would be more than eaten up by this.
180;
de355530 181; Later, we should use Altivec for large moves.
1c79356b
A
182;
183
9bccf70c
A
184 .align 5
185 .globl EXT(memcpy)
de355530 186
9bccf70c 187LEXT(memcpy)
de355530 188

1c79356b
A
189 cmplw cr1,r3,r4 ; "to" and "from" the same?
190 mr r6,r4 ; Set the "from" (memcpy passes it second; copyit wants the source in r6)
191 mr. r5,r5 ; Length zero? (sets cr0)
192 crclr noncache ; Set cached
193 mr r4,r3 ; Set the "to" (copyit wants the sink in r4; r3 is left untouched)
194 crclr fixxlate ; Set translation already ok
195 beqlr- cr1 ; "to" and "from" are the same
196 beqlr- ; Length is 0
de355530 197 crclr killbats ; Make sure we do not trash BATs on the way out, then fall through into copyit
1c79356b 198
de355530 199copyit: sub r12,r4,r6 ; Get potential overlap (negative if backward move)
1c79356b
A
200 lis r8,0x7FFF ; Start up a mask
201 srawi r11,r12,31 ; Propagate the sign bit (r11 = 0 or -1)
202 dcbt br0,r6 ; Touch in the first source line
203 cntlzw r7,r5 ; Count leading zeros of the length (locates its highest set bit)
204 ori r8,r8,0xFFFF ; Make limit 0x7FFFFFFF
205 xor r9,r12,r11 ; If sink - source was negative, invert bits
206 srw r8,r8,r7 ; Get move length limitation: mask of the bits below the length's highest set bit
207 sub r9,r9,r11 ; If sink - source was negative, add 1 and get absolute value
208 cmplw r12,r5 ; See if we actually forward overlap (unsigned: sink - source < length)
209 cmplwi cr7,r9,32 ; See if at least a line between source and sink
210 dcbtst br0,r4 ; Touch in the first sink line
211 cmplwi cr1,r5,32 ; Are we moving more than a line? (cr1 is not tested by the code visible here)
de355530 212 cror noncache,noncache,28 ; Set to not DCBZ output line if not enough space (CR bit 28 = cr7 lt from above)
1c79356b
A
213 blt- fwdovrlap ; This is a forward overlapping area, handle it... (cr0 from the cmplw r12,r5)
214
215;
216; R4 = sink
217; R5 = length
218; R6 = source
219;
220
221;
222; Here we figure out how much we have to move to get the sink onto a
223; cache boundary. If we can, and there are still more that 32 bytes
224; left to move, we can really speed things up by DCBZing the sink line.
225; We can not do this if noncache is set because we will take an
226; alignment exception.
227
228 neg r0,r4 ; Get the number of bytes to move to align to a line boundary (low 5 bits of -sink)
229 rlwinm. r0,r0,0,27,31 ; Clean it up and test it (cr0 eq = sink already on a 32-byte line)
230 and r0,r0,r8 ; limit to the maximum front end move (r8 = length-derived mask from copyit)
231 mtcrf 3,r0 ; Make branch mask for partial moves: CR bits 27-31 select the 16/8/4/2/1 byte steps
232 sub r5,r5,r0 ; Set the length left to move
233 beq alline ; Already on a line... (tests cr0 from the rlwinm. above)
234
235 bf 31,alhalf ; No single byte to do...
236 lbz r7,0(r6) ; Get the byte
237 addi r6,r6,1 ; Point to the next
238 stb r7,0(r4) ; Save the single
239 addi r4,r4,1 ; Bump sink
240
241; Sink is halfword aligned here
242
243alhalf: bf 30,alword ; No halfword to do...
244 lhz r7,0(r6) ; Get the halfword
245 addi r6,r6,2 ; Point to the next
246 sth r7,0(r4) ; Save the halfword
247 addi r4,r4,2 ; Bump sink
248
249; Sink is word aligned here
250
251alword: bf 29,aldouble ; No word to do...
252 lwz r7,0(r6) ; Get the word
253 addi r6,r6,4 ; Point to the next
254 stw r7,0(r4) ; Save the word
255 addi r4,r4,4 ; Bump sink
256
257; Sink is double aligned here
258
259aldouble: bf 28,alquad ; No double to do...
260 lwz r7,0(r6) ; Get the first word
261 lwz r8,4(r6) ; Get the second word (r8 is free now; the front-end limit has been applied)
262 addi r6,r6,8 ; Point to the next
263 stw r7,0(r4) ; Save the first word
264 stw r8,4(r4) ; Save the second word
265 addi r4,r4,8 ; Bump sink
266
267; Sink is quadword aligned here
268
269alquad: bf 27,alline ; No quad to do...
270 lwz r7,0(r6) ; Get the first word
271 lwz r8,4(r6) ; Get the second word
272 lwz r9,8(r6) ; Get the third word
273 stw r7,0(r4) ; Save the first word
274 lwz r11,12(r6) ; Get the fourth word
275 addi r6,r6,16 ; Point to the next
276 stw r8,4(r4) ; Save the second word
277 stw r9,8(r4) ; Save the third word
278 stw r11,12(r4) ; Save the fourth word
279 addi r4,r4,16 ; Bump sink
280
281; Sink is line aligned here
282
283alline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move (remaining length / 32)
284 mtcrf 3,r5 ; Make branch mask for backend partial moves (CR bits 27-31 = low 5 length bits)
285 rlwinm r11,r5,0,0,26 ; Get number of bytes we are going to move (length rounded down to whole lines)
286 beq- backend ; No full lines to move
287
288 sub r5,r5,r11 ; Calculate the residual
289 li r10,96 ; Stride for touch ahead
290
291nxtline: subic. r0,r0,1 ; Account for the line now (cr0 is tested by the bgt+ at the loop bottom)
292
293 bt- noncache,skipz ; Skip if we are not cached...
294 dcbz br0,r4 ; Blow away the whole line because we are replacing it
295 dcbt r6,r10 ; Touch ahead a bit (source + 96)
296
297skipz: lwz r7,0(r6) ; Get the first word
298 lwz r8,4(r6) ; Get the second word
299 lwz r9,8(r6) ; Get the third word
300 stw r7,0(r4) ; Save the first word
301 lwz r11,12(r6) ; Get the fourth word
302 stw r8,4(r4) ; Save the second word
303 lwz r7,16(r6) ; Get the fifth word
304 stw r9,8(r4) ; Save the third word
305 lwz r8,20(r6) ; Get the sixth word
306 stw r11,12(r4) ; Save the fourth word
307 lwz r9,24(r6) ; Get the seventh word
308 stw r7,16(r4) ; Save the fifth word
309 lwz r11,28(r6) ; Get the eighth word
310 addi r6,r6,32 ; Point to the next
311 stw r8,20(r4) ; Save the sixth word
312 stw r9,24(r4) ; Save the seventh word
313 stw r11,28(r4) ; Save the eighth word
314 addi r4,r4,32 ; Bump sink
315 bgt+ nxtline ; Do the next line, if any... (lines remaining > 0)
316
317
318; Move backend quadword
319
320backend: bf 27,noquad ; No quad to do... (CR bits 27-31 were loaded from the length at alline)
321 lwz r7,0(r6) ; Get the first word
322 lwz r8,4(r6) ; Get the second word
323 lwz r9,8(r6) ; Get the third word
324 lwz r11,12(r6) ; Get the fourth word
325 stw r7,0(r4) ; Save the first word
326 addi r6,r6,16 ; Point to the next
327 stw r8,4(r4) ; Save the second word
328 stw r9,8(r4) ; Save the third word
329 stw r11,12(r4) ; Save the fourth word
330 addi r4,r4,16 ; Bump sink
331
332; Move backend double
333
334noquad: bf 28,nodouble ; No double to do...
335 lwz r7,0(r6) ; Get the first word
336 lwz r8,4(r6) ; Get the second word
337 addi r6,r6,8 ; Point to the next
338 stw r7,0(r4) ; Save the first word
339 stw r8,4(r4) ; Save the second word
340 addi r4,r4,8 ; Bump sink
341
342; Move backend word
343
344nodouble: bf 29,noword ; No word to do...
345 lwz r7,0(r6) ; Get the word
346 addi r6,r6,4 ; Point to the next
347 stw r7,0(r4) ; Save the word
348 addi r4,r4,4 ; Bump sink
349
350; Move backend halfword
351
352noword: bf 30,nohalf ; No halfword to do...
353 lhz r7,0(r6) ; Get the halfword
354 addi r6,r6,2 ; Point to the next
355 sth r7,0(r4) ; Save the halfword
356 addi r4,r4,2 ; Bump sink
357
358; Move backend byte
359
360nohalf: bf 31,bcpydone ; Leave cuz we are all done... (via bcpydone so BATs/translation are restored)
361 lbz r7,0(r6) ; Get the byte
362 stb r7,0(r4) ; Save the single, then fall through into bcpydone
363
de355530
A
364bcpydone: bt- killbats,bcclrbat ; Jump if we need to clear bats...
365 bflr fixxlate ; Leave now if we do not need to fix translation...
1c79356b
A
366 mfmsr r9 ; Get the MSR
367 ori r9,r9,lo16(MASK(MSR_DR)) ; Turn data translation on
de355530
A
368 rlwinm r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off
369 rlwinm r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off
1c79356b
A
370 mtmsr r9 ; Just do it
371 isync ; Hang in there
de355530
A
372 blr ; Leave cuz we are all done...
373
374bcclrbat: li r0,0 ; Get set to invalidate upper half
9bccf70c
A
375 sync ; Make sure all is well
376 mtdbatu 0,r0 ; Clear DBAT 0 upper
377 mtdbatu 1,r0 ; Clear DBAT 1 upper (cleared even if bcopy_physvir loaded only DBAT 0)
378 sync
379 isync
380 blr ; Return without testing fixxlate
381
382
1c79356b
A
383;
384; 0123456789ABCDEF0123456789ABCDEF
385; 0123456789ABCDEF0123456789ABCDEF
386; F
387; DE
388; 9ABC
389; 12345678
390; 123456789ABCDEF0
391; 0
392
393;
394; Here is where we handle a forward overlapping move. These will be slow
395; because we can not kill the cache of the destination until after we have
396; loaded/saved the source area. Also, because reading memory backwards is
397; slower when the cache line needs to be loaded because the critical
398; doubleword is loaded first, i.e., the last, then it goes back to the first,
399; and on in order. That means that when we are at the second to last DW we
400; have to wait until the whole line is in cache before we can proceed.
401;
de355530 402
1c79356b
A
403fwdovrlap: add r4,r5,r4 ; Point past the last sink byte
404 add r6,r5,r6 ; Point past the last source byte
405 and r0,r4,r8 ; Apply movement limit (r8 = length-derived mask from copyit)
406 li r12,-1 ; Make sure we touch in the actual line
407 mtcrf 3,r0 ; Figure out the best way to move backwards (CR bits 27-31 select 16/8/4/2/1 byte steps)
408 dcbt r12,r6 ; Touch in the last line of source (address = end - 1)
409 rlwinm. r0,r0,0,27,31 ; Calculate the length to adjust to cache boundary
410 dcbtst r12,r4 ; Touch in the last line of the sink (address = end - 1)
411 beq- balline ; Already on cache line boundary
412
413 sub r5,r5,r0 ; Precalculate move length left after alignment
414
415 bf 31,balhalf ; No single byte to do...
416 lbz r7,-1(r6) ; Get the byte
417 subi r6,r6,1 ; Point to the next
418 stb r7,-1(r4) ; Save the single
419 subi r4,r4,1 ; Bump sink
420
421; Sink is halfword aligned here
422
423balhalf: bf 30,balword ; No halfword to do...
424 lhz r7,-2(r6) ; Get the halfword
425 subi r6,r6,2 ; Point to the next
426 sth r7,-2(r4) ; Save the halfword
427 subi r4,r4,2 ; Bump sink
428
429; Sink is word aligned here
430
431balword: bf 29,baldouble ; No word to do...
432 lwz r7,-4(r6) ; Get the word
433 subi r6,r6,4 ; Point to the next
434 stw r7,-4(r4) ; Save the word
435 subi r4,r4,4 ; Bump sink
436
437; Sink is double aligned here
438
439baldouble: bf 28,balquad ; No double to do...
440 lwz r7,-8(r6) ; Get the first word
441 lwz r8,-4(r6) ; Get the second word
442 subi r6,r6,8 ; Point to the next
443 stw r7,-8(r4) ; Save the first word
444 stw r8,-4(r4) ; Save the second word
445 subi r4,r4,8 ; Bump sink
446
447; Sink is quadword aligned here
448
449balquad: bf 27,balline ; No quad to do...
450 lwz r7,-16(r6) ; Get the first word
451 lwz r8,-12(r6) ; Get the second word
452 lwz r9,-8(r6) ; Get the third word
453 lwz r11,-4(r6) ; Get the fourth word
454 stw r7,-16(r4) ; Save the first word
455 subi r6,r6,16 ; Point to the next
456 stw r8,-12(r4) ; Save the second word
457 stw r9,-8(r4) ; Save the third word
458 stw r11,-4(r4) ; Save the fourth word
459 subi r4,r4,16 ; Bump sink
460
461; Sink is line aligned here
462
463balline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move (remaining length / 32)
464 mtcrf 3,r5 ; Make branch mask for backend partial moves (low length bits now live in CR 27-31)
465 beq- bbackend ; No full lines to move
1c79356b
A
466
467
468; Registers in use: R0, R1, R3, R4, R5, R6 (R5 is only needed up to the mtcrf above; the loop reuses it)
469; Registers not in use: R2, R7, R8, R9, R10, R11, R12 - Ok, we can make another free for 8 of them
470
471bnxtline: subic. r0,r0,1 ; Account for the line now (cr0 is tested by both ble- and bgt+ below)
472
473 lwz r7,-32(r6) ; Get the first word
474 lwz r5,-28(r6) ; Get the second word (overwrites the residual length in r5)
475 lwz r2,-24(r6) ; Get the third word
476 lwz r12,-20(r6) ; Get the fourth word
477 lwz r11,-16(r6) ; Get the fifth word
478 lwz r10,-12(r6) ; Get the sixth word
479 lwz r9,-8(r6) ; Get the seventh word
480 lwz r8,-4(r6) ; Get the eighth word
481 subi r6,r6,32 ; Point to the next
482
483 stw r7,-32(r4) ; Save the first word
484 ble- bnotouch ; Last time, skip touch of source...
485 dcbt br0,r6 ; Touch in next source line
486
487bnotouch: stw r5,-28(r4) ; Save the second word
488 stw r2,-24(r4) ; Save the third word
489 stw r12,-20(r4) ; Save the fourth word
490 stw r11,-16(r4) ; Save the fifth word
491 stw r10,-12(r4) ; Save the sixth word
492 stw r9,-8(r4) ; Save the seventh word
493 stw r8,-4(r4) ; Save the eighth word
494 subi r4,r4,32 ; Bump sink
495
496 bgt+ bnxtline ; Do the next line, if any...
1c79356b
A
497
498;
499; Note: We touched these lines in at the beginning
500;
501
502; Move backend quadword
503
504bbackend: bf 27,bnoquad ; No quad to do... (CR bits 27-31 hold the low 5 bits of the remaining length)
505 lwz r7,-16(r6) ; Get the first word
506 lwz r8,-12(r6) ; Get the second word
507 lwz r9,-8(r6) ; Get the third word
508 lwz r11,-4(r6) ; Get the fourth word
509 stw r7,-16(r4) ; Save the first word
510 subi r6,r6,16 ; Point to the next
511 stw r8,-12(r4) ; Save the second word
512 stw r9,-8(r4) ; Save the third word
513 stw r11,-4(r4) ; Save the fourth word
514 subi r4,r4,16 ; Bump sink
515
516; Move backend double
517
518bnoquad: bf 28,bnodouble ; No double to do...
519 lwz r7,-8(r6) ; Get the first word
520 lwz r8,-4(r6) ; Get the second word
521 subi r6,r6,8 ; Point to the next
522 stw r7,-8(r4) ; Save the first word
523 stw r8,-4(r4) ; Save the second word
524 subi r4,r4,8 ; Bump sink
525
526; Move backend word
527
528bnodouble: bf 29,bnoword ; No word to do...
529 lwz r7,-4(r6) ; Get the word
530 subi r6,r6,4 ; Point to the next
531 stw r7,-4(r4) ; Save the word
532 subi r4,r4,4 ; Bump sink
533
534; Move backend halfword
535
536bnoword: bf 30,bnohalf ; No halfword to do...
537 lhz r7,-2(r6) ; Get the halfword
538 subi r6,r6,2 ; Point to the next
539 sth r7,-2(r4) ; Save the halfword
540 subi r4,r4,2 ; Bump sink
541
542; Move backend byte
543
544bnohalf: bf 31,bcpydone ; No byte to do: exit through bcpydone so killbats/fixxlate are honored (a bare return would leave DBATs loaded or translation off)
545 lbz r7,-1(r6) ; Get the byte
546 stb r7,-1(r4) ; Save the single
547
9bccf70c 548 b bcpydone ; Go exit cuz we are all done...