]> git.saurik.com Git - apple/xnu.git/blame - osfmk/ppc/bcopy.s
xnu-344.34.tar.gz
[apple/xnu.git] / osfmk / ppc / bcopy.s
CommitLineData
1c79356b 1/*
de355530 2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
1c79356b
A
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
de355530
A
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
1c79356b 11 *
de355530
A
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
1c79356b
A
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
de355530
A
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
1c79356b
A
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22;
23; Copy bytes of data around. handles overlapped data.
24;
25; Change this to use Altivec later on, and maybe floating point.
26;
1c79356b
A
27;
28#include <ppc/asm.h>
29#include <ppc/proc_reg.h>
30
; Condition-register bit numbers used as one-bit flags by the copy routines below.
; They are set/cleared with crset/crclr at each entry point and tested later.
31; Use CR5_lt to indicate non-cached
; (tested at nxtline: when set, the dcbz/dcbt pair is skipped)
32#define noncache 20
33; Use CR5_gt to indicate that we need to turn data translation back on
; (tested at bcpydone with "bflr fixxlate")
34#define fixxlate 21
de355530
A
35; Use CR5_eq to indicate that we need to invalidate bats
; (tested at bcpydone with "bt- killbats,bcclrbat")
36#define killbats 22
1c79356b
A
37
38;
39; bcopy_nc(from, to, nbytes)
40;
41; bcopy_nc operates on non-cached memory so we can not use any kind
42; of cache instructions.
43;
44
9bccf70c
A
; Entry point bcopy_nc: r3 = from, r4 = to, r5 = nbytes (same register use as bcopy).
; Only sets the noncache flag and then joins bcopy's common setup at bcpswap.
; With noncache set, the full-line loop at nxtline skips its dcbz/dcbt instructions.
45 .align 5
46 .globl EXT(bcopy_nc)
1c79356b 47
9bccf70c 48LEXT(bcopy_nc)
1c79356b
A
49
50 crset noncache ; Flag the copy as non-cached
51 b bcpswap ; Join the common bcopy setup
52
9bccf70c
A
53;
54; void bcopy_physvir(from, to, nbytes)
55; Attempt to copy physically addressed memory with translation on if conditions are met.
de355530 56; Otherwise do a normal bcopy_phys.
9bccf70c
A
57;
58; Rules are: neither source nor destination can cross a page.
de355530 59; No accesses above the 2GB line (I/O or ROM).
9bccf70c 60;
de355530
A
61; Interrupts must be disabled throughout the copy when this is called
62
9bccf70c
A
63; To do this, we build a
64; 128KB DBAT for both the source and sink. If both are the same, only one is
65; loaded. We do not touch the IBATs, so there is no issue if either physical page
66; address is the same as the virtual address of the instructions we are executing.
67;
de355530 68; At the end, we invalidate the used DBATs. Interrupt state is not changed here; it is left to the caller.
9bccf70c
A
69;
70; Note, this one will not work in user state
71;
72
; Entry point bcopy_physvir: r3 = from (source), r4 = to (sink), r5 = nbytes.
;
; Flow:
;   - Returns at once if nbytes - 1 is negative as a signed word
;     (nbytes == 0 or nbytes > 0x80000000).
;   - If the first and last byte of either range differ in address bits 0-19
;     (i.e. the range leaves its 4KB page), branches to bcopy_phys instead.
;   - Otherwise sets killbats, loads DBAT 0 from the "from" address and, unless
;     both addresses produce the same lower-DBAT value, DBAT 1 from the "to"
;     address, then joins the common copy code at copyit.
;
; fixxlate is not written on this path; bcpydone tests killbats first and
; exits through bcclrbat, so fixxlate is never examined for this entry.
73 .align 5
74 .globl EXT(bcopy_physvir)
75
76LEXT(bcopy_physvir)
77
de355530 78 addic. r0,r5,-1 ; Get length - 1 (cr0 records its sign)
9bccf70c 79 add r11,r3,r0 ; Point to last byte of source (r3 = from)
de355530 80 cmplw cr1,r3,r4 ; Compare "from" and "to" (cr1 is recomputed below before any branch tests it)
9bccf70c
A
81 add r12,r4,r0 ; Point to last byte of sink (r4 = to)
82 bltlr- ; Bail if length is 0 or way too big
83 xor r7,r11,r3 ; Bits that differ between first and last source byte
84 xor r8,r12,r4 ; Bits that differ between first and last sink byte
85 or r0,r7,r8 ; Combine wrap
86
de355530 87 li r9,((PTE_WIMG_CB_CACHED_COHERENT<<3)|2) ; Set default attributes
9bccf70c
A
88 rlwinm. r0,r0,0,0,19 ; Keep only the page-number bits: did either range leave its page?
89 li r7,2 ; Set validity flags (seed for DBAT 0 upper value)
90 li r8,2 ; Set validity flags (seed for DBAT 1 upper value)
de355530 91 bne- EXT(bcopy_phys) ; Overflowed page, do normal physical copy...
9bccf70c 92
de355530 93 crset killbats ; Remember to trash BATs on the way out
9bccf70c
A
94 rlwimi r11,r9,0,15,31 ; DBAT 0 lower value: source address bits 0-14 + attributes
95 rlwimi r12,r9,0,15,31 ; DBAT 1 lower value: sink address bits 0-14 + attributes
96 rlwimi r7,r11,0,0,14 ; DBAT 0 upper value: source address bits 0-14 + validity flags
97 rlwimi r8,r12,0,0,14 ; DBAT 1 upper value: sink address bits 0-14 + validity flags
98 cmplw cr1,r11,r12 ; See if source and sink are in the same block
99
100 sync
101
102 mtdbatl 0,r11 ; Set source lower DBAT
103 mtdbatu 0,r7 ; Set source upper DBAT
104
105 beq- cr1,bcpvsame ; Source and sink are in same block, DBAT 0 covers both
106
107 mtdbatl 1,r12 ; Set sink lower DBAT
108 mtdbatu 1,r8 ; Set sink upper DBAT
109
110bcpvsame: mr r6,r3 ; Set source (copyit expects r6 = source, r4 = sink)
111 crclr noncache ; Set cached
112
de355530
A
113 b copyit ; Go copy it...
114
9bccf70c 115
1c79356b
A
116;
117; void bcopy_phys(from, to, nbytes)
118; Turns off data translation before the copy. Note, this one will
de355530 119; not work in user state
1c79356b
A
120;
121
9bccf70c
A
; Entry point bcopy_phys: r3 = from, r4 = to, r5 = nbytes.
;
; Returns immediately (before touching the MSR) when from == to or nbytes == 0.
; Otherwise writes the MSR with data translation (DR), floating point (FP) and
; vector (VEC) bits cleared, records in fixxlate whether DR had been on, clears
; killbats, and joins the common copy code at copyit. bcpydone turns DR back on
; when fixxlate is set.
122 .align 5
123 .globl EXT(bcopy_phys)
124
125LEXT(bcopy_phys)
de355530 126
1c79356b 127 mfmsr r9 ; Get the MSR
de355530 128
1c79356b 129 crclr noncache ; Set cached
de355530
A
130 rlwinm. r8,r9,0,MSR_DR_BIT,MSR_DR_BIT ; Isolate DR in r8; cr0_eq set if translation was off
131
132 cmplw cr1,r4,r3 ; Compare "to" and "from"
1c79356b
A
133 cmplwi cr7,r5,0 ; Check if we have a 0 length
134 mr r6,r3 ; Set source
de355530
A
135 beqlr- cr1 ; Bail if "to" and "from" are the same
136 xor r9,r9,r8 ; Turn off translation if it is on (should be)
1c79356b
A
137 beqlr- cr7 ; Bail if length is 0
138
de355530
A
139 rlwinm r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off
140 crclr killbats ; Make sure we do not trash BATs on the way out
141 rlwinm r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off
1c79356b
A
142 mtmsr r9 ; Set DR translation off
143 isync ; Wait for it
144
de355530
A
145 crnot fixxlate,cr0_eq ; cr0 still holds the DR test: remember to turn on translation if it was
146 b copyit ; Go copy it...
1c79356b
A
147
148;
149; void bcopy(from, to, nbytes)
150;
151
9bccf70c
A
; Entry point bcopy: r3 = from, r4 = to, r5 = nbytes.
; bcpswap is the shared setup also reached from bcopy_nc (which arrives with
; noncache already set). Returns immediately when from == to or nbytes == 0;
; otherwise clears killbats and fixxlate and joins copyit with r6 = source,
; r4 = sink, r5 = length.
152 .align 5
153 .globl EXT(bcopy)
154
155LEXT(bcopy)
1c79356b
A
156
157 crclr noncache ; Set cached
158
de355530
A
159bcpswap: cmplw cr1,r4,r3 ; Compare "to" and "from"
160 mr. r5,r5 ; Check if we have a 0 length (sets cr0)
1c79356b 161 mr r6,r3 ; Set source
de355530
A
162 crclr killbats ; Make sure we do not trash BATs on the way out
163 beqlr- cr1 ; Bail if "to" and "from" are the same
164 beqlr- ; Bail if length is 0
1c79356b 165 crclr fixxlate ; Set translation already ok
de355530 166 b copyit ; Go copy it...
1c79356b
A
167
168;
169; When we move the memory, forward overlays must be handled. We
170; also can not use the cache instructions if we are from bcopy_nc.
171; We need to preserve R3 because it needs to be returned for memcpy.
172; We can be interrupted and lose control here.
173;
de355530
A
174; There is no stack, so in order to use floating point, we would
175; need to take the FP exception. Any potential gains by using FP
1c79356b
A
176; would be more than eaten up by this.
177;
de355530 178; Later, we should use Altivec for large moves.
1c79356b
A
179;
180
9bccf70c
A
; Entry point memcpy: r3 = to, r4 = from, r5 = length (argument order is the
; reverse of bcopy). The arguments are moved into the layout copyit expects
; (r6 = source, r4 = sink); r3 is not modified anywhere in the copy code, so
; it is still the "to" pointer on return.
; Returns immediately when to == from or length == 0; otherwise execution
; falls straight through into copyit.
181 .align 5
182 .globl EXT(memcpy)
de355530 183
9bccf70c 184LEXT(memcpy)
de355530 185
1c79356b
A
186 cmplw cr1,r3,r4 ; "to" and "from" the same?
187 mr r6,r4 ; Set the "from"
188 mr. r5,r5 ; Length zero? (sets cr0)
189 crclr noncache ; Set cached
190 mr r4,r3 ; Set the "to"
191 crclr fixxlate ; Set translation already ok
192 beqlr- cr1 ; "to" and "from" are the same
193 beqlr- ; Length is 0
de355530 194 crclr killbats ; Make sure we do not trash BATs on the way out
1c79356b 195
; copyit: common copy setup, reached from every entry point.
; In:  r4 = sink, r5 = length (non-zero), r6 = source;
;      noncache / fixxlate / killbats flags already set up by the entry point.
; Computes:
;   r12 = sink - source; an unsigned r12 < length means the sink starts inside
;         the source range, so the copy must run backwards (fwdovrlap).
;   r9  = absolute value of (sink - source); if it is below 32 the noncache
;         flag is forced on so the line loop will not dcbz the sink.
;   r8  = 0x7FFFFFFF >> cntlzw(length), i.e. a mask of all bits below the
;         highest set bit of the length; used to cap the alignment pre-move.
de355530 196copyit: sub r12,r4,r6 ; Get potential overlap (negative if backward move)
1c79356b
A
197 lis r8,0x7FFF ; Start up a mask
198 srawi r11,r12,31 ; Propagate the sign bit
199 dcbt br0,r6 ; Touch in the first source line
200 cntlzw r7,r5 ; Count leading zeros of the length (locates its highest set bit)
201 ori r8,r8,0xFFFF ; Make limit 0x7FFFFFFF
202 xor r9,r12,r11 ; If sink - source was negative, invert bits
203 srw r8,r8,r7 ; Get move length limitation (mask of bits below the length's top bit)
204 sub r9,r9,r11 ; If sink - source was negative, add 1 and get absolute value
205 cmplw r12,r5 ; See if we actually forward overlap (unsigned, result in cr0)
206 cmplwi cr7,r9,32 ; See if at least a line between source and sink
207 dcbtst br0,r4 ; Touch in the first sink line
208 cmplwi cr1,r5,32 ; Are we moving more than a line? (cr1 is not tested by the code that follows)
de355530 209 cror noncache,noncache,28 ; CR bit 28 is cr7 "lt": set to not DCBZ output line if not enough space
1c79356b
A
210 blt- fwdovrlap ; This is a forward overlapping area, handle it...
211
212;
213; R4 = sink
214; R5 = length
215; R6 = source
216;
217

218;
219; Here we figure out how much we have to move to get the sink onto a
220; cache boundary. If we can, and there are still more than 32 bytes
221; left to move, we can really speed things up by DCBZing the sink line.
222; We can not do this if noncache is set because we will take an
223; alignment exception.
;
; The pre-move count (0-31) is ANDed with the limit mask in R8 (built at
; copyit), then copied into the low CR bits with mtcrf so that each step
; below is selected by one bit: 31 = 1 byte, 30 = 2, 29 = 4, 28 = 8, 27 = 16.
; R7, R8, R9 and R11 are used as scratch from here on.
224
225 neg r0,r4 ; Get the number of bytes to move to align to a line boundary
226 rlwinm. r0,r0,0,27,31 ; Clean it up and test it (cr0_eq if sink is already line aligned)
227 and r0,r0,r8 ; limit to the maximum front end move
228 mtcrf 3,r0 ; Make branch mask for partial moves
229 sub r5,r5,r0 ; Set the length left to move
230 beq alline ; Already on a line... (uses cr0 from the rlwinm. above)
231
232 bf 31,alhalf ; No single byte to do...
233 lbz r7,0(r6) ; Get the byte
234 addi r6,r6,1 ; Point to the next
235 stb r7,0(r4) ; Save the single
236 addi r4,r4,1 ; Bump sink
237
238; Sink is halfword aligned here
239
240alhalf: bf 30,alword ; No halfword to do...
241 lhz r7,0(r6) ; Get the halfword
242 addi r6,r6,2 ; Point to the next
243 sth r7,0(r4) ; Save the halfword
244 addi r4,r4,2 ; Bump sink
245
246; Sink is word aligned here
247
248alword: bf 29,aldouble ; No word to do...
249 lwz r7,0(r6) ; Get the word
250 addi r6,r6,4 ; Point to the next
251 stw r7,0(r4) ; Save the word
252 addi r4,r4,4 ; Bump sink
253
254; Sink is double aligned here
255
256aldouble: bf 28,alquad ; No double to do...
257 lwz r7,0(r6) ; Get the first word
258 lwz r8,4(r6) ; Get the second word (the limit mask in r8 is no longer needed)
259 addi r6,r6,8 ; Point to the next
260 stw r7,0(r4) ; Save the first word
261 stw r8,4(r4) ; Save the second word
262 addi r4,r4,8 ; Bump sink
263
264; Sink is quadword aligned here
265
266alquad: bf 27,alline ; No quad to do...
267 lwz r7,0(r6) ; Get the first word
268 lwz r8,4(r6) ; Get the second word
269 lwz r9,8(r6) ; Get the third word
270 stw r7,0(r4) ; Save the first word
271 lwz r11,12(r6) ; Get the fourth word
272 addi r6,r6,16 ; Point to the next
273 stw r8,4(r4) ; Save the second word
274 stw r9,8(r4) ; Save the third word
275 stw r11,12(r4) ; Save the fourth word
276 addi r4,r4,16 ; Bump sink
277
278; Sink is line aligned here
279
; alline / nxtline: forward copy of whole 32-byte lines.
; In:  r4 = sink, r5 = bytes still to move, r6 = source.
; r0 counts the full lines (r5 >> 5); the low five bits of r5 are copied to
; the CR with mtcrf so the backend code can pick up the residual. Each pass
; moves eight words through r7/r8/r9/r11. Unless noncache is set, the sink
; line is zeroed with dcbz before being filled and the source is touched
; 96 bytes ahead. The loop branch at the bottom reuses cr0 from the subic.
; at the top; nothing in between writes cr0.
280alline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move
281 mtcrf 3,r5 ; Make branch mask for backend partial moves
282 rlwinm r11,r5,0,0,26 ; Get number of bytes we are going to move
283 beq- backend ; No full lines to move
284
285 sub r5,r5,r11 ; Calculate the residual
286 li r10,96 ; Stride for touch ahead
287
288nxtline: subic. r0,r0,1 ; Account for the line now
289
290 bt- noncache,skipz ; Skip if we are not cached...
291 dcbz br0,r4 ; Blow away the whole line because we are replacing it
292 dcbt r6,r10 ; Touch ahead a bit
293
294skipz: lwz r7,0(r6) ; Get the first word
295 lwz r8,4(r6) ; Get the second word
296 lwz r9,8(r6) ; Get the third word
297 stw r7,0(r4) ; Save the first word
298 lwz r11,12(r6) ; Get the fourth word
299 stw r8,4(r4) ; Save the second word
300 lwz r7,16(r6) ; Get the fifth word
301 stw r9,8(r4) ; Save the third word
302 lwz r8,20(r6) ; Get the sixth word
303 stw r11,12(r4) ; Save the fourth word
304 lwz r9,24(r6) ; Get the seventh word
305 stw r7,16(r4) ; Save the fifth word
306 lwz r11,28(r6) ; Get the eighth word
307 addi r6,r6,32 ; Point to the next
308 stw r8,20(r4) ; Save the sixth word
309 stw r9,24(r4) ; Save the seventh word
310 stw r11,28(r4) ; Save the eighth word
311 addi r4,r4,32 ; Bump sink
312 bgt+ nxtline ; Do the next line, if any...
313
314
315; Move backend quadword
316
; backend: forward copy of the final 0-31 bytes.
; The CR bits loaded by "mtcrf 3,r5" at alline select each step:
; 27 = 16 bytes, 28 = 8, 29 = 4, 30 = 2, 31 = 1. r4/r6 advance after each
; piece. Every path ends at bcpydone, which handles DBAT cleanup and
; restoring data translation before returning.
317backend: bf 27,noquad ; No quad to do...
318 lwz r7,0(r6) ; Get the first word
319 lwz r8,4(r6) ; Get the second word
320 lwz r9,8(r6) ; Get the third word
321 lwz r11,12(r6) ; Get the fourth word
322 stw r7,0(r4) ; Save the first word
323 addi r6,r6,16 ; Point to the next
324 stw r8,4(r4) ; Save the second word
325 stw r9,8(r4) ; Save the third word
326 stw r11,12(r4) ; Save the fourth word
327 addi r4,r4,16 ; Bump sink
328
329; Move backend double
330
331noquad: bf 28,nodouble ; No double to do...
332 lwz r7,0(r6) ; Get the first word
333 lwz r8,4(r6) ; Get the second word
334 addi r6,r6,8 ; Point to the next
335 stw r7,0(r4) ; Save the first word
336 stw r8,4(r4) ; Save the second word
337 addi r4,r4,8 ; Bump sink
338
339; Move backend word
340
341nodouble: bf 29,noword ; No word to do...
342 lwz r7,0(r6) ; Get the word
343 addi r6,r6,4 ; Point to the next
344 stw r7,0(r4) ; Save the word
345 addi r4,r4,4 ; Bump sink
346
347; Move backend halfword
348
349noword: bf 30,nohalf ; No halfword to do...
350 lhz r7,0(r6) ; Get the halfword
351 addi r6,r6,2 ; Point to the next
352 sth r7,0(r4) ; Save the halfword
353 addi r4,r4,2 ; Bump sink
354
355; Move backend byte
356
357nohalf: bf 31,bcpydone ; No odd byte: leave through the common exit...
358 lbz r7,0(r6) ; Get the byte
359 stb r7,0(r4) ; Save the single (then fall into bcpydone)
360
de355530
A
; bcpydone: common exit for the copy code.
;   killbats set  -> bcclrbat: write 0 to the upper halves of DBAT 0 and
;                    DBAT 1 (the pair loaded by bcopy_physvir) and return.
;   fixxlate clear -> return at once.
;   fixxlate set  -> turn MSR data translation (DR) back on and return.
;                    FP and VEC are forced off in the value written, so they
;                    are not re-enabled here.
; killbats is tested first, so fixxlate is never examined on the BAT path.
361bcpydone: bt- killbats,bcclrbat ; Jump if we need to clear bats...
362 bflr fixxlate ; Leave now if we do not need to fix translation...
1c79356b
A
363 mfmsr r9 ; Get the MSR
364 ori r9,r9,lo16(MASK(MSR_DR)) ; Turn data translation on
de355530
A
365 rlwinm r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off
366 rlwinm r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off
1c79356b
A
367 mtmsr r9 ; Just do it
368 isync ; Hang in there
de355530
A
369 blr ; Leave cuz we are all done...
370
371bcclrbat: li r0,0 ; Get set to invalidate upper half
9bccf70c
A
372 sync ; Make sure all is well
373 mtdbatu 0,r0 ; Clear upper DBAT 0 (loaded from the "from"/r3 address in bcopy_physvir)
374 mtdbatu 1,r0 ; Clear upper DBAT 1 (loaded from the "to"/r4 address in bcopy_physvir)
375 sync
376 isync
377 blr
378
379
1c79356b
A
380;
381; 0123456789ABCDEF0123456789ABCDEF
382; 0123456789ABCDEF0123456789ABCDEF
383; F
384; DE
385; 9ABC
386; 12345678
387; 123456789ABCDEF0
388; 0
389
390;
391; Here is where we handle a forward overlapping move. These will be slow
392; because we can not kill the cache of the destination until after we have
393; loaded/saved the source area. Also, because reading memory backwards is
394; slower when the cache line needs to be loaded because the critical
395; doubleword is loaded first, i.e., the last, then it goes back to the first,
396; and on in order. That means that when we are at the second to last DW we
397; have to wait until the whole line is in cache before we can proceed.
398;
de355530 399
1c79356b
A
; fwdovrlap: backward (high-to-low address) copy, used when copyit finds the
; sink starting inside the source range.
; In:  r4 = sink, r5 = length, r6 = source, r8 = limit mask built at copyit.
; r4 and r6 are first advanced to one byte past the end of each range; every
; load/store then uses a negative offset and the pointers are decremented.
; The low five bits of (end-of-sink AND r8) give the byte count needed to
; bring the sink end down to a 32-byte boundary; they are copied to the CR so
; each step below is selected by one bit: 31 = 1 byte, 30 = 2, 29 = 4,
; 28 = 8, 27 = 16.
400fwdovrlap: add r4,r5,r4 ; Point past the last sink byte
401 add r6,r5,r6 ; Point past the last source byte
402 and r0,r4,r8 ; Apply movement limit
403 li r12,-1 ; Make sure we touch in the actual line
404 mtcrf 3,r0 ; Figure out the best way to move backwards
405 dcbt r12,r6 ; Touch in the last line of source
406 rlwinm. r0,r0,0,27,31 ; Calculate the length to adjust to cache boundary
407 dcbtst r12,r4 ; Touch in the last line of the sink
408 beq- balline ; Already on cache line boundary
409
410 sub r5,r5,r0 ; Precalculate move length left after alignment
411
412 bf 31,balhalf ; No single byte to do...
413 lbz r7,-1(r6) ; Get the byte
414 subi r6,r6,1 ; Point to the next
415 stb r7,-1(r4) ; Save the single
416 subi r4,r4,1 ; Bump sink
417
418; Sink is halfword aligned here
419
420balhalf: bf 30,balword ; No halfword to do...
421 lhz r7,-2(r6) ; Get the halfword
422 subi r6,r6,2 ; Point to the next
423 sth r7,-2(r4) ; Save the halfword
424 subi r4,r4,2 ; Bump sink
425
426; Sink is word aligned here
427
428balword: bf 29,baldouble ; No word to do...
429 lwz r7,-4(r6) ; Get the word
430 subi r6,r6,4 ; Point to the next
431 stw r7,-4(r4) ; Save the word
432 subi r4,r4,4 ; Bump sink
433
434; Sink is double aligned here
435
436baldouble: bf 28,balquad ; No double to do...
437 lwz r7,-8(r6) ; Get the first word
438 lwz r8,-4(r6) ; Get the second word (the limit mask in r8 is no longer needed)
439 subi r6,r6,8 ; Point to the next
440 stw r7,-8(r4) ; Save the first word
441 stw r8,-4(r4) ; Save the second word
442 subi r4,r4,8 ; Bump sink
443
444; Sink is quadword aligned here
445
446balquad: bf 27,balline ; No quad to do...
447 lwz r7,-16(r6) ; Get the first word
448 lwz r8,-12(r6) ; Get the second word
449 lwz r9,-8(r6) ; Get the third word
450 lwz r11,-4(r6) ; Get the fourth word
451 stw r7,-16(r4) ; Save the first word
452 subi r6,r6,16 ; Point to the next
453 stw r8,-12(r4) ; Save the second word
454 stw r9,-8(r4) ; Save the third word
455 stw r11,-4(r4) ; Save the fourth word
456 subi r4,r4,16 ; Bump sink
457
458; Sink is line aligned here
459
; balline / bnxtline: backward copy of whole 32-byte lines.
; In:  r4 / r6 = one past the next sink / source byte to move, r5 = bytes left.
; r0 counts the full lines (r5 >> 5). The low bits of r5 are copied to the CR
; before the loop, so r5 itself can be reused as a data register inside it.
; All eight words of a line are loaded before any is stored, which keeps the
; copy correct when source and sink overlap. No dcbz is used on this path.
; Both "ble-" and "bgt+" below test cr0 from the subic. at the loop top.
460balline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move
461 mtcrf 3,r5 ; Make branch mask for backend partial moves
462 beq- bbackend ; No full lines to move
1c79356b
A
463
464
465; Registers in use: R0, R1, R3, R4, R5, R6
466; Registers not in use: R2, R7, R8, R9, R10, R11, R12 - R5 is reused too (its low bits are already in the CR), giving 8 scratch registers
467
468bnxtline: subic. r0,r0,1 ; Account for the line now
469
470 lwz r7,-32(r6) ; Get the first word
471 lwz r5,-28(r6) ; Get the second word (r5 no longer holds the length)
472 lwz r2,-24(r6) ; Get the third word
473 lwz r12,-20(r6) ; Get the fourth word
474 lwz r11,-16(r6) ; Get the fifth word
475 lwz r10,-12(r6) ; Get the sixth word
476 lwz r9,-8(r6) ; Get the seventh word
477 lwz r8,-4(r6) ; Get the eighth word
478 subi r6,r6,32 ; Point to the next
479
480 stw r7,-32(r4) ; Save the first word
481 ble- bnotouch ; Last time, skip touch of source...
482 dcbt br0,r6 ; Touch in next source line
483
484bnotouch: stw r5,-28(r4) ; Save the second word
485 stw r2,-24(r4) ; Save the third word
486 stw r12,-20(r4) ; Save the fourth word
487 stw r11,-16(r4) ; Save the fifth word
488 stw r10,-12(r4) ; Save the sixth word
489 stw r9,-8(r4) ; Save the seventh word
490 stw r8,-4(r4) ; Save the eighth word
491 subi r4,r4,32 ; Bump sink
492
493 bgt+ bnxtline ; Do the next line, if any...
1c79356b
A
494
495;
496; Note: We touched these lines in at the beginning
497;
498
499; Move backend quadword
500
; bbackend: backward copy of the final 0-31 bytes.
; The CR bits loaded by "mtcrf 3,r5" at balline select each step:
; 27 = 16 bytes, 28 = 8, 29 = 4, 30 = 2 (the single-byte step follows at
; bnohalf). r4/r6 point one past the next byte to move and are decremented
; after each piece.
501bbackend: bf 27,bnoquad ; No quad to do...
502 lwz r7,-16(r6) ; Get the first word
503 lwz r8,-12(r6) ; Get the second word
504 lwz r9,-8(r6) ; Get the third word
505 lwz r11,-4(r6) ; Get the fourth word
506 stw r7,-16(r4) ; Save the first word
507 subi r6,r6,16 ; Point to the next
508 stw r8,-12(r4) ; Save the second word
509 stw r9,-8(r4) ; Save the third word
510 stw r11,-4(r4) ; Save the fourth word
511 subi r4,r4,16 ; Bump sink
512
513; Move backend double
514
515bnoquad: bf 28,bnodouble ; No double to do...
516 lwz r7,-8(r6) ; Get the first word
517 lwz r8,-4(r6) ; Get the second word
518 subi r6,r6,8 ; Point to the next
519 stw r7,-8(r4) ; Save the first word
520 stw r8,-4(r4) ; Save the second word
521 subi r4,r4,8 ; Bump sink
522
523; Move backend word
524
525bnodouble: bf 29,bnoword ; No word to do...
526 lwz r7,-4(r6) ; Get the word
527 subi r6,r6,4 ; Point to the next
528 stw r7,-4(r4) ; Save the word
529 subi r4,r4,4 ; Bump sink
530
531; Move backend halfword
532
533bnoword: bf 30,bnohalf ; No halfword to do...
534 lhz r7,-2(r6) ; Get the halfword
535 subi r6,r6,2 ; Point to the next
536 sth r7,-2(r4) ; Save the halfword
537 subi r4,r4,2 ; Bump sink
538
539; Move backend byte
540
; bnohalf: last step of the backward (forward-overlap) copy - move the odd
; trailing byte if CR bit 31 says there is one, then leave.
;
; Both outcomes must leave through bcpydone, never by returning directly:
; bcpydone is where the DBATs loaded by bcopy_physvir are invalidated
; (killbats) and where data translation is switched back on for bcopy_phys
; (fixxlate). A direct return here when no odd byte remains would hand
; control back to the caller with translation still off or with the
; temporary DBATs still valid. This matches the forward path, whose nohalf
; step also branches to bcpydone.
541bnohalf: bf 31,bcpydone ; No odd byte: leave through the common exit...
542 lbz r7,-1(r6) ; Get the byte
543 stb r7,-1(r4) ; Save the single
544
9bccf70c 545 b bcpydone ; Go exit cuz we are all done...