]> git.saurik.com Git - apple/xnu.git/blob - osfmk/ppc/bcopy.s
1a18bf37a9a1e848c1d4012f92afdf4b5602eb82
[apple/xnu.git] / osfmk / ppc / bcopy.s
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 ;
23 ; Copy bytes of data around. Handles overlapped data.
24 ;
25 ; Change this to use Altivec later on, and maybe floating point.
26 ;
27 ;
28 #include <ppc/asm.h>
29 #include <ppc/proc_reg.h>
30
31 ; CR bit 20 (CR5_lt): set for a non-cached copy, so the line loop skips its dcbz/dcbt
32 #define noncache 20
33 ; CR bit 21 (CR5_gt): set when the common exit must turn data translation back on
34 #define fixxlate 21
35 ; CR bit 22 (CR5_eq): set when the common exit must invalidate the DBATs
36 #define killbats 22
37
38 ;
39 ; bcopy_nc(from, to, nbytes)
40 ;
41 ; bcopy_nc operates on non-cached memory so we can not use any kind
42 ; of cache instructions.
43 ;
; In: r3 = from, r4 = to, r5 = nbytes (roles as consumed at bcpswap).
; Only sets the non-cached flag, then shares the bcopy entry checks at bcpswap.
44
45 .align 5
46 .globl EXT(bcopy_nc)
47
48 LEXT(bcopy_nc)
49
50 crset noncache ; Set non-cached so the line loop skips dcbz and touch-ahead
51 b bcpswap ; Join the common bcopy entry checks
52
53 ;
54 ; void bcopy_physvir(from, to, nbytes)
55 ; Attempt to copy physically addressed memory with translation on if conditions are met.
56 ; Otherwise do a normal bcopy_phys.
57 ;
58 ; Rules are: neither source nor destination can cross a page.
59 ; No accesses above the 2GB line (I/O or ROM).
60 ;
61 ; Interrupts must be disabled throughout the copy when this is called
62
63 ; To do this, we build a
64 ; 128KB DBAT for both the source and sink. If both are the same, only one is
65 ; loaded. We do not touch the IBATs, so there is no issue if either physical page
66 ; address is the same as the virtual address of the instructions we are executing.
67 ;
68 ; At the end, we invalidate the used DBATs and reenable interrupts.
; NOTE(review): the exit path at bcclrbat only clears the two DBAT upper halves; no
; interrupt enable is visible there, so that step is presumably left to the caller - confirm.
69 ;
70 ; Note, this one will not work in user state
71 ;
; In: r3 = from (source), r4 = to (sink), r5 = nbytes.
; Returns immediately when nbytes - 1 is negative as a signed value.
; Falls back to bcopy_phys when either range spans more than one page.
72
73 .align 5
74 .globl EXT(bcopy_physvir)
75
76 LEXT(bcopy_physvir)
77
78 addic. r0,r5,-1 ; Get length - 1 (CR0 reflects its sign)
79 add r11,r3,r0 ; Point to last byte of source ("from")
80 cmplw cr1,r3,r4 ; Does source == sink? (cr1 is rewritten below before any test of it)
81 add r12,r4,r0 ; Point to last byte of sink ("to")
82 bltlr- ; Bail if length is 0 or way too big
83 xor r7,r11,r3 ; Address bits that differ between first and last source byte
84 xor r8,r12,r4 ; Address bits that differ between first and last sink byte
85 or r0,r7,r8 ; Combine wrap
86
87 li r9,((PTE_WIMG_CB_CACHED_COHERENT<<3)|2) ; Set default attributes
88 rlwinm. r0,r0,0,0,19 ; Did we overflow a page? (any difference in the top 20 address bits)
89 li r7,2 ; Set validity flags
90 li r8,2 ; Set validity flags
91 bne- EXT(bcopy_phys) ; Overflowed page, do normal physical copy...
92
93 crset killbats ; Remember to trash BATs on the way out
94 rlwimi r11,r9,0,15,31 ; Set source lower DBAT value (keep top 15 address bits, insert attributes)
95 rlwimi r12,r9,0,15,31 ; Set sink lower DBAT value (keep top 15 address bits, insert attributes)
96 rlwimi r7,r11,0,0,14 ; Set source upper DBAT value (top 15 address bits plus validity flags)
97 rlwimi r8,r12,0,0,14 ; Set sink upper DBAT value (top 15 address bits plus validity flags)
98 cmplw cr1,r11,r12 ; See if sink and source are same block
99
100 sync
101
102 mtdbatl 0,r11 ; Set source lower DBAT
103 mtdbatu 0,r7 ; Set source upper DBAT
104
105 beq- cr1,bcpvsame ; Source and sink are in same block
106
107 mtdbatl 1,r12 ; Set sink lower DBAT
108 mtdbatu 1,r8 ; Set sink upper DBAT
109
110 bcpvsame: mr r6,r3 ; Set source
111 crclr noncache ; Set cached
112
; The translation-fix flag is not set up on this path; the common exit tests the
; BAT-kill flag first and leaves through bcclrbat without looking at it.
113 b copyit ; Go copy it...
114
115
116 ;
117 ; void bcopy_phys(from, to, nbytes)
118 ; Turns off data translation before the copy. Note, this one will
119 ; not work in user state
120 ;
; In: r3 = from, r4 = to, r5 = nbytes.
; Returns at once, MSR untouched, if from == to or nbytes == 0.
; Otherwise writes the MSR with DR, FP and VEC cleared, records whether DR had been
; on so the common exit can restore it, and joins the shared copy code at copyit.
121
122 .align 5
123 .globl EXT(bcopy_phys)
124
125 LEXT(bcopy_phys)
126
127 mfmsr r9 ; Get the MSR
128
129 crclr noncache ; Set cached
130 rlwinm. r8,r9,0,MSR_DR_BIT,MSR_DR_BIT ; Is data translation on? (r8 = isolated DR bit, CR0 eq if off)
131
132 cmplw cr1,r4,r3 ; Compare "to" and "from"
133 cmplwi cr7,r5,0 ; Check if we have a 0 length
134 mr r6,r3 ; Set source
135 beqlr- cr1 ; Bail if "to" and "from" are the same
136 xor r9,r9,r8 ; Turn off translation if it is on (should be)
137 beqlr- cr7 ; Bail if length is 0
138
139 rlwinm r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off
140 crclr killbats ; Make sure we do not trash BATs on the way out
141 rlwinm r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off
142 mtmsr r9 ; Set DR translation off
143 isync ; Wait for it
144
; CR0 still holds the DR test from above; nothing in between writes CR0.
145 crnot fixxlate,cr0_eq ; Remember to turn on translation if it was
146 b copyit ; Go copy it...
147
148 ;
149 ; void bcopy(from, to, nbytes)
150 ;
; In: r3 = from, r4 = to, r5 = nbytes.
; Returns at once if from == to or nbytes == 0; otherwise joins copyit with
; r6 = source and r4 = sink. bcpswap is also the join point used by bcopy_nc,
; which arrives with the non-cached flag already set.
151
152 .align 5
153 .globl EXT(bcopy)
154
155 LEXT(bcopy)
156
157 crclr noncache ; Set cached
158
159 bcpswap: cmplw cr1,r4,r3 ; Compare "to" and "from"
160 mr. r5,r5 ; Check if we have a 0 length (sets CR0)
161 mr r6,r3 ; Set source
162 crclr killbats ; Make sure we do not trash BATs on the way out
163 beqlr- cr1 ; Bail if "to" and "from" are the same
164 beqlr- ; Bail if length is 0
165 crclr fixxlate ; Set translation already ok
166 b copyit ; Go copy it...
167
168 ;
169 ; When we move the memory, forward overlays must be handled. We
170 ; also can not use the cache instructions if we are from bcopy_nc.
171 ; We need to preserve R3 because it needs to be returned for memcpy.
172 ; We can be interrupted and lose control here.
173 ;
174 ; There is no stack, so in order to use floating point, we would
175 ; need to take the FP exception. Any potential gains by using FP
176 ; would be more than eaten up by this.
177 ;
178 ; Later, we should use Altivec for large moves.
179 ;
; memcpy entry. In: r3 = "to", r4 = "from", r5 = length.
; Rearranges the arguments into the copyit convention (r4 = sink, r6 = source),
; leaves r3 untouched, and falls straight through into copyit.
; Returns at once if "to" == "from" or the length is 0.
180
181 .align 5
182 .globl EXT(memcpy)
183
184 LEXT(memcpy)
185
186 cmplw cr1,r3,r4 ; "to" and "from" the same?
187 mr r6,r4 ; Set the "from"
188 mr. r5,r5 ; Length zero? (sets CR0)
189 crclr noncache ; Set cached
190 mr r4,r3 ; Set the "to"
191 crclr fixxlate ; Set translation already ok
192 beqlr- cr1 ; "to" and "from" are the same
193 beqlr- ; Length is 0
194 crclr killbats ; Make sure we do not trash BATs on the way out
195
; Common copy code. In: r4 = sink, r5 = length (nonzero), r6 = source.
; Picks the copy direction, then for the front-to-back case moves 1/2/4/8/16 byte
; pieces until the sink sits on a 32-byte line boundary.
; Scratch registers written here: r0, r7, r8, r9, r11, r12.
196 copyit: sub r12,r4,r6 ; Get potential overlap (negative if backward move)
197 lis r8,0x7FFF ; Start up a mask
198 srawi r11,r12,31 ; Propagate the sign bit
199 dcbt br0,r6 ; Touch in the first source line
200 cntlzw r7,r5 ; Count leading zeros of the length (locates its highest set bit)
201 ori r8,r8,0xFFFF ; Make limit 0x7FFFFFFF
202 xor r9,r12,r11 ; If sink - source was negative, invert bits
203 srw r8,r8,r7 ; Get move length limitation (all ones below the highest set bit of the length)
204 sub r9,r9,r11 ; If sink - source was negative, add 1 and get absolute value
205 cmplw r12,r5 ; See if we actually forward overlap (unsigned: sink - source < length)
206 cmplwi cr7,r9,32 ; See if at least a line between source and sink
207 dcbtst br0,r4 ; Touch in the first sink line
208 cmplwi cr1,r5,32 ; Are we moving more than a line? (no later test of cr1 is visible here)
209 cror noncache,noncache,28 ; Set to not DCBZ output line if not enough space (bit 28 is cr7 lt)
210 blt- fwdovrlap ; This is a forward overlapping area, handle it...
211
212 ;
213 ; R4 = sink
214 ; R5 = length
215 ; R6 = source
216 ;
217
218 ;
219 ; Here we figure out how much we have to move to get the sink onto a
220 ; cache boundary. If we can, and there are still more than 32 bytes
221 ; left to move, we can really speed things up by DCBZing the sink line.
222 ; We can not do this if noncache is set because we will take an
223 ; alignment exception.
; The limit mask in r8 is at least 0x1F whenever the length is 32 or more, so it
; only trims the front-end move for lengths under 32, keeping it below the length.
224
225 neg r0,r4 ; Get the number of bytes to move to align to a line boundary
226 rlwinm. r0,r0,0,27,31 ; Clean it up and test it (CR0 eq if sink already on a line)
227 and r0,r0,r8 ; limit to the maximum front end move
228 mtcrf 3,r0 ; Make branch mask for partial moves (CR bits 27-31 = 16,8,4,2,1 byte pieces)
229 sub r5,r5,r0 ; Set the length left to move
230 beq alline ; Already on a line... (CR0 is still from the rlwinm. above)
231
232 bf 31,alhalf ; No single byte to do...
233 lbz r7,0(r6) ; Get the byte
234 addi r6,r6,1 ; Point to the next
235 stb r7,0(r4) ; Save the single
236 addi r4,r4,1 ; Bump sink
237
238 ; Sink is halfword aligned here
239
240 alhalf: bf 30,alword ; No halfword to do...
241 lhz r7,0(r6) ; Get the halfword
242 addi r6,r6,2 ; Point to the next
243 sth r7,0(r4) ; Save the halfword
244 addi r4,r4,2 ; Bump sink
245
246 ; Sink is word aligned here
247
248 alword: bf 29,aldouble ; No word to do...
249 lwz r7,0(r6) ; Get the word
250 addi r6,r6,4 ; Point to the next
251 stw r7,0(r4) ; Save the word
252 addi r4,r4,4 ; Bump sink
253
254 ; Sink is double aligned here
255
256 aldouble: bf 28,alquad ; No double to do...
257 lwz r7,0(r6) ; Get the first word
258 lwz r8,4(r6) ; Get the second word (the limit mask in r8 is no longer needed)
259 addi r6,r6,8 ; Point to the next
260 stw r7,0(r4) ; Save the first word
261 stw r8,4(r4) ; Save the second word
262 addi r4,r4,8 ; Bump sink
263
264 ; Sink is quadword aligned here
265
266 alquad: bf 27,alline ; No quad to do...
267 lwz r7,0(r6) ; Get the first word
268 lwz r8,4(r6) ; Get the second word
269 lwz r9,8(r6) ; Get the third word
270 stw r7,0(r4) ; Save the first word
271 lwz r11,12(r6) ; Get the fourth word
272 addi r6,r6,16 ; Point to the next
273 stw r8,4(r4) ; Save the second word
274 stw r9,8(r4) ; Save the third word
275 stw r11,12(r4) ; Save the fourth word
276 addi r4,r4,16 ; Bump sink
277
278 ; Sink is line aligned here
; (If the front-end move was trimmed by the length limit the sink may not be aligned,
; but that only happens for lengths under 32, where the line loop below is skipped.)
; Copies whole 32-byte lines front to back. In: r4 = sink, r5 = bytes left, r6 = source.
279
280 alline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move (length / 32)
281 mtcrf 3,r5 ; Make branch mask for backend partial moves (CR bits 27-31 = 16,8,4,2,1)
282 rlwinm r11,r5,0,0,26 ; Get number of bytes we are going to move
283 beq- backend ; No full lines to move
284
285 sub r5,r5,r11 ; Calculate the residual
286 li r10,96 ; Stride for touch ahead
287
288 nxtline: subic. r0,r0,1 ; Account for the line now (CR0 drives the loop branch at the bottom)
289
290 bt- noncache,skipz ; Skip if we are not cached...
291 dcbz br0,r4 ; Blow away the whole line because we are replacing it
292 dcbt r6,r10 ; Touch ahead a bit (source + 96)
293
294 skipz: lwz r7,0(r6) ; Get the first word
295 lwz r8,4(r6) ; Get the second word
296 lwz r9,8(r6) ; Get the third word
297 stw r7,0(r4) ; Save the first word
298 lwz r11,12(r6) ; Get the fourth word
299 stw r8,4(r4) ; Save the second word
300 lwz r7,16(r6) ; Get the fifth word
301 stw r9,8(r4) ; Save the third word
302 lwz r8,20(r6) ; Get the sixth word
303 stw r11,12(r4) ; Save the fourth word
304 lwz r9,24(r6) ; Get the seventh word
305 stw r7,16(r4) ; Save the fifth word
306 lwz r11,28(r6) ; Get the eighth word
307 addi r6,r6,32 ; Point to the next
308 stw r8,20(r4) ; Save the sixth word
309 stw r9,24(r4) ; Save the seventh word
310 stw r11,28(r4) ; Save the eighth word
311 addi r4,r4,32 ; Bump sink
312 bgt+ nxtline ; Do the next line, if any... (lines remaining, from the subic. above)
313
314
315 ; Move backend quadword
; Tail of the front-to-back copy: moves the last 0-31 bytes as 16/8/4/2/1 byte
; pieces. CR bits 27-31 select the pieces; they were loaded from the low bits of
; the remaining length by the mtcrf at alline. Falls into the common exit.
316
317 backend: bf 27,noquad ; No quad to do...
318 lwz r7,0(r6) ; Get the first word
319 lwz r8,4(r6) ; Get the second word
320 lwz r9,8(r6) ; Get the third word
321 lwz r11,12(r6) ; Get the fourth word
322 stw r7,0(r4) ; Save the first word
323 addi r6,r6,16 ; Point to the next
324 stw r8,4(r4) ; Save the second word
325 stw r9,8(r4) ; Save the third word
326 stw r11,12(r4) ; Save the fourth word
327 addi r4,r4,16 ; Bump sink
328
329 ; Move backend double
330
331 noquad: bf 28,nodouble ; No double to do...
332 lwz r7,0(r6) ; Get the first word
333 lwz r8,4(r6) ; Get the second word
334 addi r6,r6,8 ; Point to the next
335 stw r7,0(r4) ; Save the first word
336 stw r8,4(r4) ; Save the second word
337 addi r4,r4,8 ; Bump sink
338
339 ; Move backend word
340
341 nodouble: bf 29,noword ; No word to do...
342 lwz r7,0(r6) ; Get the word
343 addi r6,r6,4 ; Point to the next
344 stw r7,0(r4) ; Save the word
345 addi r4,r4,4 ; Bump sink
346
347 ; Move backend halfword
348
349 noword: bf 30,nohalf ; No halfword to do...
350 lhz r7,0(r6) ; Get the halfword
351 addi r6,r6,2 ; Point to the next
352 sth r7,0(r4) ; Save the halfword
353 addi r4,r4,2 ; Bump sink
354
355 ; Move backend byte
356
357 nohalf: bf 31,bcpydone ; Leave cuz we are all done...
358 lbz r7,0(r6) ; Get the byte
359 stb r7,0(r4) ; Save the single
360
; Common exit. Undoes whatever the entry point set up, as recorded in the CR5 flags:
; BAT-kill flag set -> invalidate DBAT 0 and DBAT 1 and return (the translation
; flag is not examined on that path);
; translation flag set -> turn MSR DR back on (FP and VEC forced off) and return;
; neither set -> plain return.
361 bcpydone: bt- killbats,bcclrbat ; Jump if we need to clear bats...
362 bflr fixxlate ; Leave now if we do not need to fix translation...
363 mfmsr r9 ; Get the MSR
364 ori r9,r9,lo16(MASK(MSR_DR)) ; Turn data translation on
365 rlwinm r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1 ; Force floating point off
366 rlwinm r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1 ; Force vectors off
367 mtmsr r9 ; Just do it
368 isync ; Hang in there
369 blr ; Leave cuz we are all done...
370
371 bcclrbat: li r0,0 ; Get set to invalidate upper half
372 sync ; Make sure all is well
373 mtdbatu 0,r0 ; Clear DBAT 0 upper half (loaded from the "from" address in bcopy_physvir)
374 mtdbatu 1,r0 ; Clear DBAT 1 upper half (loaded from the "to" address in bcopy_physvir)
375 sync
376 isync
377 blr
378
379
380 ;
381 ; 0123456789ABCDEF0123456789ABCDEF
382 ; 0123456789ABCDEF0123456789ABCDEF
383 ; F
384 ; DE
385 ; 9ABC
386 ; 12345678
387 ; 123456789ABCDEF0
388 ; 0
389
390 ;
391 ; Here is where we handle a forward overlapping move. These will be slow
392 ; because we can not kill the cache of the destination until after we have
393 ; loaded/saved the source area. Also, because reading memory backwards is
394 ; slower when the cache line needs to be loaded because the critical
395 ; doubleword is loaded first, i.e., the last, then it goes back to the first,
396 ; and on in order. That means that when we are at the second to last DW we
397 ; have to wait until the whole line is in cache before we can proceed.
398 ;
; In: r4 = sink, r5 = length, r6 = source, r8 = front-end length limit mask from copyit.
; Both pointers are moved past the end of their areas and the copy runs back to front.
; This first part moves 1/2/4/8/16 byte pieces until the sink end is on a 32-byte line.
399
400 fwdovrlap: add r4,r5,r4 ; Point past the last sink byte
401 add r6,r5,r6 ; Point past the last source byte
402 and r0,r4,r8 ; Apply movement limit
403 li r12,-1 ; Make sure we touch in the actual line
404 mtcrf 3,r0 ; Figure out the best way to move backwards (CR bits 27-31 = 16,8,4,2,1)
405 dcbt r12,r6 ; Touch in the last line of source (address of the last source byte)
406 rlwinm. r0,r0,0,27,31 ; Calculate the length to adjust to cache boundary
407 dcbtst r12,r4 ; Touch in the last line of the sink (address of the last sink byte)
408 beq- balline ; Already on cache line boundary
409
410 sub r5,r5,r0 ; Precalculate move length left after alignment
411
412 bf 31,balhalf ; No single byte to do...
413 lbz r7,-1(r6) ; Get the byte
414 subi r6,r6,1 ; Point to the next
415 stb r7,-1(r4) ; Save the single
416 subi r4,r4,1 ; Bump sink
417
418 ; Sink is halfword aligned here
419
420 balhalf: bf 30,balword ; No halfword to do...
421 lhz r7,-2(r6) ; Get the halfword
422 subi r6,r6,2 ; Point to the next
423 sth r7,-2(r4) ; Save the halfword
424 subi r4,r4,2 ; Bump sink
425
426 ; Sink is word aligned here
427
428 balword: bf 29,baldouble ; No word to do...
429 lwz r7,-4(r6) ; Get the word
430 subi r6,r6,4 ; Point to the next
431 stw r7,-4(r4) ; Save the word
432 subi r4,r4,4 ; Bump sink
433
434 ; Sink is double aligned here
435
436 baldouble: bf 28,balquad ; No double to do...
437 lwz r7,-8(r6) ; Get the first word
438 lwz r8,-4(r6) ; Get the second word (the limit mask in r8 is no longer needed)
439 subi r6,r6,8 ; Point to the next
440 stw r7,-8(r4) ; Save the first word
441 stw r8,-4(r4) ; Save the second word
442 subi r4,r4,8 ; Bump sink
443
444 ; Sink is quadword aligned here
445
446 balquad: bf 27,balline ; No quad to do...
447 lwz r7,-16(r6) ; Get the first word
448 lwz r8,-12(r6) ; Get the second word
449 lwz r9,-8(r6) ; Get the third word
450 lwz r11,-4(r6) ; Get the fourth word
451 stw r7,-16(r4) ; Save the first word
452 subi r6,r6,16 ; Point to the next
453 stw r8,-12(r4) ; Save the second word
454 stw r9,-8(r4) ; Save the third word
455 stw r11,-4(r4) ; Save the fourth word
456 subi r4,r4,16 ; Bump sink
457
458 ; Sink is line aligned here
; Copies whole 32-byte lines back to front. All eight words of a line are loaded
; before any is stored, so an overlapping sink can not clobber unread source.
; No dcbz is used on this path.
459
460 balline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move (length / 32)
461 mtcrf 3,r5 ; Make branch mask for backend partial moves (CR bits 27-31 = 16,8,4,2,1)
462 beq- bbackend ; No full lines to move
463
464
465 ; Registers in use: R0, R1, R3, R4, R5, R6
466 ; Registers not in use: R2, R7, R8, R9, R10, R11, R12 - Ok, we can make another free for 8 of them
; R5 is the one freed: its residual bits are already captured in the CR by the mtcrf above.
467
468 bnxtline: subic. r0,r0,1 ; Account for the line now (CR0 drives both branches below)
469
470 lwz r7,-32(r6) ; Get the first word
471 lwz r5,-28(r6) ; Get the second word
472 lwz r2,-24(r6) ; Get the third word
473 lwz r12,-20(r6) ; Get the fourth word
474 lwz r11,-16(r6) ; Get the fifth word
475 lwz r10,-12(r6) ; Get the sixth word
476 lwz r9,-8(r6) ; Get the seventh word
477 lwz r8,-4(r6) ; Get the eighth word
478 subi r6,r6,32 ; Point to the next
479
480 stw r7,-32(r4) ; Save the first word
481 ble- bnotouch ; Last time, skip touch of source...
482 dcbt br0,r6 ; Touch in next source line
483
484 bnotouch: stw r5,-28(r4) ; Save the second word
485 stw r2,-24(r4) ; Save the third word
486 stw r12,-20(r4) ; Save the fourth word
487 stw r11,-16(r4) ; Save the fifth word
488 stw r10,-12(r4) ; Save the sixth word
489 stw r9,-8(r4) ; Save the seventh word
490 stw r8,-4(r4) ; Save the eighth word
491 subi r4,r4,32 ; Bump sink
492
493 bgt+ bnxtline ; Do the next line, if any...
494
495 ;
496 ; Note: We touched these lines in at the beginning
497 ;
; Tail of the back-to-front copy: moves the last 0-31 bytes as 16/8/4/2 byte pieces
; (the final single byte is handled at bnohalf). CR bits 27-30 select the pieces;
; they were loaded from the low bits of the remaining length by the mtcrf at balline.
498
499 ; Move backend quadword
500
501 bbackend: bf 27,bnoquad ; No quad to do...
502 lwz r7,-16(r6) ; Get the first word
503 lwz r8,-12(r6) ; Get the second word
504 lwz r9,-8(r6) ; Get the third word
505 lwz r11,-4(r6) ; Get the fourth word
506 stw r7,-16(r4) ; Save the first word
507 subi r6,r6,16 ; Point to the next
508 stw r8,-12(r4) ; Save the second word
509 stw r9,-8(r4) ; Save the third word
510 stw r11,-4(r4) ; Save the fourth word
511 subi r4,r4,16 ; Bump sink
512
513 ; Move backend double
514
515 bnoquad: bf 28,bnodouble ; No double to do...
516 lwz r7,-8(r6) ; Get the first word
517 lwz r8,-4(r6) ; Get the second word
518 subi r6,r6,8 ; Point to the next
519 stw r7,-8(r4) ; Save the first word
520 stw r8,-4(r4) ; Save the second word
521 subi r4,r4,8 ; Bump sink
522
523 ; Move backend word
524
525 bnodouble: bf 29,bnoword ; No word to do...
526 lwz r7,-4(r6) ; Get the word
527 subi r6,r6,4 ; Point to the next
528 stw r7,-4(r4) ; Save the word
529 subi r4,r4,4 ; Bump sink
530
531 ; Move backend halfword
532
533 bnoword: bf 30,bnohalf ; No halfword to do...
534 lhz r7,-2(r6) ; Get the halfword
535 subi r6,r6,2 ; Point to the next
536 sth r7,-2(r4) ; Save the halfword
537 subi r4,r4,2 ; Bump sink
538
539 ; Move backend byte
; Last step of the back-to-front copy: move the final odd byte, if CR bit 31 says
; there is one, then leave through the common exit at bcpydone.
; Fix: with no odd byte left this used to return directly (bflr 31), which skipped
; the exit work requested by the entry points through the CR5 flags: invalidating
; the DBATs for bcopy_physvir and turning data translation back on for bcopy_phys.
; The odd-byte path just below and the front-to-back tail at nohalf both leave
; through bcpydone, so the even case now does the same.
540
541 bnohalf: bf 31,bcpydone ; No odd byte left, take the common exit...
542 lbz r7,-1(r6) ; Get the byte
543 stb r7,-1(r4) ; Save the single
544
545 b bcpydone ; Go exit cuz we are all done...