osfmk/ppc/movc.s

   1 /*
   2  * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_COPYRIGHT@
  30  */
  31 #include <debug.h>
  32 #include <ppc/asm.h>
  33 #include <ppc/proc_reg.h>
  34 #include <mach/ppc/vm_param.h>
  35 #include <assym.s>
  36 #include <sys/errno.h>
  37
  38 #define INSTRUMENT 0
  39
  40 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
  41 /*
  42  * void pmap_zero_page(vm_offset_t pa)
  43  *
  44  * Zero a page of physical memory.  This routine runs in 32 or 64-bit mode,
  45  * and handles 32 and 128-byte cache lines.
      *
      * In:   r3 = physical page number.  Despite the parameter name "pa", the
      *       code shifts r3 left by 12 to form the byte address of the page.
      * Uses: r4/r5 (byte offsets within the page), r9 (cache line size),
      *       r10 (feature flags), r12 (return address held across the call), cr0.
      * Exit: tail-branches to ml_restore with the caller's LR already restored.
      *
      * NOTE(review): ml_set_physical_disabled and ml_restore are defined outside
      * this listing.  The code below assumes the former preserves r3 and r12 and
      * leaves the feature flags both in r10 and in the CR bit tested through
      * pf64Bitb -- confirm against its definition.
  46  */
  47
  48
  49                 .align  5
  50                 .globl  EXT(pmap_zero_page)
  51
  52 LEXT(pmap_zero_page)
  53
  54         mflr    r12                                                             // save return address (bl below overwrites LR)
  55         bl              EXT(ml_set_physical_disabled)   // turn DR and EE off, SF on, get features in r10
  56         mtlr    r12                                                             // restore return address
             // The masked feature bits are used directly as a byte count, which presumably
             // relies on pf32Byte == 32 and pf128Byte == 128 -- confirm in the flag definitions.
  57         andi.   r9,r10,pf32Byte+pf128Byte               // r9 <- cache line size
  58
  59         subfic  r4,r9,PPC_PGBYTES                               // r4 <- PPC_PGBYTES - line size = offset of last line in page
  60
  61                 bt++    pf64Bitb,page0S4                                // Go do the big guys...
  62
  63                 slwi    r3,r3,12                                                // get page address from page num
  64                 b               page_zero_1                                             // Jump to line aligned loop...
  65
             // Padding: .align 5 followed by 7 nops and the one-word sldi is 8 words (32 bytes),
             // so page_zero_1 itself starts on a 32-byte boundary.
  66         .align  5
  67
  68                 nop
  69                 nop
  70                 nop
  71                 nop
  72                 nop
  73                 nop
  74                 nop
  75
  76 page0S4:
  77                 sldi    r3,r3,12                                                // get page address from page num
  78
             // Each pass zeroes two lines, walking offsets downward from the last line
             // of the page; r4 and r5 alternate as the current offset.
  79 page_zero_1:                                                                    // loop zeroing cache lines
  80         sub.    r5,r4,r9                                                // r5 <- next lower offset; cr0 eq when it is 0
  81         dcbz128 r3,r4                                                   // zero either 32 or 128 bytes
  82         sub             r4,r5,r9                                                // generate next offset
  83         dcbz128 r3,r5                                                   // zero the line at the lower offset
  84         bne--   page_zero_1                                             // done once the line at offset 0 was zeroed (cr0 from sub. above)
  85
  86         b               EXT(ml_restore)                                 // restore MSR and do the isync
  87
  88
  89 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
  90 /* void
  91  * phys_copy(src, dst, bytecount)
  92  *      addr64_t            src;
  93  *      addr64_t            dst;
  94  *      int             bytecount
  95  *
  96  * This routine will copy bytecount bytes from physical address src to physical
  97  * address dst.  It runs in 64-bit mode if necessary, but does not handle
  98  * overlap or make any attempt to be optimal.  Length must be a signed word.
  99  * Not performance critical.
      *
      * In:   r3:r4 = src (high:low words), r5:r6 = dst (high:low words),
      *       r7 = bytecount.
      * Uses: r0 (data), r3/r4 (running src/dst pointers after the halves are
      *       merged), r5 (remaining count), r12 (saved LR), cr0.
      * A bytecount <= 0 copies nothing: both loops are skipped by the signed
      * tests at phys_copy_2 and after the count is restored.
      * Exit: tail-branches to ml_restore with the caller's LR already restored.
      *
      * NOTE(review): the argument merge is interleaved with the call to
      * ml_set_physical_disabled (defined elsewhere), so that helper is assumed
      * to preserve r3-r7 and r12 -- confirm against its definition.
 100  */
 101
 102
 103                 .align  5
 104                 .globl  EXT(phys_copy)
 105
 106 LEXT(phys_copy)
 107
 108                 rlwinm  r3,r3,0,1,0                                     ; Duplicate high half of long long paddr into top of reg
 109         mflr    r12                                                             // get return address
 110                 rlwimi  r3,r4,0,0,31                            ; Combine bottom of long long to full 64-bits
 111                 rlwinm  r4,r5,0,1,0                                     ; Duplicate high half of long long paddr into top of reg
 112         bl              EXT(ml_set_physical_disabled)   // turn DR and EE off, SF on, get features in r10
 113                 rlwimi  r4,r6,0,0,31                            ; Combine bottom of long long to full 64-bits
 114         mtlr    r12                                                             // restore return address
 115         subic.  r5,r7,4                                                 // a word to copy?
 116         b               phys_copy_2                                             // enter the word loop at its test
 117
 118                 .align  5
 119
 120 phys_copy_1:                                                                    // loop copying words
 121         subic.  r5,r5,4                                                 // more to go?
 122         lwz             r0,0(r3)
 123         addi    r3,r3,4
 124         stw             r0,0(r4)
 125         addi    r4,r4,4
 126 phys_copy_2:
 127         bge             phys_copy_1                                             // cr0 from the last subic.: at least 4 bytes remained
 128         addic.  r5,r5,4                                                 // restore count
 129         ble             phys_copy_4                                             // no more
 130
             // phys_copy_1 sits on a 32-byte boundary and exactly 8 words precede this point.
 131                                                                                         // Loop is aligned here
 132
 133 phys_copy_3:                                                                    // loop copying bytes
 134         subic.  r5,r5,1                                                 // more to go?
 135         lbz             r0,0(r3)
 136         addi    r3,r3,1
 137         stb             r0,0(r4)
 138         addi    r4,r4,1
 139         bgt             phys_copy_3                                             // cr0 from subic. above: bytes still remain
 140 phys_copy_4:
 141         b               EXT(ml_restore)                                 // restore MSR and do the isync
 142
 143
 144 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
 145 /* void
 146  * pmap_copy_page(src, dst)
 147  *      ppnum_t     src;
 148  *      ppnum_t     dst;
 149  *
 150  * This routine will copy the physical page src to physical page dst
 151  *
 152  * This routine assumes that the src and dst are page numbers and that the
 153  * destination is cached.  It runs on 32 and 64 bit processors, with and
 154  * without altivec, and with 32 and 128 byte cache lines.
 155  * We also must assume that no-one will be executing within the destination
 156  * page, and that this will be used for paging.  Because this
 157  * is a common routine, we have tuned loops for each processor class.
 158  *
      * In:   r3 = src page number, r4 = dst page number (both are shifted left
      *       by 12 to form byte addresses).
      * Register roles established by the common entry code:
      *       r10 = feature flags (also copied into CR for the bt/bf tests)
      *       r11 = MSR at entry with VEC and FP cleared; this is the value
      *             written back on exit, so VEC and FP are left off on return
      *       r2  = r11 with EE cleared and FP and VEC set (working MSR)
      *       r12 = r2 with DR cleared (32-bit paths only)
      * Dispatch: 64-bit -> pmap_copy_64; Altivec without 64-bit -> pmap_copy_g4;
      *       otherwise the G3 floating point path that follows the entry code.
      * Frame: kSFSize bytes.  The G3 path saves f0-f3 at FM_SIZE+0..24; the
      *       vector paths save VRs starting at FM_SIZE+16 (8 VRs end at
      *       FM_SIZE+144).  LR is saved at 8(r1) of the caller's frame.
 159  */
 160 #define kSFSize (FM_SIZE+160)
 161
 162 ENTRY(pmap_copy_page, TAG_NO_FRAME_USED)
 163
 164                 lis             r2,hi16(MASK(MSR_VEC))                  ; Get the vector flag
 165         mflr    r0                                                              // get return
 166                 ori             r2,r2,lo16(MASK(MSR_FP))                ; Add the FP flag
 167                 stw             r0,8(r1)                                                // save
 168         stwu    r1,-kSFSize(r1)                                 // set up a stack frame for VRs or FPRs
 169         mfmsr   r11                                                             // save MSR at entry
 170         mfsprg  r10,2                                                   // get feature flags
 171         andc    r11,r11,r2                                              // Clear out vec and fp
 172         ori             r2,r2,lo16(MASK(MSR_EE))                // Get EE on also
 173         andc    r2,r11,r2                                               // Clear out EE as well
 174         mtcrf   0x02,r10                                                // we need to test pf64Bit
 175         ori             r2,r2,MASK(MSR_FP)                              // must enable FP for G3...
 176         mtcrf   0x80,r10                                                // we need to test pfAltivec too
 177         oris    r2,r2,hi16(MASK(MSR_VEC))               // enable altivec for G4 (ignored if G3)
 178         mtmsr   r2                                                              // turn EE off, FP and VEC on
 179         isync
 180         bt++    pf64Bitb,pmap_copy_64                   // skip if 64-bit processor (only they take hint)
 181                 slwi    r3,r3,12                                                // get page address from page num
 182                 slwi    r4,r4,12                                                // get page address from page num
 183         rlwinm  r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1      // get ready to turn off DR
 184         bt              pfAltivecb,pmap_copy_g4                 // altivec but not 64-bit means G4
 185
 186
 187         // G3 -- copy using FPRs
 188
             // The FPRs are saved while DR is still on because the stack is addressed
             // through r1; only after that is translation turned off for the copy.
 189         stfd    f0,FM_SIZE+0(r1)                                // save the 4 FPRs we use to copy
 190         stfd    f1,FM_SIZE+8(r1)
 191         li              r5,PPC_PGBYTES/32                               // count of cache lines in a page
 192         stfd    f2,FM_SIZE+16(r1)
 193         mtctr   r5
 194         stfd    f3,FM_SIZE+24(r1)
 195         mtmsr   r12                                                             // turn off DR after saving FPRs on stack
 196         isync
 197
 198 pmap_g3_copy_loop:                                                              // loop over 32-byte cache lines
 199         dcbz    0,r4                                                    // avoid read of dest line
 200         lfd             f0,0(r3)
 201         lfd             f1,8(r3)
 202         lfd             f2,16(r3)
 203         lfd             f3,24(r3)
 204         addi    r3,r3,32
 205         stfd    f0,0(r4)
 206         stfd    f1,8(r4)
 207         stfd    f2,16(r4)
 208         stfd    f3,24(r4)
 209         dcbst   0,r4                                                    // flush dest line to RAM
 210         addi    r4,r4,32
 211         bdnz    pmap_g3_copy_loop
 212
 213         sync                                                                    // wait for stores to take
 214         subi    r4,r4,PPC_PGBYTES                               // restore ptr to destination page
 215         li              r6,PPC_PGBYTES-32                               // point to last line in page
             // Two icbi per pass, offsets descending by 32; r5 and r6 alternate as the offset.
 216 pmap_g3_icache_flush:
 217         subic.  r5,r6,32                                                // more to go?
 218         icbi    r4,r6                                                   // flush another line in icache
 219         subi    r6,r5,32                                                // get offset to next line
 220         icbi    r4,r5
 221         bne             pmap_g3_icache_flush                    // done once offset 0 was flushed (cr0 from subic. above)
 222
 223         sync
 224         mtmsr   r2                                                              // turn DR back on
 225         isync
 226         lfd             f0,FM_SIZE+0(r1)                                // restore the FPRs
 227         lfd             f1,FM_SIZE+8(r1)
 228         lfd             f2,FM_SIZE+16(r1)
 229         lfd             f3,FM_SIZE+24(r1)
 230
 231         b               pmap_g4_restore                                 // restore MSR and done
 232
 233
 234         // G4 -- copy using VRs
             //
             // Reached from the pmap_copy_page entry code with r3/r4 already converted to
             // byte addresses.  Copies 64 bytes (two 32-byte lines) per pass through v0-v3.
             // r5/r6/r7 = 16/32/48 are the x-form offsets and must survive until the VR
             // restore at the end; r8/r9 = 96/128 are the dcbt look-ahead distances and
             // are reused as offsets by the icache flush loop.
             // NOTE(review): stvx/lvx ignore the low 4 bits of the effective address, so
             // the save area at FM_SIZE+16(r1) is assumed to be 16-byte aligned -- this
             // depends on r1 and FM_SIZE, neither of which is defined in this listing.
 235
 236 pmap_copy_g4:                                                                   // r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR
 237         la              r9,FM_SIZE+16(r1)                               // place where we save VRs to r9
 238         li              r5,16                                                   // load x-form offsets into r5-r9
 239         li              r6,32                                                   // another offset
 240         stvx    v0,0,r9                                                 // save some VRs so we can use to copy
 241         li              r7,48                                                   // another offset
 242         stvx    v1,r5,r9
 243         li              r0,PPC_PGBYTES/64                               // we loop over 64-byte chunks
 244         stvx    v2,r6,r9
 245         mtctr   r0
 246         li              r8,96                                                   // get look-ahead for touch
 247         stvx    v3,r7,r9
 248         li              r9,128                                                  // second look-ahead distance (r9 no longer the save-area base)
 249         mtmsr   r12                                                             // now we've saved VRs on stack, turn off DR
 250         isync                                                                   // wait for it to happen
 251         b               pmap_g4_copy_loop
 252
 253         .align  5                                                               // align inner loops
 254 pmap_g4_copy_loop:                                                              // loop over 64-byte chunks
 255         dcbt    r3,r8                                                   // touch 3 lines ahead
 256         nop                                                                             // avoid a 17-word loop...
 257         dcbt    r3,r9                                                   // touch 4 lines ahead
 258         nop                                                                             // more padding
 259         dcba    0,r4                                                    // avoid pre-fetch of 1st dest line
 260         lvx             v0,0,r3                                                 // offset 0
 261         lvx             v1,r5,r3                                                // offset 16
 262         lvx             v2,r6,r3                                                // offset 32
 263         lvx             v3,r7,r3                                                // offset 48
 264         addi    r3,r3,64
 265         dcba    r6,r4                                                   // avoid pre-fetch of 2nd line
 266         stvx    v0,0,r4                                                 // offset 0
 267         stvx    v1,r5,r4                                                // offset 16
 268         stvx    v2,r6,r4                                                // offset 32
 269         stvx    v3,r7,r4                                                // offset 48
 270         dcbf    0,r4                                                    // push line 1
 271         dcbf    r6,r4                                                   // and line 2
 272         addi    r4,r4,64
 273         bdnz    pmap_g4_copy_loop
 274
 275         sync                                                                    // wait for stores to take
 276         subi    r4,r4,PPC_PGBYTES                               // restore ptr to destination page
 277         li              r8,PPC_PGBYTES-32                               // point to last line in page
             // Two icbi per pass, offsets descending by 32; r8 and r9 alternate as the offset.
 278 pmap_g4_icache_flush:
 279         subic.  r9,r8,32                                                // more to go?
 280         icbi    r4,r8                                                   // flush from icache
 281         subi    r8,r9,32                                                // get offset to next line
 282         icbi    r4,r9
 283         bne             pmap_g4_icache_flush                    // done once offset 0 was flushed (cr0 from subic. above)
 284
 285         sync
 286         mtmsr   r2                                                              // turn DR back on
 287         isync
 288         la              r9,FM_SIZE+16(r1)                               // get base of VR save area
 289         lvx             v0,0,r9                                                 // restore the VRs
 290         lvx             v1,r5,r9
 291         lvx             v2,r6,r9
 292         lvx             v3,r7,r9
 293
             // Common exit for every pmap_copy_page path (G3, G4, 64-bit VMX and no-VMX).
             // r11 holds the entry MSR with VEC and FP cleared, so EE returns to whatever
             // state it had at entry while VEC and FP are left off.
 294 pmap_g4_restore:                                                                // r11=MSR
 295         mtmsr   r11                                                             // restore entry MSR: EE as at entry, VEC and FP off
 296         isync                                                                   // wait for it to happen
 297         addi    r1,r1,kSFSize                                   // pop off our stack frame
 298         lwz             r0,8(r1)                                                // restore return address
 299         mtlr    r0
 300         blr
 301
 302
 303         // 64-bit/128-byte processor: copy using VRs
             //
             // Reached from the pmap_copy_page entry code with r3/r4 still holding page
             // numbers and r2 = working MSR (EE off, FP and VEC on).  Falls through to the
             // vector copy when Altivec is available, otherwise goes to pmap_novmx_copy.
             // Copies 128 bytes per pass through v0-v7, which are saved at
             // FM_SIZE+16 .. FM_SIZE+143 of the frame and restored before exit.
             // r5/r6/r7 = 16/32/48 are x-form offsets that must survive until the VR
             // restore; r8 = -128 lets dcbf reach back to the line just written.
             // NOTE(review): stvx/lvx ignore the low 4 bits of the effective address, so
             // the save area is assumed to be 16-byte aligned -- depends on r1 and FM_SIZE.
 304
 305 pmap_copy_64:                                                                   // r10=features, r11=old MSR
 306                 sldi    r3,r3,12                                                // get page address from page num
 307                 sldi    r4,r4,12                                                // get page address from page num
 308                 la              r9,FM_SIZE+16(r1)                               // get base of VR save area
 309         li              r5,16                                                   // load x-form offsets into r5-r9
 310         li              r6,32                                                   // another offset
 311         bf              pfAltivecb,pmap_novmx_copy              // altivec suppressed...
 312         stvx    v0,0,r9                                                 // save 8 VRs so we can copy wo bubbles
 313         stvx    v1,r5,r9
 314         li              r7,48                                                   // another offset
 315         li              r0,PPC_PGBYTES/128                              // we loop over 128-byte chunks
 316         stvx    v2,r6,r9
 317         stvx    v3,r7,r9
 318         addi    r9,r9,64                                                // advance base ptr so we can store another 4
 319         mtctr   r0
 320         li              r0,MASK(MSR_DR)                                 // get DR bit
 321         stvx    v4,0,r9
 322         stvx    v5,r5,r9
 323         andc    r12,r2,r0                                               // turn off DR bit
 324         li              r0,1                                                    // get a 1 to slam into SF
 325         stvx    v6,r6,r9
 326         stvx    v7,r7,r9
 327         rldimi  r12,r0,63,MSR_SF_BIT                    // set SF bit (bit 0)
 328         li              r8,-128                                                 // offset so we can reach back one line
 329         mtmsrd  r12                                                             // now we've saved VRs, turn DR off and SF on
 330         isync                                                                   // wait for it to happen
 331         dcbt128 0,r3,1                                                  // start a forward stream
 332         b               pmap_64_copy_loop
 333
 334         .align  5                                                               // align inner loops
 335 pmap_64_copy_loop:                                                              // loop over 128-byte chunks
 336         dcbz128 0,r4                                                    // avoid read of destination line
 337         lvx             v0,0,r3                                                 // offset 0
 338         lvx             v1,r5,r3                                                // offset 16
 339         lvx             v2,r6,r3                                                // offset 32
 340         lvx             v3,r7,r3                                                // offset 48
 341         addi    r3,r3,64                                                // don't have enough GPRs so add 64 2x
 342         lvx             v4,0,r3                                                 // offset 64
 343         lvx             v5,r5,r3                                                // offset 80
 344         lvx             v6,r6,r3                                                // offset 96
 345         lvx             v7,r7,r3                                                // offset 112
 346         addi    r3,r3,64
 347         stvx    v0,0,r4                                                 // offset 0
 348         stvx    v1,r5,r4                                                // offset 16
 349         stvx    v2,r6,r4                                                // offset 32
 350         stvx    v3,r7,r4                                                // offset 48
 351         addi    r4,r4,64
 352         stvx    v4,0,r4                                                 // offset 64
 353         stvx    v5,r5,r4                                                // offset 80
 354         stvx    v6,r6,r4                                                // offset 96
 355         stvx    v7,r7,r4                                                // offset 112
 356         addi    r4,r4,64
 357         dcbf    r8,r4                                                   // flush the line we just wrote
 358         bdnz    pmap_64_copy_loop
 359
 360         sync                                                                    // wait for stores to take
 361         subi    r4,r4,PPC_PGBYTES                               // restore ptr to destination page
 362         li              r8,PPC_PGBYTES-128                              // point to last line in page
             // Two icbi per pass, offsets descending by 128; r8 and r9 alternate as the offset.
 363 pmap_64_icache_flush:
 364         subic.  r9,r8,128                                               // more to go?
 365         icbi    r4,r8                                                   // flush from icache
 366         subi    r8,r9,128                                               // get offset to next line
 367         icbi    r4,r9
 368         bne             pmap_64_icache_flush                    // done once offset 0 was flushed (cr0 from subic. above)
 369
 370         sync
 371         mtmsrd  r2                                                              // turn DR back on, SF off
 372         isync
 373         la              r9,FM_SIZE+16(r1)                               // get base address of VR save area on stack
 374         lvx             v0,0,r9                                                 // restore the VRs
 375         lvx             v1,r5,r9
 376         lvx             v2,r6,r9
 377         lvx             v3,r7,r9
 378         addi    r9,r9,64
 379         lvx             v4,0,r9
 380         lvx             v5,r5,r9
 381         lvx             v6,r6,r9
 382         lvx             v7,r7,r9
 383
 384         b               pmap_g4_restore                                 // restore lower half of MSR and return
 385
 386  //
 387  //             Copy on 64-bit without VMX
 388  //
      //             Reached from pmap_copy_64 when the Altivec feature bit is clear.
      //             r3/r4 are already byte addresses and r2 is the working MSR.
      //             No vector or floating point registers are saved on this path:
      //             the copy runs through GPRs r0, r5-r10 and r12.  That overwrites
      //             the feature flags in r10 and the DR-off MSR image in r12, neither
      //             of which is read again before the exit at pmap_g4_restore.
      //
 389
 390 pmap_novmx_copy:
 391                 li              r0,PPC_PGBYTES/128                              // we loop over 128-byte chunks
 392                 mtctr   r0
 393                 li              r0,MASK(MSR_DR)                                 // get DR bit
 394                 andc    r12,r2,r0                                               // turn off DR bit
 395                 li              r0,1                                                    // get a 1 to slam into SF
 396                 rldimi  r12,r0,63,MSR_SF_BIT                    // set SF bit (bit 0)
 397                 mtmsrd  r12                                                             // turn DR off and SF on (no VRs are saved on this path)
 398                 isync                                                                   // wait for it to happen
 399                 dcbt128 0,r3,1                                                  // start a forward stream
 400
 401 pmap_novmx_copy_loop:                                                   // loop over 128-byte cache lines
 402         dcbz128 0,r4                                                    // avoid read of dest line
 403
 404         ld              r0,0(r3)                                                // Load half a line
 405         ld              r12,8(r3)
 406         ld              r5,16(r3)
 407         ld              r6,24(r3)
 408         ld              r7,32(r3)
 409         ld              r8,40(r3)
 410         ld              r9,48(r3)
 411         ld              r10,56(r3)
 412
 413         std             r0,0(r4)                                                // Store half a line
 414         std             r12,8(r4)
 415         std             r5,16(r4)
 416         std             r6,24(r4)
 417         std             r7,32(r4)
 418         std             r8,40(r4)
 419         std             r9,48(r4)
 420         std             r10,56(r4)
 421
 422         ld              r0,64(r3)                                               // Load half a line
 423         ld              r12,72(r3)
 424         ld              r5,80(r3)
 425         ld              r6,88(r3)
 426         ld              r7,96(r3)
 427         ld              r8,104(r3)
 428         ld              r9,112(r3)
 429         ld              r10,120(r3)
 430
 431         addi    r3,r3,128
 432
 433         std             r0,64(r4)                                               // Store half a line
 434         std             r12,72(r4)
 435         std             r5,80(r4)
 436         std             r6,88(r4)
 437         std             r7,96(r4)
 438         std             r8,104(r4)
 439         std             r9,112(r4)
 440         std             r10,120(r4)
 441
 442         dcbf    0,r4                                                    // flush the line we just wrote
 443                 addi    r4,r4,128
 444         bdnz    pmap_novmx_copy_loop
 445
 446         sync                                                                    // wait for stores to take
 447         subi    r4,r4,PPC_PGBYTES                               // restore ptr to destination page
 448         li              r8,PPC_PGBYTES-128                              // point to last line in page
 449
             // Two icbi per pass, offsets descending by 128; r8 and r9 alternate as the offset.
 450 pmap_novmx_icache_flush:
 451         subic.  r9,r8,128                                               // more to go?
 452         icbi    r4,r8                                                   // flush from icache
 453         subi    r8,r9,128                                               // get offset to next line
 454         icbi    r4,r9
 455         bne             pmap_novmx_icache_flush                 // done once offset 0 was flushed (cr0 from subic. above)
 456
 457         sync
 458         mtmsrd  r2                                                              // turn DR back on, SF off
 459         isync
 460
 461         b               pmap_g4_restore                                 // restore lower half of MSR and return
 462
 463
 464
 465 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
 466
 467 // Stack frame format used by copyin, copyout, copyinstr and copyoutstr.
 468 // These routines all run both on 32 and 64-bit machines, though because they are called
 469 // by the BSD kernel they are always in 32-bit mode when entered.  The mapped ptr returned
 470 // by MapUserMemoryWindow will be 64 bits however on 64-bit machines.  Beware to avoid
 471 // using compare instructions on this ptr.  This mapped ptr is kept globally in r31, so there
 472 // is no need to store or load it, which are mode-dependent operations since it could be
 473 // 32 or 64 bits.
 474
     // Frame size and slot offsets for the copyin/copyout family.  The seven slots
     // below are spaced 4 bytes apart and occupy FM_SIZE+0 .. FM_SIZE+27 of the
     // FM_SIZE+32 byte frame.  The code that reads and writes these slots is
     // outside this listing, so their exact use should be confirmed there.
 475 #define kkFrameSize     (FM_SIZE+32)
 476
 477 #define kkBufSize       (FM_SIZE+0)
 478 #define kkCR3           (FM_SIZE+4)
 479 #define kkSource        (FM_SIZE+8)
 480 #define kkDest          (FM_SIZE+12)
 481 #define kkCountPtr      (FM_SIZE+16)
 482 #define kkR31Save       (FM_SIZE+20)
 483 #define kkThrErrJmp     (FM_SIZE+24)
 484
 485
 486 // nonvolatile CR bits we use as flags in cr3
     // (CR bits 12-15 make up field cr3; the entry points save the caller's cr3
     // with mfcr before setting or clearing any of these.)
     // NOTE(review): kkString and kkZero both name bit 15.  Presumably the two
     // meanings are never needed at the same time -- confirm where kkZero is used.
 487
 488 #define kk64bit         12
 489 #define kkNull          13
 490 #define kkIn            14
 491 #define kkString        15
 492 #define kkZero          15
 493
 494
 495 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
 496 /*
 497  * int
 498  * copyoutstr(src, dst, maxcount, count)
 499  *      vm_offset_t     src;        // r3
 500  *      addr64_t        dst;        // r4 and r5
 501  *      vm_size_t       maxcount;   // r6
 502  *      vm_size_t*      count;      // r7
 503  *
 504  * Set *count to the number of bytes copied.
      *
      * This entry point only sets up state and then branches to the shared code
      * at copyJoin (outside this listing), which does the copy and produces the
      * return value.  State on arrival at copyJoin:
      *      r2      = caller's cr3 as read by mfcr
      *      r10:r11 = user (destination) address, high:low words
      *      cr3     = kkString set, kkIn clear
      *      *count  = 0
      * kkNull is not written here; a comment in copyinstr indicates that copyJoin
      * itself clears it -- confirm at copyJoin.
 505  */
 506
 507 ENTRY(copyoutstr, TAG_NO_FRAME_USED)
 508         mfcr    r2,0x10                         // save caller's cr3, which we use for flags
 509         mr      r10,r4                          // move high word of 64-bit user address to r10
 510         li              r0,0                                                    // zero used to initialize *count below
 511         crset   kkString                                                // flag as a string op
 512         mr      r11,r5                          // move low word of 64-bit user address to r11
 513         stw             r0,0(r7)                                                // initialize #bytes moved
 514         crclr   kkIn                                                    // flag as copyout
 515         b               copyJoin
 516
 517
 518 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
 519 /*
 520  * int
 521  * copyinstr(src, dst, maxcount, count)
 522  *      addr64_t        src;        // r3 and r4
 523  *      vm_offset_t     dst;        // r5
 524  *      vm_size_t       maxcount;   // r6
 525  *      vm_size_t*      count;      // r7
 526  *
 527  * Set *count to the number of bytes copied
 528  * If dst == NULL, don't copy, just count bytes.
 529  * Only currently called from klcopyinstr.
      *
      * This entry point only sets up state and then branches to the shared code
      * at copyJoin1 (outside this listing), which does the copy and produces the
      * return value.  State on arrival at copyJoin1:
      *      r2      = caller's cr3 as read by mfcr
      *      r10:r11 = user (source) address, high:low words
      *      cr3     = kkString set, kkIn set, kkNull = (dst == NULL)
      *      *count  = 0
      * It joins at copyJoin1 rather than copyJoin so that the kkNull flag computed
      * here is not cleared again.
 530  */
 531
 532 ENTRY(copyinstr, TAG_NO_FRAME_USED)
 533         mfcr    r2,0x10                         // save caller's cr3, which we use for flags
 534         cmplwi  r5,0                                                    // dst==NULL?
 535         mr      r10,r3                          // move high word of 64-bit user address to r10
 536         li              r0,0                                                    // zero used to initialize *count below
 537         crset   kkString                                                // flag as a string op
 538         mr      r11,r4                          // move low word of 64-bit user address to r11
 539         crmove  kkNull,cr0_eq                                   // remember if (dst==NULL)
 540         stw             r0,0(r7)                                                // initialize #bytes moved
 541         crset   kkIn                                                    // flag as copyin (rather than copyout)
 542         b               copyJoin1                                               // skip over the "crclr kkNull"
 543
 544
 545 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
 546 /*
 547  * int
 548  * copyout(src, dst, count)
 549  *      vm_offset_t     src;        // r3
 550  *      addr64_t        dst;        // r4 and r5
 551  *      size_t          count;      // r6
      *
      * copyout and copyoutmsg are two global labels for the same entry point.
      * The code here only sets up state and then branches to the shared code at
      * copyJoin (outside this listing), which does the copy and produces the
      * return value.  State on arrival at copyJoin:
      *      r2      = caller's cr3 as read by mfcr
      *      r10:r11 = user (destination) address, high:low words
      *      cr3     = kkString clear, kkIn clear
      * When INSTRUMENT is nonzero the entry first stores the four performance
      * monitor counters pmc1-pmc4 at absolute addresses 0x6100+(12*16)+0..0xC.
 552  */
 553
 554                         .align  5
 555                         .globl  EXT(copyout)
 556                         .globl  EXT(copyoutmsg)
 557
 558 LEXT(copyout)
 559 LEXT(copyoutmsg)
 560
 561 #if INSTRUMENT
 562         mfspr   r12,pmc1                                                ; INSTRUMENT - saveinstr[12] - Take stamp at copyout
 563         stw             r12,0x6100+(12*16)+0x0(0)               ; INSTRUMENT - Save it
 564         mfspr   r12,pmc2                                                ; INSTRUMENT - Get stamp
 565         stw             r12,0x6100+(12*16)+0x4(0)               ; INSTRUMENT - Save it
 566         mfspr   r12,pmc3                                                ; INSTRUMENT - Get stamp
 567         stw             r12,0x6100+(12*16)+0x8(0)               ; INSTRUMENT - Save it
 568         mfspr   r12,pmc4                                                ; INSTRUMENT - Get stamp
 569         stw             r12,0x6100+(12*16)+0xC(0)               ; INSTRUMENT - Save it
 570 #endif
 571         mfcr    r2,0x10                         // save caller's cr3, which we use for flags
 572         mr      r10,r4                          // move high word of 64-bit user address to r10
 573         crclr   kkString                                                // not a string version
 574         mr      r11,r5                          // move low word of 64-bit user address to r11
 575         crclr   kkIn                                                    // flag as copyout
 576         b               copyJoin
 577
 578
 579 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
 580 /*
 581  * int
 582  * copyin(src, dst, count)
 583  *      addr64_t        src;        // r3 and r4 (user-space source)
 584  *      vm_offset_t     dst;        // r5 (kernel destination buffer)
 585  *      size_t          count;      // r6 (number of bytes to copy)
 586  */
 587 // Returns 0 on success, EFAULT if a user-space access faults, or ENAMETOOLONG if count > 256MB.
 588 // copyin and copyinmsg are two names for the same entry point.
 589                         .align  5
 590                         .globl  EXT(copyin)
 591                         .globl  EXT(copyinmsg)
 592
 593 LEXT(copyin)
 594 LEXT(copyinmsg)
 595
 596         mfcr    r2,0x10                         // save caller's cr3 in r2; cr3 holds our kk* flags
 597         mr      r10,r3                          // move high word of 64-bit user address to r10
 598         crclr   kkString                                                // not a string version
 599         mr      r11,r4                          // move low word of 64-bit user address to r11
 600         crset   kkIn                                                    // flag as copyin; execution falls through into copyJoin
 601
 602
 603 // Common code to handle setup for all the copy variants.  On entry:
 604 //      r2 = caller's cr3 (saved into kkCR3 below)
 605 //      r3 = source if copyout
 606 //      r5 = dest if copyin
 607 //      r6 = buffer length or count
 608 //      r7 = count output ptr (if kkString set)
 609 //     r10 = high word of 64-bit user-space address (source if copyin, dest if copyout)
 610 //     r11 = low word of 64-bit user-space address
 611 //     cr3 = kkIn, kkString, kkNull flags
 612 // Pushes a kkFrameSize frame, installs copyinout_error in THREAD_RECOVER, maps the user buffer, then runs bcopy or copyString.
 613 copyJoin:
 614         crclr   kkNull                                                  // (dst==NULL) convention not used with this call
 615 copyJoin1:                                                                              // enter here from copyinstr, which has already set kkNull to (dst==NULL)
 616                 mflr    r0                                                              // get return address
 617         cmplwi  r6,0                                                    // buffer length 0?
 618         lis             r9,0x1000                                               // r9 <- 0x10000000 (256MB)
 619                 stw             r0,FM_LR_SAVE(r1)                               // save return address (our own frame is not pushed yet)
 620         cmplw   cr1,r6,r9                                               // buffer length > 256MB ?  (cr1 is tested after the frame is built)
 621         mfsprg  r8,2                                                    // get the features
 622         beq--   copyinout_0                                             // 0 length is degenerate case
 623                 stwu    r1,-kkFrameSize(r1)                             // set up stack frame
 624         stw             r2,kkCR3(r1)                    // save caller's cr3, which we use for flags
 625         mtcrf   0x02,r8                                                 // move pf64Bit to cr6
 626         stw             r3,kkSource(r1)                                 // save args across MapUserMemoryWindow
 627         stw             r5,kkDest(r1)                                   // (dest arg)
 628         stw             r6,kkBufSize(r1)                                // (buffer length arg)
 629         crmove  kk64bit,pf64Bitb                                // remember if this is a 64-bit processor
 630         stw             r7,kkCountPtr(r1)                               // (count output ptr arg)
 631         stw             r31,kkR31Save(r1)                               // we use r31 globally for mapped user ptr
 632
 633
 634
 635 // Handle buffer length > 256MB.  This is an error (ENAMETOOLONG) on copyin and copyout.
 636 // The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp
 637 // the buffer length to 256MB.  This isn't an issue if the string is less than 256MB
 638 // (as most are!), but if they are >256MB we eventually return ENAMETOOLONG.  This restriction
 639 // is due to MapUserMemoryWindow; we don't want to consume more than two segments for
 640 // the mapping.
 641
 642         ble++   cr1,copyin0                                             // skip if buffer length <= 256MB
 643         bf              kkString,copyinout_too_big              // error if not string op
 644         mr              r6,r9                                                   // silently clamp buffer length to 256MB
 645         stw             r9,kkBufSize(r1)                                // update saved copy too
 646
 647
 648 // Set up thread_recover in case we hit an illegal address.
 649
 650 copyin0:
 651                 li              r31,0                                                   // no mapped ptr yet
 652                 mfsprg  r8,1                                                    // Get the current thread
 653                 lis             r2,hi16(copyinout_error)                // r2 <- address of copyinout_error (high half)
 654                 ori             r2,r2,lo16(copyinout_error)             // (low half)
 655                 lwz             r4,THREAD_RECOVER(r8)                   // r4 <- recovery ptr that was in effect on entry
 656                 lwz             r3,ACT_VMMAP(r8)                                // r3 <- vm_map virtual address
 657                 stw             r2,THREAD_RECOVER(r8)                   // install copyinout_error as the recovery ptr
 658                 stw             r4,kkThrErrJmp(r1)                              // keep the previous recovery ptr so the exit path can restore it
 659
 660
 661 // Map user segment into kernel map, turn on 64-bit mode.  At this point:
 662 //              r3 = vm map
 663 //              r6 = buffer length
 664 // r10/r11 = 64-bit user-space ptr (source if copyin, dest if copyout)
 665 //
 666 // When we call MapUserMemoryWindow, we pass:
 667 //      r3 = vm map ptr
 668 //   r4/r5 = 64-bit user space address as an addr64_t
 669
 670         mr      r4,r10                          // copy user ptr into r4/r5
 671         mr      r5,r11
 672 #if INSTRUMENT
 673         mfspr   r12,pmc1                                                ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace
 674         stw             r12,0x6100+(13*16)+0x0(0)               ; INSTRUMENT - Save it
 675         mfspr   r12,pmc2                                                ; INSTRUMENT - Get stamp
 676         stw             r12,0x6100+(13*16)+0x4(0)               ; INSTRUMENT - Save it
 677         mfspr   r12,pmc3                                                ; INSTRUMENT - Get stamp
 678         stw             r12,0x6100+(13*16)+0x8(0)               ; INSTRUMENT - Save it
 679         mfspr   r12,pmc4                                                ; INSTRUMENT - Get stamp
 680         stw             r12,0x6100+(13*16)+0xC(0)               ; INSTRUMENT - Save it
 681 #endif
 682         bl              EXT(MapUserMemoryWindow)                // get r3/r4 <- 64-bit address in kernel map of user operand
 683 #if INSTRUMENT
 684         mfspr   r12,pmc1                                                ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace
 685         stw             r12,0x6100+(14*16)+0x0(0)               ; INSTRUMENT - Save it
 686         mfspr   r12,pmc2                                                ; INSTRUMENT - Get stamp
 687         stw             r12,0x6100+(14*16)+0x4(0)               ; INSTRUMENT - Save it
 688         mfspr   r12,pmc3                                                ; INSTRUMENT - Get stamp
 689         stw             r12,0x6100+(14*16)+0x8(0)               ; INSTRUMENT - Save it
 690         mfspr   r12,pmc4                                                ; INSTRUMENT - Get stamp
 691         stw             r12,0x6100+(14*16)+0xC(0)               ; INSTRUMENT - Save it
 692 #endif
 693         mr              r31,r4                                                  // r31 <- mapped ptr into user space (may be 64-bit)
 694         bf--    kk64bit,copyin1                                 // skip if a 32-bit processor
 695
 696                 rldimi  r31,r3,32,0                                             // slam high-order bits into mapped ptr
 697         mfmsr   r4                                                              // if 64-bit, turn on SF so we can use returned ptr
 698         li              r0,1                                                    // r0 <- 1, the bit value inserted into the MSR image
 699         rldimi  r4,r0,63,MSR_SF_BIT                             // light bit 0
 700         mtmsrd  r4                                                              // turn on 64-bit mode
 701         isync                                                                   // wait for mode to change
 702
 703
 704 // Load r3-r5, substituting mapped ptr as appropriate.
 705
 706 copyin1:
 707         lwz             r5,kkBufSize(r1)                                // restore length to copy
 708         bf              kkIn,copyin2                                    // skip if copyout
 709         lwz             r4,kkDest(r1)                                   // copyin: dest is kernel ptr
 710         mr              r3,r31                                                  // source is mapped ptr
 711         b               copyin3
 712 copyin2:                                                                                // handle copyout
 713         lwz             r3,kkSource(r1)                                 // source is kernel buffer (r3 at entry)
 714         mr              r4,r31                                                  // dest is mapped ptr into user space
 715
 716
 717 // Finally, all set up to copy:
 718 //              r3 = source ptr (mapped if copyin)
 719 //              r4 = dest ptr (mapped if copyout)
 720 //              r5 = length
 721 //         r31 = mapped ptr returned by MapUserMemoryWindow
 722 //         cr3 = kkIn, kkString, kk64bit, and kkNull flags
 723
 724 copyin3:
 725         bt              kkString,copyString                             // handle copyinstr and copyoutstr
 726         bl              EXT(bcopy)                                              // copyin and copyout: let bcopy do the work
 727         li              r3,0                                                    // return success
 728
 729
 730 // Main exit point for copyin, copyout, copyinstr, and copyoutstr.  Also reached
 731 // from error recovery if we get a DSI accessing user space.  Restore the saved
 732 // recovery ptr, turn 64-bit mode off on 64-bit processors, and pop off frame.
 733 //              r3 = 0, EFAULT, or ENAMETOOLONG
 734
 735 copyinx:
 736         lwz             r2,kkCR3(r1)                    // get callers cr3
 737                 mfsprg  r6,1                                                    // Get the current thread
 738         bf--    kk64bit,copyinx1                                // skip if 32-bit processor
 739         mfmsr   r12                                                             // r12 <- current MSR
 740         rldicl  r12,r12,0,MSR_SF_BIT+1                  // if 64-bit processor, turn 64-bit mode off
 741         mtmsrd  r12                                                             // turn SF off
 742         isync                                                                   // wait for the mode to change
 743 copyinx1:
 744                 lwz             r0,FM_LR_SAVE+kkFrameSize(r1)   // get return address
 745         lwz             r31,kkR31Save(r1)                               // restore callers r31
 746         lwz             r4,kkThrErrJmp(r1)                              // load saved thread recover
 747         addi    r1,r1,kkFrameSize                               // pop off our stack frame
 748                 mtlr    r0                                                              // put return address back in LR
 749                 stw             r4,THREAD_RECOVER(r6)                   // restore thread recover
 750         mtcrf   0x10,r2                                                 // restore cr3
 751                 blr                                                                     // return with status in r3
 752
 753
 754 /* We get here via the exception handler if an illegal
 755  * user memory reference was made.  This error handler is used by
 756  * copyin, copyout, copyinstr, and copyoutstr.  Registers are as
 757  * they were at point of fault, so for example cr3 flags are valid.
 758  */
 759
 760 copyinout_error:
 761         li              r3,EFAULT                                               // return error
 762         b               copyinx                                                 // unwind through the main exit point
 763
 764 copyinout_0:                                                                    // degenerate case: 0-length copy (branched to before the frame is pushed)
 765                 mtcrf   0x10,r2                                                 // restore cr3
 766         li              r3,0                                                    // return success
 767         blr
 768
 769 copyinout_too_big:                                                              // degenerate case: copyin/copyout buffer length > 256MB
 770         mtcrf   0x10,r2                                                 // restore cr3
 771         lwz             r1,0(r1)                                                // pop off stack frame (reload the back chain stored by stwu)
 772         li              r3,ENAMETOOLONG                                 // return error
 773         blr
 774
 775
 776 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
 777 // Handle copyinstr and copyoutstr.  At this point the stack frame is set up,
 778 // the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode
 779 // if necessary, and:
 780 //              r3 = source ptr, mapped if copyinstr
 781 //              r4 = dest ptr, mapped if copyoutstr
 782 //              r5 = buffer length
 783 //         r31 = mapped ptr returned by MapUserMemoryWindow
 784 //     cr3 = kkIn, kkString, kkNull, and kk64bit flags
 785 // We do word copies unless the buffer is very short, then use a byte copy loop
 786 // for the leftovers if necessary.  The crossover at which the word loop becomes
 787 // faster is about seven bytes, counting the zero.
 788 //
 789 // We first must word-align the source ptr, in order to avoid taking a spurious
 790 // page fault.
 791
 792 copyString:
 793         cmplwi  cr1,r5,15                                               // is buffer very short (under 15 bytes)?
 794         mr      r12,r3                          // remember ptr to 1st source byte
 795         mtctr   r5                                                              // assuming short, set up loop count for bytes
 796         blt--   cr1,copyinstr8                                  // too short for word loop
 797         rlwinm  r2,r3,0,0x3                     // get byte offset of 1st byte within word
 798         rlwinm  r9,r3,3,0x18                    // get bit offset of 1st byte within word
 799         li      r7,-1                           // r7 <- all ones, shifted into a mask below
 800         sub     r3,r3,r2                        // word-align source address
 801         add     r6,r5,r2                        // get length starting at byte 0 in word
 802         srw     r7,r7,r9                        // get mask for bytes in first word
 803         srwi    r0,r6,2                                                 // get #words in buffer
 804         lwz     r5,0(r3)                        // get aligned word with first source byte
 805         lis             r10,hi16(0xFEFEFEFF)                    // load magic constants into r10 and r11
 806         lis             r11,hi16(0x80808080)
 807         mtctr   r0                                                              // set up word loop count
 808         addi    r3,r3,4                         // advance past the source word
 809         ori             r10,r10,lo16(0xFEFEFEFF)                // r10 = 0xFEFEFEFF
 810         ori             r11,r11,lo16(0x80808080)                // r11 = 0x80808080
 811         orc     r8,r5,r7                        // map bytes preceding first source byte into 0xFF
 812         bt--    kkNull,copyinstr5enter          // enter loop that just counts
 813
 814 // Special case 1st word, which has been 0xFF filled on left.  Note that we use
 815 // "and.", even though we execute both in 32 and 64-bit mode.  This is OK.
 816
 817         slw     r5,r5,r9                        // left justify payload bytes
 818         add             r9,r10,r8                                               // r9 =  data + 0xFEFEFEFF
 819         andc    r7,r11,r8                                               // r7 = ~data & 0x80808080
 820                 subfic  r0,r2,4                                                 // get r0 <- #payload bytes in 1st word
 821         and.    r7,r9,r7                                                // if r7==0, then all bytes in r8 are nonzero
 822         stw     r5,0(r4)                        // copy payload bytes to dest buffer
 823         add             r4,r4,r0                                                // then point to next byte in dest buffer
 824         bdnzt   cr0_eq,copyinstr6               // use loop that copies if 0 not found
 825
 826         b               copyinstr7                      // 0 found (buffer can't be full: the word count is at least 3 here)
 827
 828
 829 // Word loop(s).  They do a word-parallel search for 0s, using the following
 830 // non-obvious but very efficient test:
 831 //              y =  data + 0xFEFEFEFF
 832 //              z = ~data & 0x80808080
 833 // If (y & z)==0, then all bytes in dataword are nonzero.  There are two copies
 834 // of this loop, one that just counts and another that copies.
 835 //              r3 = ptr to next word of source (word aligned)
 836 //              r4 = ptr to next byte in buffer
 837 //      r6 = original buffer length (adjusted to be word origin)
 838 //     r10 = 0xFEFEFEFF
 839 //     r11 = 0x80808080
 840 //     r12 = ptr to 1st source byte (used to determine string length)
 841 //     ctr = count of words left to check
 842         .align  5                                                               // align inner loops for speed
 843 copyinstr5:                                                                             // version that counts but does not copy
 844         lwz     r8,0(r3)                                                // get next word of source
 845         addi    r3,r3,4                         // advance past it
 846 copyinstr5enter:
 847         add             r9,r10,r8                                               // r9 =  data + 0xFEFEFEFF
 848         andc    r7,r11,r8                                               // r7 = ~data & 0x80808080
 849         and.    r7,r9,r7                        // r7 = r9 & r7 ("." ok even in 64-bit mode)
 850         bdnzt   cr0_eq,copyinstr5                               // if r7==0, then all bytes in r8 are nonzero
 851
 852         b               copyinstr7                                              // 0 found or word count exhausted
 853
 854         .align  5                                                               // align inner loops for speed
 855 copyinstr6:                                                                             // version that counts and copies
 856         lwz     r8,0(r3)                                                // get next word of source
 857         addi    r3,r3,4                         // advance past it
 858         addi    r4,r4,4                                                 // increment dest ptr while we wait for data
 859         add             r9,r10,r8                                               // r9 =  data + 0xFEFEFEFF
 860         andc    r7,r11,r8                                               // r7 = ~data & 0x80808080
 861         and.    r7,r9,r7                        // r7 = r9 & r7 ("." ok even in 64-bit mode)
 862         stw             r8,-4(r4)                                               // pack all 4 bytes into buffer
 863         bdnzt   cr0_eq,copyinstr6                               // if r7==0, then all bytes are nonzero; otherwise fall through to copyinstr7
 864
 865
 866 // Either 0 found or buffer filled.  The above algorithm has mapped nonzero bytes to 0
 867 // and 0 bytes to 0x80 with one exception: 0x01 bytes preceding the first 0 are also
 868 // mapped to 0x80.  We must mask out these false hits before searching for an 0x80 byte.
 869 //              r3 = word aligned ptr to next word of source (ie, r8==mem(r3-4))
 870 //      r6 = original buffer length (adjusted to be word origin)
 871 //      r7 = computed vector of 0x00 and 0x80 bytes
 872 //      r8 = original source word, coming from -4(r3), possibly padded with 0xFFs on left if 1st word
 873 //     r12 = ptr to 1st source byte (used to determine string length)
 874 //     cr0 = beq set iff 0 not found
 875
 876 copyinstr7:
 877         rlwinm  r2,r8,7,0,31                                    // move 0x01 bits to 0x80 position
 878                 rlwinm  r6,r6,0,0x3                                             // mask down to partial byte count in last word
 879         andc    r7,r7,r2                                                // turn off false hits from 0x0100 worst case
 880         crnot   kkZero,cr0_eq                                   // 0 found iff cr0_eq is off
 881         srwi    r7,r7,8                         // we want to count the 0 as a byte xferred
 882                 cmpwi   r6,0                                                    // any bytes left over in last word?  (tested by the beq below)
 883         cntlzw  r7,r7                                                   // now we can find the 0 byte (ie, the 0x80)
 884         subi    r3,r3,4                         // back up r3 to point to 1st byte in r8
 885         srwi    r7,r7,3                                                 // convert 8,16,24,32 to 1,2,3,4
 886         add     r3,r3,r7                        // now r3 points one past 0 byte, or at 1st byte not xferred
 887         bt++    kkZero,copyinstr10                              // 0 found, so done
 888
 889         beq             copyinstr10                                             // r6==0, so buffer truly full
 890         mtctr   r6                                                              // 0 not found, loop over r6 bytes
 891         b               copyinstr8                                              // enter byte loop for last 1-3 leftover bytes
 892
 893
 894 // Byte loop.  This is used for very small buffers and for the odd bytes left over
 895 // after searching and copying words at a time.
 896 //      r3 = ptr to next byte of source
 897 //      r4 = ptr to next dest byte
 898 //     r12 = ptr to first byte of source
 899 //     ctr = count of bytes to check
 900
 901         .align  5                                                               // align inner loops for speed
 902 copyinstr8:                                                                             // loop over bytes of source
 903         lbz             r0,0(r3)                                                // get next byte of source
 904         addi    r3,r3,1                                                 // advance source ptr past it
 905         addi    r4,r4,1                                                 // increment dest addr whether we store or not
 906         cmpwi   r0,0                                                    // the 0?  (cr0 is consumed by the bdnzf below)
 907         bt--    kkNull,copyinstr9                               // don't store if copyinstr with NULL ptr
 908         stb             r0,-1(r4)                                               // store the byte at the pre-increment dest address
 909 copyinstr9:
 910         bdnzf   cr0_eq,copyinstr8                               // loop if byte not 0 and more room in buffer
 911
 912         crmove  kkZero,cr0_eq                                   // remember if 0 found or buffer filled
 913
 914
 915 // Buffer filled or 0 found.  Unwind and return.
 916 //      r3 = ptr to 1st source byte not transferred
 917 //     r12 = ptr to 1st source byte
 918 //     r31 = mapped ptr returned by MapUserMemoryWindow
 919 //     cr3 = kkZero set iff 0 found
 920
 921 copyinstr10:
 922         lwz             r9,kkCountPtr(r1)                               // get ptr to place to store count of bytes moved
 923         sub     r2,r3,r12                       // compute #bytes copied (including the 0)
 924         li              r3,0                                                    // assume success return status
 925         stw             r2,0(r9)                                                // store #bytes moved through the count ptr
 926         bt++    kkZero,copyinx                                  // we did find the 0 so return 0
 927         li              r3,ENAMETOOLONG                                 // buffer filled before a 0 was found
 928         b               copyinx                                                 // join main exit routine
 929
 930 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
 931 /*
 932  * int
 933  * copypv(source, sink, size, which)
 934  *      addr64_t        src;        // r3 and r4
 935  *      addr64_t        dst;        // r5 and r6
 936  *      size_t          size;           // r7
 937  *      int                     which;          // r8
 938  *
 939  * Operand size bytes are copied from operand src into operand dst. The source and
 940  * destination operand addresses are given as addr64_t, and may designate starting
 941  * locations in physical or virtual memory in any combination except where both are
 942  * virtual. Virtual memory locations may be in either the kernel or the current thread's
 943  * address space. Operand size may be up to 256MB.
 944  *
 945  * Operation is controlled by operand which, which offers these options:
 946  *              cppvPsrc : source operand is (1) physical or (0) virtual
 947  *              cppvPsnk : destination operand is (1) physical or (0) virtual
 948  *              cppvKmap : virtual operand is in (1) kernel or (0) current thread
 949  *              cppvFsnk : (1) flush destination before and after transfer
 950  *              cppvFsrc : (1) flush source before and after transfer
 951  *              cppvNoModSnk : (1) don't set destination operand's changed bit(s)
 952  *              cppvNoRefSrc : (1) don't set source operand's referenced bit(s)
 953  *
 954  * Implementation is now split into this new 64-bit path and the old path, hw_copypv_32().
 955  * This section describes the operation of the new 64-bit path.
 956  *
 957  * The 64-bit path utilizes the more capacious 64-bit kernel address space to create a
 958  * window in the kernel address space into all of physical RAM plus the I/O hole. Since
 959  * the window's mappings specify the proper access policies for the underlying memory,
 960  * the new path does not have to flush caches to avoid a cache paradox, so cppvFsnk
 961  * and cppvFsrc are ignored. Physical operand addresses are relocated into the physical
 962  * memory window, and are accessed with data relocation on. Virtual addresses are either
 963  * within the kernel, or are mapped into the kernel address space through the user memory
 964  * window. Because accesses to a virtual operand are performed with data relocation on,
 965  * the new path does not have to translate the address, disable/enable interrupts, lock
 966  * the mapping, or update referenced and changed bits.
 967  *
 968  * The IBM 970 (a.k.a. G5) processor treats real-mode accesses as guarded, so there is
 969  * a substantial performance penalty for copypv operating in real mode. Utilizing the
 970  * new 64-bit path, transfer performance increases >100% on the G5.
 971  *
 972  * The attentive reader may notice that mtmsrd ops are not followed by isync ops as
 973  * might be expected. The 970 follows PowerPC architecture version 2.01, which defines
 974  * mtmsrd with L=0 as a context synchronizing op, so a following isync is no longer
 975  * required.
 976  *
 977  * To keep things exciting, we develop 64-bit values in non-volatiles, but we also need
 978  * to call 32-bit functions, which would lead to the high-order 32 bits of our values
 979  * getting clobbered unless we do something special. So, we preserve our 64-bit non-volatiles
 980  * in our own stack frame across calls to 32-bit functions.
 981  *
 982  */
 983
 984 // Map the bits of copypv's 'which' operand into non-volatile CR2 and CR3 bits: 'which' is rotated left by whichAlign, masked with whichMask, and loaded with mtcrf.
 985 #define whichAlign      ((3+1)*4)
 986 #define whichMask       0x007F0000
 987 #define pvPsnk          (cppvPsnkb - whichAlign)
 988 #define pvPsrc          (cppvPsrcb - whichAlign)
 989 #define pvFsnk          (cppvFsnkb - whichAlign)
 990 #define pvFsrc          (cppvFsrcb - whichAlign)
 991 #define pvNoModSnk      (cppvNoModSnkb - whichAlign)
 992 #define pvNoRefSrc      (cppvNoRefSrcb - whichAlign)
 993 #define pvKmap          (cppvKmapb - whichAlign)
 994 #define pvNoCache       cr2_lt
 995
 996                 .align  5
 997                 .globl  EXT(copypv)
 998
 999 LEXT(copypv)
1000         mfsprg  r10,2                                                   // get feature flags
1001         mtcrf   0x02,r10                                                // we need to test pf64Bit
1002         bt++    pf64Bitb,copypv_64                              // skip if 64-bit processor (only they take hint)
1003
1004         b               EXT(hw_copypv_32)                               // carry on with 32-bit copypv
1005
1006 // Push a 32-bit ABI-compliant stack frame and preserve all non-volatiles that we'll clobber.
1007 copypv_64:
1008                 mfsprg  r9,1                                                    // get current thread
1009                 stwu    r1,-(FM_ALIGN((31-26+11)*4)+FM_SIZE)(r1)
1010                                                                                                 // allocate stack frame and link it
1011                 mflr    r0                                                              // get return address
1012                 mfcr    r10                                                             // get cr2 and cr3
1013                 lwz             r12,THREAD_RECOVER(r9)                  // get error callback
1014                 stw             r26,FM_ARG0+0x00(r1)                    // save non-volatile r26
1015                 stw             r27,FM_ARG0+0x04(r1)                    // save non-volatile r27
1016                 stw             r28,FM_ARG0+0x08(r1)                    // save non-volatile r28
1017                 stw             r29,FM_ARG0+0x0C(r1)                    // save non-volatile r29
1018                 stw             r30,FM_ARG0+0x10(r1)                    // save non-volatile r30
1019                 stw             r31,FM_ARG0+0x14(r1)                    // save non-volatile r31
1020                 stw             r12,FM_ARG0+0x20(r1)                    // save error callback
1021                 stw             r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
1022                                                                                                 // save return address
1023                 stw             r10,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
1024                                                                                                 // save non-volatile cr2 and cr3
1025
1026 // Non-volatile register usage in this routine is:
1027 //      r26: saved msr image
1028 //      r27: current pmap_t / virtual source address
1029 //      r28: destination virtual address
1030 //      r29: source address
1031 //      r30: destination address
1032 //      r31: byte count to copy
1033 //      cr2/3: parameter 'which' bits
1034
1035                 rlwinm  r8,r8,whichAlign,whichMask              // align and mask which bits
1036                 mr              r31,r7                                                  // copy size to somewhere non-volatile
1037                 mtcrf   0x20,r8                                                 // insert which bits into cr2 and cr3
1038                 mtcrf   0x10,r8                                                 // insert which bits into cr2 and cr3
1039                 rlwinm  r29,r3,0,1,0                                    // form source address high-order bits
1040                 rlwinm  r30,r5,0,1,0                                    // form destination address high-order bits
1041                 rlwimi  r29,r4,0,0,31                                   // form source address low-order bits
1042                 rlwimi  r30,r6,0,0,31                                   // form destination address low-order bits
1043                 crand   cr7_lt,pvPsnk,pvPsrc                    // are both operand addresses physical?
1044                 cntlzw  r0,r31                                                  // count leading zeroes in byte count
1045                 cror    cr7_eq,pvPsnk,pvPsrc                    // cr7_eq <- source or destination is physical
1046                 bf--    cr7_eq,copypv_einval                    // both operands may not be virtual
1047                 cmplwi  r0,4                                                    // byte count greater than or equal 256M (2**28)?
1048                 blt--   copypv_einval                                   // byte count too big, give EINVAL
1049                 cmplwi  r31,0                                                   // byte count zero?
1050                 beq--   copypv_zero                                             // early out
1051                 bt              cr7_lt,copypv_phys                              // both operand addresses are physical
1052                 mr              r28,r30                                                 // assume destination is virtual
1053                 bf              pvPsnk,copypv_dv                                // is destination virtual?
1054                 mr              r28,r29                                                 // no, so source must be virtual
1055 copypv_dv:
1056                 lis             r27,ha16(EXT(kernel_pmap))              // get kernel's pmap_t *, high-order
1057                 lwz             r27,lo16(EXT(kernel_pmap))(r27) // get kernel's pmap_t
1058                 bt              pvKmap,copypv_kern                              // virtual address in kernel map?
1059                 lwz             r3,ACT_VMMAP(r9)                                // get user's vm_map *
1060                 rldicl  r4,r28,32,32                                    // r4, r5 <- addr64_t virtual address
1061                 rldicl  r5,r28,0,32
1062                 std             r29,FM_ARG0+0x30(r1)                    // preserve 64-bit r29 across 32-bit call
1063                 std             r30,FM_ARG0+0x38(r1)                    // preserve 64-bit r30 across 32-bit call
1064                 bl              EXT(MapUserMemoryWindow)                // map slice of user space into kernel space
1065                 ld              r29,FM_ARG0+0x30(r1)                    // restore 64-bit r29
1066                 ld              r30,FM_ARG0+0x38(r1)                    // restore 64-bit r30
1067                 rlwinm  r28,r3,0,1,0                                    // convert relocated addr64_t virtual address
1068                 rlwimi  r28,r4,0,0,31                                   //  into a single 64-bit scalar
1069 copypv_kern:
1070
1071 // Since we'll be accessing the virtual operand with data-relocation on, we won't need to
1072 // update the referenced and changed bits manually after the copy. So, force the appropriate
1073 // flag bit on for the virtual operand.
1074                 crorc   pvNoModSnk,pvNoModSnk,pvPsnk    // for virtual dest, let hardware do ref/chg bits
1075                 crorc   pvNoRefSrc,pvNoRefSrc,pvPsrc    // for virtual source, let hardware do ref bit
1076
1077 // We'll be finding a mapping and looking at it, so we need to disable interrupts.
1078                 lis             r0,hi16(MASK(MSR_VEC))                  // get vector mask
1079                 ori             r0,r0,lo16(MASK(MSR_FP))                // insert fp mask
1080                 mfmsr   r26                                                             // save current msr
1081                 andc    r26,r26,r0                                              // turn off VEC and FP in saved copy
1082                 ori             r0,r0,lo16(MASK(MSR_EE))                // add EE to our mask
1083                 andc    r0,r26,r0                                               // disable EE in our new msr image
1084                 mtmsrd  r0                                                              // introduce new msr image
1085
1086 // We're now holding the virtual operand's pmap_t in r27 and its virtual address in r28. We now
1087 // try to find a mapping corresponding to this address in order to determine whether the address
1088 // is cacheable. If we don't find a mapping, we can safely assume that the operand is cacheable
1089 // (a non-cacheable operand must be a block mapping, which will always exist); otherwise, we
1090 // examine the mapping's caching-inhibited bit.
1091                 mr              r3,r27                                                  // r3 <- pmap_t pmap
1092                 rldicl  r4,r28,32,32                                    // r4, r5 <- addr64_t va
1093                 rldicl  r5,r28,0,32
1094                 la              r6,FM_ARG0+0x18(r1)                             // r6 <- addr64_t *nextva
1095                 li              r7,1                                                    // r7 <- int full, search nested mappings
1096                 std             r26,FM_ARG0+0x28(r1)                    // preserve 64-bit r26 across 32-bit calls
1097                 std             r28,FM_ARG0+0x30(r1)                    // preserve 64-bit r28 across 32-bit calls
1098                 std             r29,FM_ARG0+0x38(r1)                    // preserve 64-bit r29 across 32-bit calls
1099                 std             r30,FM_ARG0+0x40(r1)                    // preserve 64-bit r30 across 32-bit calls
1100                 bl              EXT(mapping_find)                               // find mapping for virtual operand
1101                 mr.             r3,r3                                                   // did we find it?
1102                 beq             copypv_nomapping                                // nope, so we'll assume it's cacheable
1103                 lwz             r4,mpVAddr+4(r3)                                // get low half of virtual addr for hw flags
1104                 rlwinm. r4,r4,0,mpIb-32,mpIb-32                 // caching-inhibited bit set?
1105                 crnot   pvNoCache,cr0_eq                                // if it is, use bcopy_nc
1106                 bl              EXT(mapping_drop_busy)                  // drop busy on the mapping
1107 copypv_nomapping:
1108                 ld              r26,FM_ARG0+0x28(r1)                    // restore 64-bit r26
1109                 ld              r28,FM_ARG0+0x30(r1)                    // restore 64-bit r28
1110                 ld              r29,FM_ARG0+0x38(r1)                    // restore 64-bit r29
1111                 ld              r30,FM_ARG0+0x40(r1)                    // restore 64-bit r30
1112                 mtmsrd  r26                                                             // restore msr to its previous state
1113
1114 // Set both the source and destination virtual addresses to the virtual operand's address --
1115 // we'll overlay one of them with the physical operand's address.
1116                 mr              r27,r28                                                 // make virtual operand BOTH source AND destination
1117
1118 // Now we're ready to relocate the physical operand address(es) into the physical memory window.
1119 // Recall that we've mapped physical memory (including the I/O hole) into the kernel's address
1120 // space somewhere at or over the 2**32 line. If one or both of the operands are in the I/O hole,
1121 // we'll set the pvNoCache flag, forcing use of non-caching bcopy_nc() to do the copy.
1122 copypv_phys:
1123                 ld              r6,lgPMWvaddr(0)                                // get physical memory window virtual address
1124                 bf              pvPsnk,copypv_dstvirt                   // is destination address virtual?
1125                 cntlzd  r4,r30                                                  // count leading zeros in destination address
1126                 cmplwi  r4,32                                                   // if it's 32, then it's in the I/O hole (2**31 to 2**32-1)
1127                 cror    pvNoCache,cr0_eq,pvNoCache              // use bcopy_nc for I/O hole locations
1128                 add             r28,r30,r6                                              // relocate physical destination into physical window
1129 copypv_dstvirt:
1130                 bf              pvPsrc,copypv_srcvirt                   // is source address virtual?
1131                 cntlzd  r4,r29                                                  // count leading zeros in source address
1132                 cmplwi  r4,32                                                   // if it's 32, then it's in the I/O hole (2**31 to 2**32-1)
1133                 cror    pvNoCache,cr0_eq,pvNoCache              // use bcopy_nc for I/O hole locations
1134                 add             r27,r29,r6                                              // relocate physical source into physical window
1135 copypv_srcvirt:
1136
1137 // Once the copy is under way (bcopy or bcopy_nc), we will want to get control if anything
1138 // funny happens during the copy. So, we set a pointer to our error handler in the per-thread
1139 // control block.
1140                 mfsprg  r8,1                                                    // get current threads stuff
1141                 lis             r3,hi16(copypv_error)                   // get our error callback's address, high
1142                 ori             r3,r3,lo16(copypv_error)                // get our error callback's address, low
1143                 stw             r3,THREAD_RECOVER(r8)                   // set our error callback
1144
1145 // Since our physical operand(s) are relocated at or above the 2**32 line, we must enter
1146 // 64-bit mode.
1147                 li              r0,1                                                    // get a handy one bit
1148                 mfmsr   r3                                                              // get current msr
1149                 rldimi  r3,r0,63,MSR_SF_BIT                             // set SF bit on in our msr copy
1150                 mtmsrd  r3                                                              // enter 64-bit mode
1151
1152 // If requested, flush data cache
1153 // Note that we don't flush, the code is being saved "just in case".
1154 #if 0
1155                 bf              pvFsrc,copypv_nfs                               // do we flush the source?
1156                 rldicl  r3,r27,32,32                                    // r3, r4 <- addr64_t source virtual address
1157                 rldicl  r4,r27,0,32
1158                 mr              r5,r31                                                  // r5 <- count (in bytes)
1159                 li              r6,0                                                    // r6 <- boolean phys (false, not physical)
1160                 bl              EXT(flush_dcache)                               // flush the source operand
1161 copypv_nfs:
1162                 bf              pvFsnk,copypv_nfdx                              // do we flush the destination?
1163                 rldicl  r3,r28,32,32                                    // r3, r4 <- addr64_t destination virtual address
1164                 rldicl  r4,r28,0,32
1165                 mr              r5,r31                                                  // r5 <- count (in bytes)
1166                 li              r6,0                                                    // r6 <- boolean phys (false, not physical)
1167                 bl              EXT(flush_dcache)                               // flush the destination operand
1168 copypv_nfdx:
1169 #endif
1170
1171 // Call bcopy or bcopy_nc to perform the copy.
1172                 mr              r3,r27                                                  // r3 <- source virtual address
1173                 mr              r4,r28                                                  // r4 <- destination virtual address
1174                 mr              r5,r31                                                  // r5 <- bytes to copy
1175                 bt              pvNoCache,copypv_nc                             // take non-caching route
1176                 bl              EXT(bcopy)                                              // call bcopy to do the copying
1177                 b               copypv_copydone
1178 copypv_nc:
1179                 bl              EXT(bcopy_nc)                                   // call bcopy_nc to do the copying
1180 copypv_copydone:
1181
1182 // If requested, flush data cache
1183 // Note that we don't flush, the code is being saved "just in case".
1184 #if 0
1185                 bf              pvFsrc,copypv_nfsx                              // do we flush the source?
1186                 rldicl  r3,r27,32,32                                    // r3, r4 <- addr64_t source virtual address
1187                 rldicl  r4,r27,0,32
1188                 mr              r5,r31                                                  // r5 <- count (in bytes)
1189                 li              r6,0                                                    // r6 <- boolean phys (false, not physical)
1190                 bl              EXT(flush_dcache)                               // flush the source operand
1191 copypv_nfsx:
1192                 bf              pvFsnk,copypv_nfd                               // do we flush the destination?
1193                 rldicl  r3,r28,32,32                                    // r3, r4 <- addr64_t destination virtual address
1194                 rldicl  r4,r28,0,32
1195                 mr              r5,r31                                                  // r5 <- count (in bytes)
1196                 li              r6,0                                                    // r6 <- boolean phys (false, not physical)
1197                 bl              EXT(flush_dcache)                               // flush the destination operand
1198 copypv_nfd:
1199 #endif
1200
1201 // Leave 64-bit mode.
1202                 mfmsr   r3                                                              // get current msr
1203                 rldicl  r3,r3,0,MSR_SF_BIT+1                    // clear SF bit in our copy
1204                 mtmsrd  r3                                                              // leave 64-bit mode
1205
1206 // If requested, set ref/chg on source/dest physical operand(s). It is possible that copy is
1207 // from/to a RAM disk situated outside of mapped physical RAM, so we check each page by calling
1208 // mapping_phys_lookup() before we try to set its ref/chg bits; otherwise, we might panic.
1209 // Note that this code is page-size sensitive, so it should probably be a part of our low-level
1210 // code in hw_vm.s.
1211                 bt              pvNoModSnk,copypv_nomod                 // skip destination update if not requested
1212                 std             r29,FM_ARG0+0x30(r1)                    // preserve 64-bit r29 across 32-bit calls
1213                 li              r26,1                                                   // r26 <- 4K-page count
1214                 mr              r27,r31                                                 // r27 <- byte count
1215                 rlwinm  r3,r30,0,20,31                                  // does destination cross a page boundary?
1216                 subfic  r3,r3,4096                                              //
1217                 cmplw   r3,r27                                                  //
1218                 blt             copypv_modnox                                   // skip if not crossing case -- NOTE(review): blt is taken when bytes left in page < byte count, i.e. when crossing; confirm
1219                 subf    r27,r3,r27                                              // r27 <- byte count less initial fragment
1220                 addi    r26,r26,1                                               // increment page count
1221 copypv_modnox:
1222                 srdi    r3,r27,12                                               // pages to update (not including crosser)
1223                 add             r26,r26,r3                                              // add in crosser
1224                 srdi    r27,r30,12                                              // r27 <- destination page number
1225 copypv_modloop:
1226                 mr              r3,r27                                                  // r3 <- destination page number
1227                 la              r4,FM_ARG0+0x18(r1)                             // r4 <- unsigned int *pindex
1228                 bl              EXT(mapping_phys_lookup)                // see if page is really there
1229                 mr.             r3,r3                                                   // is it?
1230                 beq--   copypv_modend                                   // nope, break out of modify loop
1231                 mr              r3,r27                                                  // r3 <- destination page number
1232                 bl              EXT(mapping_set_mod)                    // set page changed status
1233                 subi    r26,r26,1                                               // decrement page count
1234                 cmpwi   r26,0                                                   // done yet?
1235                 bgt             copypv_modloop                                  // nope, iterate
1236 copypv_modend:
1237                 ld              r29,FM_ARG0+0x30(r1)                    // restore 64-bit r29
1238 copypv_nomod:
1239                 bt              pvNoRefSrc,copypv_done                  // skip source update if not requested
1240 copypv_debugref:
1241                 li              r26,1                                                   // r26 <- 4K-page count
1242                 mr              r27,r31                                                 // r27 <- byte count
1243                 rlwinm  r3,r29,0,20,31                                  // does source cross a page boundary?
1244                 subfic  r3,r3,4096                                              //
1245                 cmplw   r3,r27                                                  //
1246                 blt             copypv_refnox                                   // skip if not crossing case -- NOTE(review): blt is taken when bytes left in page < byte count, i.e. when crossing; confirm
1247                 subf    r27,r3,r27                                              // r27 <- byte count less initial fragment
1248                 addi    r26,r26,1                                               // increment page count
1249 copypv_refnox:
1250                 srdi    r3,r27,12                                               // pages to update (not including crosser)
1251                 add             r26,r26,r3                                              // add in crosser
1252                 srdi    r27,r29,12                                              // r27 <- source page number
1253 copypv_refloop:
1254                 mr              r3,r27                                                  // r3 <- source page number
1255                 la              r4,FM_ARG0+0x18(r1)                             // r4 <- unsigned int *pindex
1256                 bl              EXT(mapping_phys_lookup)                // see if page is really there
1257                 mr.             r3,r3                                                   // is it?
1258                 beq--   copypv_done                                             // nope, break out of reference loop
1259                 mr              r3,r27                                                  // r3 <- source  page number
1260                 bl              EXT(mapping_set_ref)                    // set page referenced status
1261                 subi    r26,r26,1                                               // decrement page count
1262                 cmpwi   r26,0                                                   // done yet?
1263                 bgt             copypv_refloop                                  // nope, iterate
1264
1265 // Return, indicating success.
1266 copypv_done:
1267 copypv_zero:
1268                 li              r3,0                                                    // our efforts were crowned with success
1269
1270 // Pop frame, restore caller's non-volatiles, restore the saved recovery routine pointer.
1271 copypv_return:
1272                 mfsprg  r9,1                                                    // get current threads stuff
1273                 lwz             r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
1274                                                                                                 // get return address
1275                 lwz             r4,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
1276                                                                                                 // get non-volatile cr2 and cr3
1277                 lwz             r26,FM_ARG0+0x00(r1)                    // restore non-volatile r26
1278                 lwz             r27,FM_ARG0+0x04(r1)                    // restore non-volatile r27
1279                 mtlr    r0                                                              // restore return address
1280                 lwz             r28,FM_ARG0+0x08(r1)                    // restore non-volatile r28
1281                 mtcrf   0x20,r4                                                 // restore non-volatile cr2
1282                 mtcrf   0x10,r4                                                 // restore non-volatile cr3
1283                 lwz             r11,FM_ARG0+0x20(r1)                    // get saved error callback
1284                 lwz             r29,FM_ARG0+0x0C(r1)                    // restore non-volatile r29
1285                 lwz             r30,FM_ARG0+0x10(r1)                    // restore non-volatile r30
1286                 lwz             r31,FM_ARG0+0x14(r1)                    // restore non-volatile r31
1287                 stw             r11,THREAD_RECOVER(r9)                  // restore caller's error callback
1288                 lwz             r1,0(r1)                                                // release stack frame
1289
1290                 blr                                                                             // y'all come back now
1291
1292 // Invalid argument handler.
1293 copypv_einval:
1294                 li              r3,EINVAL                                               // invalid argument
1295                 b               copypv_return                                   // return
1296
1297 // Error encountered during bcopy or bcopy_nc.
1298 copypv_error:
1299                 mfmsr   r3                                                              // get current msr
1300                 rldicl  r3,r3,0,MSR_SF_BIT+1                    // clear SF bit in our copy
1301                 mtmsrd  r3                                                              // leave 64-bit mode
1302                 li              r3,EFAULT                                               // it was all his fault
1303                 b               copypv_return                                   // return