]> git.saurik.com Git - apple/xnu.git/blob - osfmk/ppc/Emulate64.s
xnu-1504.9.26.tar.gz
[apple/xnu.git] / osfmk / ppc / Emulate64.s
1 /*
2 * Copyright (c) 2002-2007 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* Emulate64.s
30 *
31 * Software emulation of instructions not handled in hw, on 64-bit machines.
32 */
33
34 #include <sys/appleapiopts.h>
35 #include <ppc/asm.h>
36 #include <ppc/proc_reg.h>
37 #include <ppc/exception.h>
38 #include <mach/machine/vm_param.h>
39 #include <ppc/cpu_capabilities.h>
40 #include <assym.s>
41
42 // CR bit set if the instruction is an "update" form (LFDU, STWU, etc):
43 #define kUpdate 25
44
45 // CR bit set if interrupt occured in trace mode (ie, MSR_SE_BIT):
46 #define kTrace 8
47
48 // CR bit set if notification on alignment interrupts is requested (notifyUnalignbit in spcFlags):
49 #define kNotify 9
50
51 // CR bit distinguishes between alignment and program exceptions:
52 #define kAlignment 10
53
54
55
56 // *************************************
57 // * P R O G R A M I N T E R R U P T *
58 // *************************************
59 //
60 // These are floating pt exceptions, illegal instructions, privileged mode violations,
61 // and traps. All we're interested in at this low level is illegal instructions.
62 // The ones we "emulate" are:
63 // DCBA, which is not implemented in the IBM 970. The emulation is to ignore it,
64 // as it is just a hint.
65 // MCRXR, which is not implemented on the IBM 970, but is in the PPC ISA.
66 //
67 // Additionally, to facilitate debugging the alignment handler, we recognize a special
68 // diagnostic mode that is used to simulate alignment exceptions. When in this mode,
69 // if the instruction has opcode==0 and the extended opcode is one of the X-form
70 // instructions that can take an alignment interrupt, then we change the opcode to
71 // 31 and pretend it got an alignment interrupt. This exercises paths that
72 // are hard to drive or perhaps never driven on this particular CPU.
73
74 .text
75 .globl EXT(Emulate64)
76 .align 5
77 LEXT(Emulate64)
78 crclr kAlignment // not an alignment exception
79 b a64AlignAssistJoin // join alignment handler
80
81
82 // Return from alignment handler with all the regs loaded for opcode emulation.
83
84 a64HandleProgramInt:
85 rlwinm. r0,r29,0,SRR1_PRG_ILL_INS_BIT,SRR1_PRG_ILL_INS_BIT // illegal opcode?
86 beq a64PassAlong // No, must have been trap or priv violation etc
87 rlwinm r3,r20,6,26,31 // right justify opcode field (bits 0-5)
88 rlwinm r4,r20,31,22,31 // right justify extended opcode field (bits 21-30)
89 cmpwi cr0,r3,31 // X-form?
90 cmpwi cr1,r4,758 // DCBA?
91 cmpwi cr4,r4,512 // MCRXR?
92 crand cr1_eq,cr0_eq,cr1_eq // merge the two tests for DCBA
93 crand cr4_eq,cr0_eq,cr4_eq // and for MCRXR
94 beq++ cr1_eq,a64ExitEm // was DCBA, so ignore
95 bne-- cr4_eq,a64NotEmulated // skip if not MCRXR
96
97 // Was MCRXR, so emulate.
98
99 ld r3,savexer(r13) // get the XER
100 lwz r4,savecr(r13) // and the CR
101 rlwinm r5,r20,11,27,29 // get (CR# * 4) from instruction
102 rlwinm r6,r3,0,4,31 // zero XER[32-35] (also XER[0-31])
103 sld r4,r4,r5 // move target CR field to bits 32-35
104 rlwimi r4,r3,0,0,3 // move XER[32-35] into CR field
105 stw r6,savexer+4(r13) // update XER
106 srd r4,r4,r5 // re-position CR
107 stw r4,savecr(r13) // update CR
108 b a64ExitEm // done
109
110 // Not an opcode we normally emulate. If in special diagnostic mode and opcode=0,
111 // emulate as an alignment exception. This special case is for test software.
112
113 a64NotEmulated:
114 lwz r30,dgFlags(0) // Get the flags
115 rlwinm. r0,r30,0,enaDiagEMb,enaDiagEMb // Do we want to try to emulate something?
116 beq++ a64PassAlong // No emulation allowed
117 cmpwi r3,0 // opcode==0 ?
118 bne a64PassAlong // not the special case
119 oris r20,r20,0x7C00 // change opcode to 31
120 crset kAlignment // say we took alignment exception
121 rlwinm r5,r4,0,26+1,26-1 // mask Update bit (32) out of extended opcode
122 rlwinm r5,r5,0,0,31 // Clean out leftover junk from rlwinm
123
124 cmpwi r4,1014 // dcbz/dcbz128 ?
125 crmove cr1_eq,cr0_eq
126 cmpwi r5,21 // ldx/ldux ?
127 cror cr1_eq,cr0_eq,cr1_eq
128 cmpwi r5,599 // lfdx/lfdux ?
129 cror cr1_eq,cr0_eq,cr1_eq
130 cmpwi r5,535 // lfsx/lfsux ?
131 cror cr1_eq,cr0_eq,cr1_eq
132 cmpwi r5,343 // lhax/lhaux ?
133 cror cr1_eq,cr0_eq,cr1_eq
134 cmpwi r4,790 // lhbrx ?
135 cror cr1_eq,cr0_eq,cr1_eq
136 cmpwi r5,279 // lhzx/lhzux ?
137 cror cr1_eq,cr0_eq,cr1_eq
138 cmpwi r4,597 // lswi ?
139 cror cr1_eq,cr0_eq,cr1_eq
140 cmpwi r4,533 // lswx ?
141 cror cr1_eq,cr0_eq,cr1_eq
142 cmpwi r5,341 // lwax/lwaux ?
143 cror cr1_eq,cr0_eq,cr1_eq
144 cmpwi r4,534 // lwbrx ?
145 cror cr1_eq,cr0_eq,cr1_eq
146 cmpwi r5,23 // lwz/lwzx ?
147 cror cr1_eq,cr0_eq,cr1_eq
148 cmpwi r5,149 // stdx/stdux ?
149 cror cr1_eq,cr0_eq,cr1_eq
150 cmpwi r5,727 // stfdx/stfdux ?
151 cror cr1_eq,cr0_eq,cr1_eq
152 cmpwi r4,983 // stfiwx ?
153 cror cr1_eq,cr0_eq,cr1_eq
154 cmpwi r5,663 // stfsx/stfsux ?
155 cror cr1_eq,cr0_eq,cr1_eq
156 cmpwi r4,918 // sthbrx ?
157 cror cr1_eq,cr0_eq,cr1_eq
158 cmpwi r5,407 // sthx/sthux ?
159 cror cr1_eq,cr0_eq,cr1_eq
160 cmpwi r4,725 // stswi ?
161 cror cr1_eq,cr0_eq,cr1_eq
162 cmpwi r4,661 // stswx ?
163 cror cr1_eq,cr0_eq,cr1_eq
164 cmpwi r4,662 // stwbrx ?
165 cror cr1_eq,cr0_eq,cr1_eq
166 cmpwi r5,151 // stwx/stwux ?
167 cror cr1_eq,cr0_eq,cr1_eq
168
169 beq++ cr1,a64GotInstruction // it was one of the X-forms we handle
170 crclr kAlignment // revert to program interrupt
171 b a64PassAlong // not recognized extended opcode
172
173
174 // *****************************************
175 // * A L I G N M E N T I N T E R R U P T *
176 // *****************************************
177 //
178 // We get here in exception context, ie with interrupts disabled, translation off, and
179 // in 64-bit mode, with:
180 // r13 = save-area pointer, with general context already saved in it
181 // cr6 = feature flags
182 // We preserve r13 and cr6. Other GPRs and CRs, the LR and CTR are used.
183 //
184 // Current 64-bit processors (GPUL) handle almost all misaligned operations in hardware,
185 // so this routine usually isn't called very often. Only floating pt ops that cross a page
186 // boundary and are not word aligned, and LMW/STMW can take exceptions to cacheable memory.
187 // However, in contrast to G3 and G4, any misaligned load/store will get an alignment
188 // interrupt on uncached memory.
189 //
190 // We always emulate scalar ops with a series of byte load/stores. Doing so is no slower
191 // than LWZ/STW in cases where a scalar op gets an alignment exception.
192 //
193 // This routine supports all legal permutations of alignment interrupts occuring in user or
194 // supervisor mode, 32 or 64-bit addressing, and translation on or off. We do not emulate
195 // instructions that go past the end of an address space, such as "LHZ -1(0)"; we just pass
196 // along the alignment exception rather than wrap around to byte 0.
197 //
198 // First, check for a few special cases such as virtual machines, etc.
199
200 .globl EXT(AlignAssist64)
201 .align 5
202 LEXT(AlignAssist64)
203 crset kAlignment // mark as alignment interrupt
204
205 a64AlignAssistJoin: // join here from program interrupt handler
206 li r0,0 // Get a 0
207 mfsprg r31,0 // get the per_proc data ptr
208 mcrf cr3,cr6 // save feature flags here...
209 lwz r21,spcFlags(r31) // grab the special flags
210 ld r29,savesrr1(r13) // get the MSR etc at the fault
211 ld r28,savesrr0(r13) // get the EA of faulting instruction
212 stw r0,savemisc3(r13) // Assume we will handle this ok
213 mfmsr r26 // save MSR at entry
214 rlwinm. r0,r21,0,runningVMbit,runningVMbit // Are we running a VM?
215 lwz r19,dgFlags(0) // Get the diagnostics flags
216 bne-- a64PassAlong // yes, let the virtual machine monitor handle
217
218
219 // Set up the MSR shadow regs. We turn on FP in this routine, and usually set DR and RI
220 // when accessing user space (the SLB is still set up with all the user space translations.)
221 // However, if the interrupt occured in the kernel with DR off, we keep it off while
222 // accessing the "target" address space. If we set DR to access the target space, we also
223 // set RI. The RI bit tells the exception handlers to clear cr0 beq and return if we get an
224 // exception accessing the user address space. We are careful to test cr0 beq after every such
225 // access. We keep the following "shadows" of the MSR in global regs across this code:
226 // r25 = MSR at entry, plus FP and probably DR and RI (used to access target space)
227 // r26 = MSR at entry
228 // r27 = free
229 // r29 = SRR1 (ie, MSR at interrupt)
230 // Note that EE and IR are always off, and SF is always on in this code.
231
232 rlwinm r3,r29,31,MSR_DR_BIT,MSR_DR_BIT // Move instruction translate bit to DR
233 rlwimi r3,r3,32-MSR_RI_BIT+MSR_DR_BIT,MSR_RI_BIT,MSR_RI_BIT // if DR is now set, set RI too
234 or r25,r26,r3 // assemble MSR to use accessing target space
235
236
237 // Because the DSISR and DAR are either not set or are not to be trusted on some 64-bit
238 // processors on an alignment interrupt, we must fetch the faulting instruction ourselves,
239 // then decode/hash the opcode and reconstruct the EA manually.
240
241 mtmsr r25 // turn on FP and (if it was on at fault) DR and RI
242 isync // wait for it to happen
243 cmpw r0,r0 // turn on beq so we can check for DSIs
244 lwz r20,0(r28) // fetch faulting instruction, probably with DR on
245 bne-- a64RedriveAsISI // got a DSI trying to fetch it, pretend it was an ISI
246 mtmsr r26 // turn DR back off
247 isync // wait for it to happen
248
249
250 // Set a few flags while we wait for the faulting instruction to arrive from cache.
251
252 rlwinm. r0,r29,0,MSR_SE_BIT,MSR_SE_BIT // Were we single stepping?
253 stw r20,savemisc2(r13) // Save the instruction image in case we notify
254 crnot kTrace,cr0_eq
255 rlwinm. r0,r19,0,enaNotifyEMb,enaNotifyEMb // Should we notify?
256 crnot kNotify,cr0_eq
257
258 rlwinm r3,r29,0,MSR_DR_BIT,MSR_DR_BIT // was data translation on at fault?
259 rlwimi r3,r3,32-MSR_RI_BIT+MSR_DR_BIT,MSR_RI_BIT,MSR_RI_BIT // if DR is now set, set RI too
260 or r25,r26,r3 // assemble MSR to use accessing target space
261
262
263 // Hash the intruction into a 5-bit value "AAAAB" used to index the branch table, and a
264 // 1-bit kUpdate flag, as follows:
265 // ¥ for X-form instructions (with primary opcode 31):
266 // the "AAAA" bits are bits 21-24 of the instruction
267 // the "B" bit is the XOR of bits 29 and 30
268 // the update bit is instruction bit 25
269 // ¥ for D and DS-form instructions (actually, any primary opcode except 31):
270 // the "AAAA" bits are bits 1-4 of the instruction
271 // the "B" bit is 0
272 // the update bit is instruction bit 5
273 //
274 // Just for fun (and perhaps a little speed on deep-pipe machines), we compute the hash,
275 // update flag, and EA without branches and with ipc >= 2.
276 //
277 // When we "bctr" to the opcode-specific reoutine, the following are all set up:
278 // MSR = EE and IR off, SF and FP on
279 // r12 = full 64-bit EA (r17 is clamped EA)
280 // r13 = save-area pointer (physical)
281 // r14 = ptr to saver0 in save-area (ie, to base of GPRs)
282 // r15 = 0x00000000FFFFFFFF if 32-bit mode fault, 0xFFFFFFFFFFFFFFFF if 64
283 // r16 = RA * 8 (ie, reg# not reg value)
284 // r17 = EA, clamped to 32 bits if 32-bit mode fault (see also r12)
285 // r18 = (RA|0) (reg value)
286 // r19 = -1 if X-form, 0 if D-form
287 // r20 = faulting instruction
288 // r21 = RT * 8 (ie, reg# not reg value)
289 // r22 = addr(aaFPopTable)+(RT*32), ie ptr to floating pt table for target register
290 // r25 = MSR at entrance, probably with DR and RI set (for access to target space)
291 // r26 = MSR at entrance
292 // r27 = free
293 // r28 = SRR0 (ie, EA of faulting instruction)
294 // r29 = SRR1 (ie, MSR at fault)
295 // r30 = scratch, usually user data
296 // r31 = per-proc pointer
297 // cr2 = kTrace, kNotify, and kAlignment flags
298 // cr3 = saved copy of feature flags used in lowmem vector code
299 // cr6 = bits 24-27 of CR are bits 24-27 of opcode if X-form, or bits 4-5 and 00 if D-form
300 // bit 25 is the kUpdate flag, set for update form instructions
301 // cr7 = bits 28-31 of CR are bits 28-31 of opcode if X-form, or 0 if D-form
302
303 a64GotInstruction: // here from program interrupt with instruction in r20
304 rlwinm r21,r20,6+6,20,25 // move the primary opcode (bits 0-6) to bits 20-25
305 la r14,saver0(r13) // r14 <- base address of GPR registers
306 xori r19,r21,0x07C0 // iff primary opcode is 31, set r19 to 0
307 rlwinm r16,r20,16+3,24,28 // r16 <- RA*8
308 subi r19,r19,1 // set bit 0 iff X-form (ie, if primary opcode is 31)
309 rlwinm r17,r20,21+3,24,28 // r17 <- RB*8 (if X-form)
310 sradi r19,r19,63 // r19 <- -1 if X-form, 0 if D-form
311 extsh r22,r20 // r22 <- displacement (if D-form)
312
313 ldx r23,r14,r17 // get (RB), if any
314 and r15,r20,r19 // instruction if X, 0 if D
315 andc r17,r21,r19 // primary opcode in bits 20-25 if D, 0 if X
316 ldx r18,r14,r16 // get (RA)
317 subi r24,r16,1 // set bit 0 iff RA==0
318 or r21,r15,r17 // r21 <- instruction if X, or bits 0-5 in bits 20-25 if D
319 sradi r24,r24,63 // r24 <- -1 if RA==0, 0 otherwise
320 rlwinm r17,r21,32-4,25,28 // shift opcode bits 21-24 to 25-28 (hash "AAAA" bits)
321 lis r10,ha16(a64BranchTable) // start to build up branch table address
322 rlwimi r17,r21,0,29,29 // move opcode bit 29 into hash as start of "B" bit
323 rlwinm r30,r21,1,29,29 // position opcode bit 30 in position 29
324 and r12,r23,r19 // RB if X-form, 0 if D-form
325 andc r11,r22,r19 // 0 if X-form, sign extended displacement if D-form
326 xor r17,r17,r30 // bit 29 ("B") of hash is xor(bit29,bit30)
327 addi r10,r10,lo16(a64BranchTable)
328 or r12,r12,r11 // r12 <- (RB) or displacement, as appropriate
329 lwzx r30,r10,r17 // get address from branch table
330 mtcrf 0x01,r21 // move opcode bits 28-31 to CR7
331 sradi r15,r29,32 // propogate SF bit from SRR1 (MSR_SF, which is bit 0)
332 andc r18,r18,r24 // r18 <- (RA|0)
333 mtcrf 0x02,r21 // move opcode bits 24-27 to CR6 (kUpdate is bit 25)
334 add r12,r18,r12 // r12 <- 64-bit EA
335 mtctr r30 // set up branch address
336
337 oris r15,r15,0xFFFF // start to fill low word of r15 with 1s
338 rlwinm r21,r20,11+3,24,28 // r21 <- RT * 8
339 lis r22,ha16(EXT(aaFPopTable)) // start to compute address of floating pt table
340 ori r15,r15,0xFFFF // now bits 32-63 of r15 are 1s
341 addi r22,r22,lo16(EXT(aaFPopTable))
342 and r17,r12,r15 // clamp EA to 32 bits if fault occured in 32-bit mode
343 rlwimi r22,r21,2,22,26 // move RT into aaFPopTable address (which is 1KB aligned)
344
345 bf-- kAlignment,a64HandleProgramInt // return to Program Interrupt handler
346 bctr // if alignment interrupt, jump to opcode-specific routine
347
348
349 // Floating-pt load single (lfs[u], lfsx[u])
350
351 a64LfsLfsx:
352 bl a64Load4Bytes // get data in r30
353 mtctr r22 // set up address of "lfs fRT,emfp0(r31)"
354 stw r30,emfp0(r31) // put word here for aaFPopTable routine
355 bctrl // do the lfs
356 b a64UpdateCheck // update RA if necessary and exit
357
358
359 // Floating-pt store single (stfs[u], stfsx[u])
360
361 a64StfsStfsx:
362 ori r22,r22,8 // set dir==1 (ie, single store) in aaFPopTable
363 mtctr r22 // set up address of "stfs fRT,emfp0(r31)"
364 bctrl // execute the store into emfp0
365 lwz r30,emfp0(r31) // get the word
366 bl a64Store4Bytes // store r30 into user space
367 b a64UpdateCheck // update RA if necessary and exit
368
369
370 // Floating-pt store as integer word (stfiwx)
371
372 a64Stfiwx:
373 ori r22,r22,16+8 // set size=1, dir==1 (ie, double store) in aaFPopTable
374 mtctr r22 // set up FP register table address
375 bctrl // double precision store into emfp0
376 lwz r30,emfp0+4(r31) // get the low-order word
377 bl a64Store4Bytes // store r30 into user space
378 b a64Exit // successfully emulated
379
380
381 // Floating-pt load double (lfd[u], lfdx[u])
382
383 a64LfdLfdx:
384 ori r22,r22,16 // set Double bit in aaFPopTable address
385 bl a64Load8Bytes // get data in r30
386 mtctr r22 // set up address of "lfd fRT,emfp0(r31)"
387 std r30,emfp0(r31) // put doubleword here for aaFPopTable routine
388 bctrl // execute the load
389 b a64UpdateCheck // update RA if necessary and exit
390
391
392 // Floating-pt store double (stfd[u], stfdx[u])
393
394 a64StfdStfdx:
395 ori r22,r22,16+8 // set size=1, dir==1 (ie, double store) in aaFPopTable address
396 mtctr r22 // address of routine to stfd RT
397 bctrl // store into emfp0
398 ld r30,emfp0(r31) // get the doubleword
399 bl a64Store8Bytes // store r30 into user space
400 b a64UpdateCheck // update RA if necessary and exit
401
402
403 // Load halfword w 0-fill (lhz[u], lhzx[u])
404
405 a64LhzLhzx:
406 bl a64Load2Bytes // load into r30 from user space (w 0-fill)
407 stdx r30,r14,r21 // store into RT slot in register file
408 b a64UpdateCheck // update RA if necessary and exit
409
410
411 // Load halfword w sign fill (lha[u], lhax[u])
412
413 a64LhaLhax:
414 bl a64Load2Bytes // load into r30 from user space (w 0-fill)
415 extsh r30,r30 // sign-extend
416 stdx r30,r14,r21 // store into RT slot in register file
417 b a64UpdateCheck // update RA if necessary and exit
418
419
420 // Load halfword byte reversed (lhbrx)
421
422 a64Lhbrx:
423 bl a64Load2Bytes // load into r30 from user space (w 0-fill)
424 rlwinm r3,r30,8,16,23 // reverse bytes into r3
425 rlwimi r3,r30,24,24,31
426 stdx r3,r14,r21 // store into RT slot in register file
427 b a64Exit // successfully emulated
428
429
430 // Store halfword (sth[u], sthx[u])
431
432 a64SthSthx:
433 ldx r30,r14,r21 // get RT
434 bl a64Store2Bytes // store r30 into user space
435 b a64UpdateCheck // update RA if necessary and exit
436
437
438 // Store halfword byte reversed (sthbrx)
439
440 a64Sthbrx:
441 addi r21,r21,6 // point to low two bytes of RT
442 lhbrx r30,r14,r21 // load and reverse
443 bl a64Store2Bytes // store r30 into user space
444 b a64Exit // successfully emulated
445
446
447 // Load word w 0-fill (lwz[u], lwzx[u]), also lwarx.
448
449 a64LwzLwzxLwarx:
450 andc r3,r19,r20 // light bit 30 of r3 iff lwarx
451 andi. r0,r3,2 // is it lwarx?
452 bne-- a64PassAlong // yes, never try to emulate a lwarx
453 bl a64Load4Bytes // load 4 bytes from user space into r30 (0-filled)
454 stdx r30,r14,r21 // update register file
455 b a64UpdateCheck // update RA if necessary and exit
456
457
458 // Load word w sign fill (lwa, lwax[u])
459
460 a64Lwa:
461 crclr kUpdate // no update form of lwa (its a reserved encoding)
462 a64Lwax:
463 bl a64Load4Bytes // load 4 bytes from user space into r30 (0-filled)
464 extsw r30,r30 // sign extend
465 stdx r30,r14,r21 // update register file
466 b a64UpdateCheck // update RA if necessary and exit
467
468
469 // Load word byte reversed (lwbrx)
470
471 a64Lwbrx:
472 bl a64Load4Bytes // load 4 bytes from user space into r30 (0-filled)
473 rlwinm r3,r30,24,0,31 // flip bytes 1234 to 4123
474 rlwimi r3,r30,8,8,15 // r3 is now 4323
475 rlwimi r3,r30,8,24,31 // r3 is now 4321
476 stdx r3,r14,r21 // update register file
477 b a64Exit // successfully emulated
478
479
480 // Store word (stw[u], stwx[u])
481
482 a64StwStwx:
483 ldx r30,r14,r21 // get RT
484 bl a64Store4Bytes // store r30 into user space
485 b a64UpdateCheck // update RA if necessary and exit
486
487
488 // Store word byte reversed (stwbrx)
489
490 a64Stwbrx:
491 addi r21,r21,4 // point to low word of RT
492 lwbrx r30,r14,r21 // load and reverse
493 bl a64Store4Bytes // store r30 into user space
494 b a64Exit // successfully emulated
495
496
497 // Load doubleword (ld[u], ldx[u]), also lwa.
498
499 a64LdLwa: // these are DS form: ld=0, ldu=1, and lwa=2
500 mtcrf 0x01,r20 // move DS field to cr7
501 rlwinm r3,r20,0,30,31 // must adjust EA by subtracting DS field
502 sub r12,r12,r3 // subtract from full 64-bit EA
503 and r17,r12,r15 // then re-clamp to 32 bits if necessary
504 bt 30,a64Lwa // handle lwa
505 crmove kUpdate,31 // if opcode bit 31 is set, it is ldu so set update flag
506 a64Ldx:
507 bl a64Load8Bytes // load 8 bytes from user space into r30
508 stdx r30,r14,r21 // update register file
509 b a64UpdateCheck // update RA if necessary and exit
510
511
512 // Store doubleword (stdx[u], std[u], stwcx)
513
514 a64StdxStwcx:
515 bf-- 30,a64PassAlong // stwcx, so pass along alignment exception
516 b a64Stdx // was stdx
517 a64StdStfiwx: // if DS form: 0=std, 1=stdu, 2-3=undefined
518 bt 30,a64Stfiwx // handle stfiwx
519 rlwinm r3,r20,0,30,31 // must adjust EA by subtracting DS field
520 mtcrf 0x01,r20 // move DS field to cr7
521 sub r12,r12,r3 // subtract from full 64-bit EA
522 and r17,r12,r15 // then re-clamp to 32 bits if necessary
523 crmove kUpdate,31 // if DS==1, then it is update form
524 a64Stdx:
525 ldx r30,r14,r21 // get RT
526 bl a64Store8Bytes // store RT into user space
527 b a64UpdateCheck // update RA if necessary and exit
528
529
530 // Dcbz and Dcbz128 (bit 10 distinguishes the two forms)
531
532 a64DcbzDcbz128:
533 andis. r0,r20,0x0020 // bit 10 set?
534 li r3,0 // get a 0 to store
535 li r0,4 // assume 32-bit version, store 8 bytes 4x
536 rldicr r17,r17,0,63-5 // 32-byte align EA
537 li r4,_COMM_PAGE_BASE_ADDRESS
538 beq a64DcbzSetup // it was the 32-byte version
539 rldicr r17,r17,0,63-7 // zero low 7 bits of EA
540 li r0,16 // store 8 bytes 16x
541 a64DcbzSetup:
542 sub r4,r28,r4 // get instruction offset from start of commpage
543 and r4,r4,r15 // mask off high-order bits if 32-bit mode
544 cmpldi r4,_COMM_PAGE_AREA_USED // did fault occur in commpage area?
545 bge a64NotCommpage // not in commpage
546 rlwinm. r4,r29,0,MSR_PR_BIT,MSR_PR_BIT // did fault occur in user mode?
547 beq-- a64NotCommpage // do not zero cr7 if kernel got alignment exception
548 lwz r4,savecr(r13) // if we take a dcbz{128} in the commpage...
549 rlwinm r4,r4,0,0,27 // ...clear user's cr7...
550 stw r4,savecr(r13) // ...as a flag for commpage code
551 a64NotCommpage:
552 mtctr r0
553 cmpw r0,r0 // turn cr0 beq on so we can check for DSIs
554 mtmsr r25 // turn on DR and RI so we can address user space
555 isync // wait for it to happen
556 a64DcbzLoop:
557 std r3,0(r17) // store into user space
558 bne-- a64RedriveAsDSI
559 addi r17,r17,8
560 bdnz a64DcbzLoop
561
562 mtmsr r26 // restore MSR
563 isync // wait for it to happen
564 b a64Exit
565
566
567 // Load and store multiple (lmw, stmw), distinguished by bit 25
568
569 a64LmwStmw:
570 subfic r22,r21,32*8 // how many regs to load or store?
571 srwi r22,r22,1 // get bytes to load/store
572 bf 25,a64LoadMultiple // handle lmw
573 b a64StoreMultiple // it was stmw
574
575
576 // Load string word immediate (lswi)
577
578 a64Lswi:
579 rlwinm r22,r20,21,27,31 // get #bytes in r22
580 and r17,r18,r15 // recompute EA as (RA|0), and clamp
581 subi r3,r22,1 // r22==0?
582 rlwimi r22,r3,6,26,26 // map count of 0 to 32
583 b a64LoadMultiple
584
585
586 // Store string word immediate (stswi)
587
588 a64Stswi:
589 rlwinm r22,r20,21,27,31 // get #bytes in r22
590 and r17,r18,r15 // recompute EA as (RA|0), and clamp
591 subi r3,r22,1 // r22==0?
592 rlwimi r22,r3,6,26,26 // map count of 0 to 32
593 b a64StoreMultiple
594
595
596 // Load string word indexed (lswx), also lwbrx
597
598 a64LswxLwbrx:
599 bf 30,a64Lwbrx // was lwbrx
600 ld r22,savexer(r13) // get the xer
601 rlwinm r22,r22,0,25,31 // isolate the byte count
602 b a64LoadMultiple // join common code
603
604
605 // Store string word indexed (stswx), also stwbrx
606
607 a64StswxStwbrx:
608 bf 30,a64Stwbrx // was stwbrx
609 ld r22,savexer(r13) // get the xer
610 rlwinm r22,r22,0,25,31 // isolate the byte count
611 b a64StoreMultiple // join common code
612
613
614 // Load multiple words. This handles lmw, lswi, and lswx.
615
616 a64LoadMultiple: // r22 = byte count, may be 0
617 subic. r3,r22,1 // get (#bytes-1)
618 blt a64Exit // done if 0
619 add r4,r17,r3 // get EA of last operand byte
620 and r4,r4,r15 // clamp
621 cmpld r4,r17 // address space wrap?
622 blt-- a64PassAlong // pass along exception if so
623 srwi. r4,r22,2 // get # full words to load
624 rlwinm r22,r22,0,30,31 // r22 <- leftover byte count
625 cmpwi cr1,r22,0 // leftover bytes?
626 beq a64Lm3 // no words
627 mtctr r4 // set up word count
628 cmpw r0,r0 // set beq for DSI test
629 a64Lm2:
630 mtmsr r25 // turn on DR and RI
631 isync // wait for it to happen
632 lbz r3,0(r17)
633 bne-- a64RedriveAsDSI // got a DSI
634 lbz r4,1(r17)
635 bne-- a64RedriveAsDSI // got a DSI
636 lbz r5,2(r17)
637 bne-- a64RedriveAsDSI // got a DSI
638 lbz r6,3(r17)
639 bne-- a64RedriveAsDSI // got a DSI
640 rlwinm r30,r3,24,0,7 // pack bytes into r30
641 rldimi r30,r4,16,40
642 rldimi r30,r5,8,48
643 rldimi r30,r6,0,56
644 mtmsr r26 // turn DR back off so we can store into register file
645 isync
646 addi r17,r17,4 // bump EA
647 stdx r30,r14,r21 // pack into register file
648 addi r21,r21,8 // bump register file offset
649 rlwinm r21,r21,0,24,28 // wrap around to 0
650 bdnz a64Lm2
651 a64Lm3: // cr1/r22 = leftover bytes (0-3), cr0 beq set
652 beq cr1,a64Exit // no leftover bytes
653 mtctr r22
654 mtmsr r25 // turn on DR so we can access user space
655 isync
656 lbz r3,0(r17) // get 1st leftover byte
657 bne-- a64RedriveAsDSI // got a DSI
658 rlwinm r30,r3,24,0,7 // position in byte 4 of r30 (and clear rest of r30)
659 bdz a64Lm4 // only 1 byte leftover
660 lbz r3,1(r17) // get 2nd byte
661 bne-- a64RedriveAsDSI // got a DSI
662 rldimi r30,r3,16,40 // insert into byte 5 of r30
663 bdz a64Lm4 // only 2 bytes leftover
664 lbz r3,2(r17) // get 3rd byte
665 bne-- a64RedriveAsDSI // got a DSI
666 rldimi r30,r3,8,48 // insert into byte 6
667 a64Lm4:
668 mtmsr r26 // turn DR back off so we can store into register file
669 isync
670 stdx r30,r14,r21 // pack partially-filled word into register file
671 b a64Exit
672
673
674 // Store multiple words. This handles stmw, stswi, and stswx.
675
676 a64StoreMultiple: // r22 = byte count, may be 0
677 subic. r3,r22,1 // get (#bytes-1)
678 blt a64Exit // done if 0
679 add r4,r17,r3 // get EA of last operand byte
680 and r4,r4,r15 // clamp
681 cmpld r4,r17 // address space wrap?
682 blt-- a64PassAlong // pass along exception if so
683 srwi. r4,r22,2 // get # full words to load
684 rlwinm r22,r22,0,30,31 // r22 <- leftover byte count
685 cmpwi cr1,r22,0 // leftover bytes?
686 beq a64Sm3 // no words
687 mtctr r4 // set up word count
688 cmpw r0,r0 // turn on beq so we can check for DSIs
689 a64Sm2:
690 ldx r30,r14,r21 // get next register
691 addi r21,r21,8 // bump register file offset
692 rlwinm r21,r21,0,24,28 // wrap around to 0
693 srwi r3,r30,24 // shift the four bytes into position
694 srwi r4,r30,16
695 srwi r5,r30,8
696 mtmsr r25 // turn on DR so we can access user space
697 isync // wait for it to happen
698 stb r3,0(r17)
699 bne-- a64RedriveAsDSI // got a DSI
700 stb r4,1(r17)
701 bne-- a64RedriveAsDSI // got a DSI
702 stb r5,2(r17)
703 bne-- a64RedriveAsDSI // got a DSI
704 stb r30,3(r17)
705 bne-- a64RedriveAsDSI // got a DSI
706 mtmsr r26 // turn DR back off
707 isync
708 addi r17,r17,4 // bump EA
709 bdnz a64Sm2
710 a64Sm3: // r22 = 0-3, cr1 set on r22, cr0 beq set
711 beq cr1,a64Exit // no leftover bytes
712 ldx r30,r14,r21 // get last register
713 mtctr r22
714 mtmsr r25 // turn on DR so we can access user space
715 isync // wait for it to happen
716 a64Sm4:
717 rlwinm r30,r30,8,0,31 // position next byte
718 stb r30,0(r17) // pack into user space
719 addi r17,r17,1 // bump user space ptr
720 bne-- a64RedriveAsDSI // got a DSI
721 bdnz a64Sm4
722 mtmsr r26 // turn DR back off
723 isync
724 b a64Exit
725
726
727 // Subroutines to load bytes from user space.
728
729 a64Load2Bytes: // load 2 bytes right-justified into r30
730 addi r7,r17,1 // get EA of last byte
731 and r7,r7,r15 // clamp
732 cmpld r7,r17 // address wrap?
733 blt-- a64PassAlong // yes
734 mtmsr r25 // turn on DR so we can access user space
735 isync // wait for it to happen
736 sub. r30,r30,r30 // 0-fill dest and set beq
737 b a64Load2 // jump into routine
738 a64Load4Bytes: // load 4 bytes right-justified into r30 (ie, low order word)
739 addi r7,r17,3 // get EA of last byte
740 and r7,r7,r15 // clamp
741 cmpld r7,r17 // address wrap?
742 blt-- a64PassAlong // yes
743 mtmsr r25 // turn on DR so we can access user space
744 isync // wait for it to happen
745 sub. r30,r30,r30 // 0-fill dest and set beq
746 b a64Load4 // jump into routine
747 a64Load8Bytes: // load 8 bytes into r30
748 addi r7,r17,7 // get EA of last byte
749 and r7,r7,r15 // clamp
750 cmpld r7,r17 // address wrap?
751 blt-- a64PassAlong // yes
752 mtmsr r25 // turn on DR so we can access user space
753 isync // wait for it to happen
754 sub. r30,r30,r30 // 0-fill dest and set beq
755 lbz r3,-7(r7) // get byte 0
756 bne-- a64RedriveAsDSI // got a DSI
757 lbz r4,-6(r7) // and byte 1, etc
758 bne-- a64RedriveAsDSI // got a DSI
759 lbz r5,-5(r7)
760 bne-- a64RedriveAsDSI // got a DSI
761 lbz r6,-4(r7)
762 bne-- a64RedriveAsDSI // got a DSI
763 rldimi r30,r3,56,0 // position bytes in upper word
764 rldimi r30,r4,48,8
765 rldimi r30,r5,40,16
766 rldimi r30,r6,32,24
767 a64Load4:
768 lbz r3,-3(r7)
769 bne-- a64RedriveAsDSI // got a DSI
770 lbz r4,-2(r7)
771 bne-- a64RedriveAsDSI // got a DSI
772 rldimi r30,r3,24,32 // insert bytes 4 and 5 into r30
773 rldimi r30,r4,16,40
774 a64Load2:
775 lbz r3,-1(r7)
776 bne-- a64RedriveAsDSI // got a DSI
777 lbz r4,0(r7)
778 bne-- a64RedriveAsDSI // got a DSI
779 mtmsr r26 // turn DR back off
780 isync
781 rldimi r30,r3,8,48 // insert bytes 6 and 7 into r30
782 rldimi r30,r4,0,56
783 blr
784
785
786 // Subroutines to store bytes into user space.
787
788 a64Store2Bytes: // store bytes 6 and 7 of r30
789 addi r7,r17,1 // get EA of last byte
790 and r7,r7,r15 // clamp
791 cmpld r7,r17 // address wrap?
792 blt-- a64PassAlong // yes
793 mtmsr r25 // turn on DR so we can access user space
794 isync // wait for it to happen
795 cmpw r0,r0 // set beq so we can check for DSI
796 b a64Store2 // jump into routine
797 a64Store4Bytes: // store bytes 4-7 of r30 (ie, low order word)
798 addi r7,r17,3 // get EA of last byte
799 and r7,r7,r15 // clamp
800 cmpld r7,r17 // address wrap?
801 blt-- a64PassAlong // yes
802 mtmsr r25 // turn on DR so we can access user space
803 isync // wait for it to happen
804 cmpw r0,r0 // set beq so we can check for DSI
805 b a64Store4 // jump into routine
806 a64Store8Bytes: // r30 = bytes
807 addi r7,r17,7 // get EA of last byte
808 and r7,r7,r15 // clamp
809 cmpld r7,r17 // address wrap?
810 blt-- a64PassAlong // yes
811 mtmsr r25 // turn on DR so we can access user space
812 isync // wait for it to happen
813 cmpw r0,r0 // set beq so we can check for DSI
814 rotldi r3,r30,8 // shift byte 0 into position
815 rotldi r4,r30,16 // and byte 1
816 rotldi r5,r30,24 // and byte 2
817 rotldi r6,r30,32 // and byte 3
818 stb r3,-7(r7) // store byte 0
819 bne-- a64RedriveAsDSI // got a DSI
820 stb r4,-6(r7) // and byte 1 etc...
821 bne-- a64RedriveAsDSI // got a DSI
822 stb r5,-5(r7)
823 bne-- a64RedriveAsDSI // got a DSI
824 stb r6,-4(r7)
825 bne-- a64RedriveAsDSI // got a DSI
826 a64Store4:
827 rotldi r3,r30,40 // shift byte 4 into position
828 rotldi r4,r30,48 // and byte 5
829 stb r3,-3(r7)
830 bne-- a64RedriveAsDSI // got a DSI
831 stb r4,-2(r7)
832 bne-- a64RedriveAsDSI // got a DSI
833 a64Store2:
834 rotldi r3,r30,56 // shift byte 6 into position
835 stb r3,-1(r7) // store byte 6
836 bne-- a64RedriveAsDSI // got a DSI
837 stb r30,0(r7) // store byte 7, which is already positioned
838 bne-- a64RedriveAsDSI // got a DSI
839 mtmsr r26 // turn off DR
840 isync
841 blr
842
843
844 // Exit routines.
845
846 a64ExitEm:
847 li r30,T_EMULATE // Change exception code to emulate
848 stw r30,saveexception(r13) // Save it
849 b a64Exit // Join standard exit routine...
850
851 a64PassAlong: // unhandled exception, just pass it along
852 li r0,1 // Set that the alignment/program exception was not emulated
853 crset kNotify // return T_ALIGNMENT or T_PROGRAM
854 stw r0,savemisc3(r13) // Set that emulation was not done
855 crclr kTrace // not a trace interrupt
856 b a64Exit1
857 a64UpdateCheck: // successfully emulated, may be update form
858 bf kUpdate,a64Exit // update?
859 stdx r12,r14,r16 // yes, store 64-bit EA into RA
860 a64Exit: // instruction successfully emulated
861 addi r28,r28,4 // bump SRR0 past the emulated instruction
862 li r30,T_IN_VAIN // eat the interrupt since we emulated it
863 and r28,r28,r15 // clamp to address space size (32 vs 64)
864 std r28,savesrr0(r13) // save, so we return to next instruction
865 a64Exit1:
866 bt-- kTrace,a64Trace // were we in single-step at fault?
867 bt-- kNotify,a64Notify // should we say T_ALIGNMENT anyway?
868 a64Exit2:
869 mcrf cr6,cr3 // restore feature flags
870 mr r11,r30 // pass back exception code (T_IN_VAIN etc) in r11
871 b EXT(EmulExit) // return to exception processing
872
873
874 // Notification requested: pass exception upstairs even though it might have been emulated.
875
876 a64Notify:
877 li r30,T_ALIGNMENT // somebody wants to know about it (but don't redrive)
878 bt kAlignment,a64Exit2 // was an alignment exception
879 li r30,T_PROGRAM // was an emulated instruction
880 b a64Exit2
881
882
883 // Emulate a trace interrupt after handling alignment interrupt.
884
885 a64Trace:
886 lwz r9,SAVflags(r13) // get the save-area flags
887 li r30,T_TRACE
888 oris r9,r9,hi16(SAVredrive) // Set the redrive bit
889 stw r30,saveexception(r13) // Set the exception code
890 stw r9,SAVflags(r13) // Set the flags
891 b a64Exit2 // Exit and do trace interrupt...
892
893
894 // Got a DSI accessing user space. Redrive. One way this can happen is if another
895 // processor removes a mapping while we are emulating.
896
897 a64RedriveAsISI: // this DSI happened fetching the opcode (r1==DSISR r4==DAR)
898 mtmsr r26 // turn DR back off
899 isync // wait for it to happen
900 li r30,T_INSTRUCTION_ACCESS
901 rlwimi r29,r1,0,1,4 // insert the fault type from DSI's DSISR
902 std r29,savesrr1(r13) // update SRR1 to look like an ISI
903 b a64Redrive
904
905 a64RedriveAsDSI: // r0==DAR r1==DSISR
906 mtmsr r26 // turn DR back off
907 isync // wait for it to happen
908 stw r1,savedsisr(r13) // Set the DSISR of failed access
909 std r0,savedar(r13) // Set the address of the failed access
910 li r30,T_DATA_ACCESS // Set failing data access code
911 a64Redrive:
912 lwz r9,SAVflags(r13) // Pick up the flags
913 stw r30,saveexception(r13) // Set the replacement code
914 oris r9,r9,hi16(SAVredrive) // Set the redrive bit
915 stw r9,SAVflags(r13) // Set redrive request
916 crclr kTrace // don't take a trace interrupt
917 crclr kNotify // don't pass alignment exception
918 b a64Exit2 // done
919
920
921 // This is the branch table, indexed by the "AAAAB" opcode hash.
922
923 a64BranchTable:
924 .long a64LwzLwzxLwarx // 00000 lwz[u], lwzx[u], lwarx
925 .long a64Ldx // 00001 ldx[u]
926 .long a64PassAlong // 00010 ldarx (never emulate these)
927 .long a64PassAlong // 00011
928 .long a64StwStwx // 00100 stw[u], stwx[u]
929 .long a64StdxStwcx // 00101 stdx[u], stwcx
930 .long a64PassAlong // 00110
931 .long a64PassAlong // 00111 stdcx (never emulate these)
932 .long a64LhzLhzx // 01000 lhz[u], lhzx[u]
933 .long a64PassAlong // 01001
934 .long a64LhaLhax // 01010 lha[u], lhax[u]
935 .long a64Lwax // 01011 lwax[u]
936 .long a64SthSthx // 01100 sth[u], sthx[u]
937 .long a64PassAlong // 01101
938 .long a64LmwStmw // 01110 lmw, stmw
939 .long a64PassAlong // 01111
940 .long a64LfsLfsx // 10000 lfs[u], lfsx[u]
941 .long a64LswxLwbrx // 10001 lswx, lwbrx
942 .long a64LfdLfdx // 10010 lfd[u], lfdx[u]
943 .long a64Lswi // 10011 lswi
944 .long a64StfsStfsx // 10100 stfs[u], stfsx[u]
945 .long a64StswxStwbrx // 10101 stswx, stwbrx
946 .long a64StfdStfdx // 10110 stfd[u], stfdx[u]
947 .long a64Stswi // 10111 stswi
948 .long a64PassAlong // 11000
949 .long a64Lhbrx // 11001 lhbrx
950 .long a64LdLwa // 11010 ld[u], lwa
951 .long a64PassAlong // 11011
952 .long a64PassAlong // 11100
953 .long a64Sthbrx // 11101 sthbrx
954 .long a64StdStfiwx // 11110 std[u], stfiwx
955 .long a64DcbzDcbz128 // 11111 dcbz, dcbz128
956
957