2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
32 * Mach Operating System
33 * Copyright (c) 1991,1990 Carnegie Mellon University
34 * All Rights Reserved.
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 * Carnegie Mellon requests users of this software to return to
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
58 #include <platforms.h>
60 #include <mach_kgdb.h>
62 #include <stat_time.h>
63 #include <mach_assert.h>
65 #include <sys/errno.h>
67 #include <i386/cpuid.h>
68 #include <i386/eflags.h>
69 #include <i386/proc_reg.h>
70 #include <i386/trap.h>
72 #include <mach/exception_types.h>
74 #define _ARCH_I386_ASM_HELP_H_ /* Prevent inclusion of user header */
75 #include <mach/i386/syscall_sw.h>
80 * PTmap is recursive pagemap at top of virtual address space.
81 * Within PTmap, the page directory can be found (third indirection).
/*
 * Export the recursive page-map symbols as absolute assembler constants:
 *   _PTmap  = PTDPTDI << PDESHIFT
 *   _PTD    = _PTmap + PTDPTDI * NBPG
 *   _PTDpde = _PTD   + PTDPTDI * PDESIZE
 * Each symbol is computed from the one before it.
 * NOTE(review): presumably PTDPTDI is the page-directory slot that points
 * back at the directory itself - confirm against the pmap headers.
 */
83 .globl _PTmap,_PTD,_PTDpde
84 .set _PTmap,(PTDPTDI << PDESHIFT)
85 .set _PTD,_PTmap + (PTDPTDI * NBPG)
86 .set _PTDpde,_PTD + (PTDPTDI * PDESIZE)
89 * APTmap, APTD is the alternate recursive pagemap.
90 * It's used when modifying another process's page tables.
/*
 * Export the alternate recursive page-map symbols:
 *   _APTmap  = APTDPTDI << PDESHIFT
 *   _APTD    = _APTmap + APTDPTDI * NBPG
 *   _APTDpde = _PTD    + APTDPTDI * PDESIZE
 * Note that _APTDpde is offset from _PTD, not from _APTD.
 * NOTE(review): presumably deliberate, because the entry that maps the
 * alternate map would live in the current page directory - confirm.
 */
92 .globl _APTmap,_APTD,_APTDpde
93 .set _APTmap,(APTDPTDI << PDESHIFT)
94 .set _APTD,_APTmap + (APTDPTDI * NBPG)
95 .set _APTDpde,_PTD + (APTDPTDI * PDESIZE)
98 /* Under Mach-O, etext is a variable which contains
99 * the last text address
101 #define ETEXT_ADDR (EXT(etext))
103 /* Under ELF and other non-Mach-O formats, the address of
104 * etext represents the last text address
106 #define ETEXT_ADDR $ EXT(etext)
/*
 * CX(addr,reg): build the scaled-index memory operand addr(,reg,4),
 * i.e. the 4-byte element at index reg of the table starting at addr.
 */
109 #define CX(addr,reg) addr(,reg,4)
112 * The following macros make calls into C code.
113 * They dynamically align the stack to 16 bytes.
114 * Arguments are moved (not pushed) onto the correctly aligned stack.
115 * NOTE: EDI is destroyed in the process, and hence cannot
116 * be directly used as a parameter. Users of this macro must
117 * independently preserve EDI (a non-volatile) if the routine is
118 * intended to be called from C, for instance.
123 andl $0xFFFFFFF0, %esp ;\
127 #define CCALL1(fn, arg1) \
130 andl $0xFFFFFFF0, %esp ;\
131 movl arg1, 0(%esp) ;\
135 #define CCALL2(fn, arg1, arg2) \
138 andl $0xFFFFFFF0, %esp ;\
139 movl arg2, 4(%esp) ;\
140 movl arg1, 0(%esp) ;\
144 #define CCALL3(fn, arg1, arg2, arg3) \
147 andl $0xFFFFFFF0, %esp ;\
148 movl arg3, 8(%esp) ;\
149 movl arg2, 4(%esp) ;\
150 movl arg1, 0(%esp) ;\
162 #define RECOVERY_SECTION .section __VECTORS, __recover
/*
 * RECOVERY_SECTION expands to a plain .text directive here.
 * (The identical definition used to be repeated on the next line; one
 * copy is sufficient.)
 * NOTE(review): presumably this is the section the recovery-table macros
 * emit into - confirm against RECOVER_TABLE_START / RECOVER.
 */
164 #define RECOVERY_SECTION .text
168 #define RECOVER_TABLE_START \
170 .globl EXT(recover_table) ;\
171 LEXT(recover_table) ;\
174 #define RECOVER(addr) \
181 #define RECOVER_TABLE_END \
183 .globl EXT(recover_table_end) ;\
184 LEXT(recover_table_end) ;\
188 * Allocate recovery and table.
200 movl %eax,TIMER_HIGHCHK(%ecx)
201 movl %edx,TIMER_LOW(%ecx)
202 movl %eax,TIMER_HIGH(%ecx)
207 0: movl TIMER_HIGH(%ecx),%edx
208 movl TIMER_LOW(%ecx),%eax
209 cmpl TIMER_HIGHCHK(%ecx),%edx
215 #define TIME_TRAP_UENTRY
216 #define TIME_TRAP_UEXIT
217 #define TIME_INT_ENTRY
218 #define TIME_INT_EXIT
226 * Nanotime returned in %edx:%eax.
227 * Computed from tsc based on the scale factor
228 * and an implicit 32 bit shift.
230 * Uses %eax, %ebx, %ecx, %edx, %esi, %edi.
232 #define RNT_INFO _rtc_nanotime_info
234 0: movl RNT_INFO+RNT_TSC_BASE,%esi ; \
235 movl RNT_INFO+RNT_TSC_BASE+4,%edi ; \
237 subl %esi,%eax /* tsc - tsc_base */ ; \
239 movl RNT_INFO+RNT_SCALE,%ecx ; \
240 movl %edx,%ebx /* delta * scale */ ; \
246 adcl $0,%edx /* add carry into hi */ ; \
247 addl RNT_INFO+RNT_NS_BASE,%eax /* add ns_base lo */ ; \
248 adcl RNT_INFO+RNT_NS_BASE+4,%edx /* add ns_base hi */ ; \
249 cmpl RNT_INFO+RNT_TSC_BASE,%esi ; \
250 jne 0b /* repeat if changed */ ; \
251 cmpl RNT_INFO+RNT_TSC_BASE+4,%edi ; \
255 * Add 64-bit delta in register dreg : areg to timer pointed to by register treg.
/*
 * TIMER_UPDATE(treg,dreg,areg)
 *   treg - register pointing at the timer to update
 *   dreg - in: high 32 bits of the delta; out: updated TIMER_HIGH value
 *   areg - in: low 32 bits of the delta;  out: updated TIMER_LOW value
 * The 64-bit add is done as addl on the low word followed by adcl on the
 * high word, so no flag-modifying instruction may be inserted between them.
 * Stores are issued in the order TIMER_HIGH, TIMER_LOW, TIMER_HIGHCHK.
 * NOTE(review): presumably readers compare TIMER_HIGH with TIMER_HIGHCHK to
 * detect a concurrent update - confirm this store order against the reader.
 * Clobbers the flags.
 */
257 #define TIMER_UPDATE(treg,dreg,areg) \
258 addl TIMER_LOW(treg),areg /* areg = timer low + delta low, sets carry */ ; \
259 adcl dreg,TIMER_HIGH(treg) /* timer high += delta high + carry */ ; \
260 movl areg,TIMER_LOW(treg) /* store updated low bits */ ; \
261 movl TIMER_HIGH(treg),dreg /* reload updated high bits */ ; \
262 movl dreg,TIMER_HIGHCHK(treg) /* mirror them into the high check word */
265 * Add time delta to old timer and start new.
/*
 * TIMER_EVENT(old,new)
 *   old, new - name prefixes pasted with _TIMER and _STATE to form offsets.
 * Sequence:
 *   1. NANOTIME leaves the current time in %edx:%eax; a copy is kept in
 *      %edi:%esi.
 *   2. The time elapsed since TIMER_TSTAMP of the timer at
 *      THREAD_TIMER(processor) is added to that timer.
 *   3. The timer pointer is advanced by (new_TIMER - old_TIMER), stamped with
 *      the saved time and written back to THREAD_TIMER. This assumes the
 *      current timer is the old##_TIMER one.
 *   4. Steps 2-3 are repeated for CURRENT_STATE(processor) using the _STATE
 *      offsets.
 * On exit %ebx holds the %gs:CPU_PROCESSOR pointer, %ecx points at the new
 * state, and %edi:%esi hold the timestamp.
 * Modifies %eax,%ebx,%ecx,%edx,%esi,%edi and the flags.
 */
267 #define TIMER_EVENT(old,new) \
268 NANOTIME /* edx:eax = current nanoseconds */ ; \
269 movl %eax,%esi /* keep timestamp low in esi */ ; \
270 movl %edx,%edi /* keep timestamp high in edi */ ; \
271 movl %gs:CPU_PROCESSOR,%ebx /* ebx = current processor */ ; \
272 movl THREAD_TIMER(%ebx),%ecx /* ecx = current thread timer */ ; \
273 subl TIMER_TSTAMP(%ecx),%eax /* elapsed low = now - tstamp */ ; \
274 sbbl TIMER_TSTAMP+4(%ecx),%edx /* elapsed high, with borrow */ ; \
275 TIMER_UPDATE(%ecx,%edx,%eax) /* add elapsed time to old timer */ ; \
276 addl $(new##_TIMER-old##_TIMER),%ecx /* step from old timer to new timer */ ; \
277 movl %esi,TIMER_TSTAMP(%ecx) /* stamp new timer, low word */ ; \
278 movl %edi,TIMER_TSTAMP+4(%ecx) /* stamp new timer, high word */ ; \
279 movl %ecx,THREAD_TIMER(%ebx) /* new timer becomes current */ ; \
280 movl %esi,%eax /* reload timestamp low */ ; \
281 movl %edi,%edx /* reload timestamp high */ ; \
282 movl CURRENT_STATE(%ebx),%ecx /* ecx = current state timer */ ; \
283 subl TIMER_TSTAMP(%ecx),%eax /* elapsed low = now - tstamp */ ; \
284 sbbl TIMER_TSTAMP+4(%ecx),%edx /* elapsed high, with borrow */ ; \
285 TIMER_UPDATE(%ecx,%edx,%eax) /* add elapsed time to old state */ ; \
286 addl $(new##_STATE-old##_STATE),%ecx /* step from old state to new state */ ; \
287 movl %ecx,CURRENT_STATE(%ebx) /* new state becomes current */ ; \
288 movl %esi,TIMER_TSTAMP(%ecx) /* stamp new state, low word */ ; \
289 movl %edi,TIMER_TSTAMP+4(%ecx) /* stamp new state, high word */
292 * Update time on user trap entry.
293 * Uses %eax,%ebx,%ecx,%edx,%esi,%edi.
295 #define TIME_TRAP_UENTRY TIMER_EVENT(USER,SYSTEM)
298 * update time on user trap exit.
299 * Uses %eax,%ebx,%ecx,%edx,%esi,%edi.
301 #define TIME_TRAP_UEXIT TIMER_EVENT(SYSTEM,USER)
304 * update time on interrupt entry.
305 * Uses %eax,%ebx,%ecx,%edx,%esi,%edi.
306 * Saves processor state info on stack.
/*
 * TIME_INT_ENTRY
 * Sequence:
 *   1. NANOTIME leaves the current time in %edx:%eax; it is stored in
 *      %gs:CPU_INT_EVENT_TIME and kept in %edi:%esi.
 *   2. Elapsed time is added to the timer at THREAD_TIMER(processor).
 *   3. The timer at KERNEL_TIMER(processor) is stamped with the current time.
 *      THREAD_TIMER itself is not changed.
 *   4. The CURRENT_STATE pointer is pushed on the stack, and elapsed time is
 *      added to that state.
 *   5. Unless the current state is the processor's IDLE_STATE, CURRENT_STATE
 *      is switched to SYSTEM_STATE. The resulting state is stamped.
 * Leaves one 32-bit word (the previous state pointer) on the stack, which
 * TIME_INT_EXIT pops. Uses local label 0.
 * Modifies %eax,%ebx,%ecx,%edx,%esi,%edi and the flags.
 */
308 #define TIME_INT_ENTRY \
309 NANOTIME /* edx:eax = current nanoseconds */ ; \
310 movl %eax,%gs:CPU_INT_EVENT_TIME /* record event time low in cpu data */ ; \
311 movl %edx,%gs:CPU_INT_EVENT_TIME+4 /* record event time high in cpu data */ ; \
312 movl %eax,%esi /* keep timestamp low in esi */ ; \
313 movl %edx,%edi /* keep timestamp high in edi */ ; \
314 movl %gs:CPU_PROCESSOR,%ebx /* ebx = current processor */ ; \
315 movl THREAD_TIMER(%ebx),%ecx /* ecx = current thread timer */ ; \
316 subl TIMER_TSTAMP(%ecx),%eax /* elapsed low = now - tstamp */ ; \
317 sbbl TIMER_TSTAMP+4(%ecx),%edx /* elapsed high, with borrow */ ; \
318 TIMER_UPDATE(%ecx,%edx,%eax) /* add elapsed time to thread timer */ ; \
319 movl KERNEL_TIMER(%ebx),%ecx /* ecx = kernel timer */ ; \
320 movl %esi,TIMER_TSTAMP(%ecx) /* stamp kernel timer, low word */ ; \
321 movl %edi,TIMER_TSTAMP+4(%ecx) /* stamp kernel timer, high word */ ; \
322 movl %esi,%eax /* reload timestamp low */ ; \
323 movl %edi,%edx /* reload timestamp high */ ; \
324 movl CURRENT_STATE(%ebx),%ecx /* ecx = current state timer */ ; \
325 pushl %ecx /* save state pointer for TIME_INT_EXIT */ ; \
326 subl TIMER_TSTAMP(%ecx),%eax /* elapsed low = now - tstamp */ ; \
327 sbbl TIMER_TSTAMP+4(%ecx),%edx /* elapsed high, with borrow */ ; \
328 TIMER_UPDATE(%ecx,%edx,%eax) /* add elapsed time to current state */ ; \
329 leal IDLE_STATE(%ebx),%eax /* eax = address of idle state */ ; \
330 cmpl %eax,%ecx /* is the current state the idle state? */ ; \
331 je 0f /* yes: stay in idle state */ ; \
332 leal SYSTEM_STATE(%ebx),%ecx /* no: ecx = address of system state */ ; \
333 movl %ecx,CURRENT_STATE(%ebx) /* system state becomes current */ ; \
334 0: movl %esi,TIMER_TSTAMP(%ecx) /* stamp resulting state, low word */ ; \
335 movl %edi,TIMER_TSTAMP+4(%ecx) /* stamp resulting state, high word */
338 * update time on interrupt exit.
339 * Uses %eax,%ebx,%ecx,%edx,%esi,%edi.
340 * Restores processor state info from stack.
/*
 * TIME_INT_EXIT
 * Sequence:
 *   1. NANOTIME leaves the current time in %edx:%eax; it is stored in
 *      %gs:CPU_INT_EVENT_TIME and kept in %edi:%esi.
 *   2. Elapsed time is added to the timer at KERNEL_TIMER(processor).
 *   3. The timer at THREAD_TIMER(processor) is stamped with the current time.
 *   4. Elapsed time is added to the CURRENT_STATE timer.
 *   5. One word is popped from the stack into %ecx, written to CURRENT_STATE
 *      and stamped.
 * Expects the top of the stack to hold the state pointer pushed by
 * TIME_INT_ENTRY.
 * Modifies %eax,%ebx,%ecx,%edx,%esi,%edi and the flags.
 */
342 #define TIME_INT_EXIT \
343 NANOTIME /* edx:eax = current nanoseconds */ ; \
344 movl %eax,%gs:CPU_INT_EVENT_TIME /* record event time low in cpu data */ ; \
345 movl %edx,%gs:CPU_INT_EVENT_TIME+4 /* record event time high in cpu data */ ; \
346 movl %eax,%esi /* keep timestamp low in esi */ ; \
347 movl %edx,%edi /* keep timestamp high in edi */ ; \
348 movl %gs:CPU_PROCESSOR,%ebx /* ebx = current processor */ ; \
349 movl KERNEL_TIMER(%ebx),%ecx /* ecx = kernel timer */ ; \
350 subl TIMER_TSTAMP(%ecx),%eax /* elapsed low = now - tstamp */ ; \
351 sbbl TIMER_TSTAMP+4(%ecx),%edx /* elapsed high, with borrow */ ; \
352 TIMER_UPDATE(%ecx,%edx,%eax) /* add elapsed time to kernel timer */ ; \
353 movl THREAD_TIMER(%ebx),%ecx /* ecx = interrupted thread timer */ ; \
354 movl %esi,TIMER_TSTAMP(%ecx) /* stamp thread timer, low word */ ; \
355 movl %edi,TIMER_TSTAMP+4(%ecx) /* stamp thread timer, high word */ ; \
356 movl %esi,%eax /* reload timestamp low */ ; \
357 movl %edi,%edx /* reload timestamp high */ ; \
358 movl CURRENT_STATE(%ebx),%ecx /* ecx = current state timer */ ; \
359 subl TIMER_TSTAMP(%ecx),%eax /* elapsed low = now - tstamp */ ; \
360 sbbl TIMER_TSTAMP+4(%ecx),%edx /* elapsed high, with borrow */ ; \
361 TIMER_UPDATE(%ecx,%edx,%eax) /* add elapsed time to current state */ ; \
362 popl %ecx /* recover state pointer saved on entry */ ; \
363 movl %ecx,CURRENT_STATE(%ebx) /* saved state becomes current again */ ; \
364 movl %esi,TIMER_TSTAMP(%ecx) /* stamp restored state, low word */ ; \
365 movl %edi,TIMER_TSTAMP+4(%ecx) /* stamp restored state, high word */
367 #endif /* STAT_TIME */
374 * Traditional, not ANSI.
378 .globl label/**/count ;\
381 .globl label/**/limit ;\
385 addl $1,%ss:label/**/count ;\
386 cmpl $0,label/**/limit ;\
390 movl %ss:label/**/count,%eax ;\
391 cmpl %eax,%ss:label/**/limit ;\
404 * Last-ditch debug code to handle faults that might result
405 * from entering kernel (from collocated server) on an invalid
406 * stack. On collocated entry, there's no hardware-initiated
407 * stack switch, so a valid stack must be in place when an
408 * exception occurs, or we may double-fault.
410 * In case of a double-fault, our only recourse is to switch
411 * hardware "tasks", so that we avoid using the current stack.
413 * The idea here is just to get the processor into the debugger,
414 * post-haste. No attempt is made to fix up whatever error got
415 * us here, so presumably continuing from the debugger will
416 * simply land us here again -- at best.
420 * Note that the per-fault entry points are not currently
421 * functional. The only way to make them work would be to
422 * set up separate TSS's for each fault type, which doesn't
423 * currently seem worthwhile. (The offset part of a task
424 * gate is always ignored.) So all faults that task switch
425 * currently resume at db_task_start.
428 * Double fault (Murphy's point) - error code (0) on stack
430 Entry(db_task_dbl_fault)
432 movl $(T_DOUBLE_FAULT),%ebx
435 * Segment not present - error code on stack
437 Entry(db_task_seg_np)
439 movl $(T_SEGMENT_NOT_PRESENT),%ebx
442 * Stack fault - error code on (current) stack
444 Entry(db_task_stk_fault)
446 movl $(T_STACK_FAULT),%ebx
449 * General protection fault - error code on stack
451 Entry(db_task_gen_prot)
453 movl $(T_GENERAL_PROTECTION),%ebx
457 * The entry point where execution resumes after last-ditch debugger task switch.
462 subl $(ISS32_SIZE),%edx
463 movl %edx,%esp /* allocate x86_saved_state on stack */
464 movl %eax,R_ERR(%esp)
465 movl %ebx,R_TRAPNO(%esp)
468 movl CX(EXT(master_dbtss),%edx),%edx
469 movl TSS_LINK(%edx),%eax
470 pushl %eax /* pass along selector of previous TSS */
471 call EXT(db_tss_to_frame)
472 popl %eax /* get rid of TSS selector */
473 call EXT(db_trap_from_asm)
478 iret /* ha, ha, ha... */
479 #endif /* MACH_KDB */
482 * Called as a function, makes the current thread
483 * return from the kernel as if from an exception.
486 .globl EXT(thread_exception_return)
487 .globl EXT(thread_bootstrap_return)
488 LEXT(thread_exception_return)
489 LEXT(thread_bootstrap_return)
491 movl %gs:CPU_KERNEL_STACK,%ecx
492 movl (%ecx),%esp /* switch back to PCB stack */
493 jmp EXT(return_from_trap)
495 Entry(call_continuation)
496 movl S_ARG0,%eax /* get continuation */
497 movl S_ARG1,%edx /* continuation param */
498 movl S_ARG2,%ecx /* wait result */
499 movl %gs:CPU_KERNEL_STACK,%esp /* pop the stack */
500 xorl %ebp,%ebp /* zero frame pointer */
501 subl $8,%esp /* align the stack */
504 call *%eax /* call continuation */
506 movl %gs:CPU_ACTIVE_THREAD,%eax
508 call EXT(thread_terminate)
512 /*******************************************************************************************************
514 * All 64 bit task 'exceptions' enter lo_alltraps:
515 * esp -> x86_saved_state_t
517 * The rest of the state is set up as:
518 * cr3 -> kernel directory
519 * esp -> low based stack
522 * ss/ds/es -> KERNEL_DS
524 * interrupts disabled
525 * direction flag cleared
528 movl R_CS(%esp),%eax /* assume 32-bit state */
529 cmpl $(SS_64),SS_FLAVOR(%esp)/* 64-bit? */
531 movl R64_CS(%esp),%eax /* 64-bit user mode */
538 movl %gs:CPU_ACTIVE_THREAD,%ecx
539 movl ACT_TASK(%ecx),%ebx
541 /* Check for active vtimers in the current task */
542 cmpl $0,TASK_VTIMERS(%ebx)
545 /* Set a pending AST */
546 orl $(AST_BSD),%gs:CPU_PENDING_AST
548 /* Set a thread AST (atomic) */
550 orl $(AST_BSD),ACT_AST(%ecx)
553 movl %gs:CPU_KERNEL_STACK,%ebx
554 xchgl %ebx,%esp /* switch to kernel stack */
557 CCALL1(user_trap, %ebx) /* call user trap routine */
558 cli /* hold off intrs - critical section */
559 popl %esp /* switch back to PCB stack */
562 * Return from trap or system call, checking for ASTs.
563 * On lowbase PCB stack with intrs disabled
565 LEXT(return_from_trap)
566 movl %gs:CPU_PENDING_AST,%eax
568 je EXT(return_to_user) /* branch if no AST */
570 movl %gs:CPU_KERNEL_STACK,%ebx
571 xchgl %ebx,%esp /* switch to kernel stack */
572 sti /* interrupts always enabled on return to user mode */
574 pushl %ebx /* save PCB stack */
575 xorl %ebp,%ebp /* Clear framepointer */
576 CCALL1(i386_astintr, $0) /* take the AST */
578 popl %esp /* switch back to PCB stack (w/exc link) */
579 jmp EXT(return_from_trap) /* and check again (rare) */
585 cmpl $0, %gs:CPU_IS64BIT
586 je EXT(lo_ret_to_user)
587 jmp EXT(lo64_ret_to_user)
592 * Trap from kernel mode. No need to switch stacks.
593 * Interrupts must be off here - we will set them to state at time of trap
594 * as soon as it's safe for us to do so and not recurse doing preemption
597 movl %esp, %eax /* saved state addr */
598 pushl R_EIP(%esp) /* Simulate a CALL from fault point */
599 pushl %ebp /* Extend framepointer chain */
601 CCALL1(kernel_trap, %eax) /* Call kernel trap handler */
606 movl %gs:CPU_PENDING_AST,%eax /* get pending asts */
607 testl $ AST_URGENT,%eax /* any urgent preemption? */
608 je ret_to_kernel /* no, nothing to do */
609 cmpl $ T_PREEMPT,R_TRAPNO(%esp)
610 je ret_to_kernel /* T_PREEMPT handled in kernel_trap() */
611 testl $ EFL_IF,R_EFLAGS(%esp) /* interrupts disabled? */
613 cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */
615 movl %gs:CPU_KERNEL_STACK,%eax
618 andl $(-KERNEL_STACK_SIZE),%ecx
619 testl %ecx,%ecx /* are we on the kernel stack? */
620 jne ret_to_kernel /* no, skip it */
622 CCALL1(i386_astintr, $1) /* take the AST */
625 cmpl $0, %gs:CPU_IS64BIT
626 je EXT(lo_ret_to_kernel)
627 jmp EXT(lo64_ret_to_kernel)
631 /*******************************************************************************************************
633 * All interrupts on all tasks enter here with:
634 * esp -> x86_saved_state_t
636 * cr3 -> kernel directory
637 * esp -> low based stack
640 * ss/ds/es -> KERNEL_DS
642 * interrupts disabled
643 * direction flag cleared
647 * test whether already on interrupt stack
649 movl %gs:CPU_INT_STACK_TOP,%ecx
652 leal -INTSTACK_SIZE(%ecx),%edx
656 xchgl %ecx,%esp /* switch to interrupt stack */
658 movl %cr0,%eax /* get cr0 */
659 orl $(CR0_TS),%eax /* or in TS bit */
660 movl %eax,%cr0 /* set cr0 */
662 subl $8, %esp /* for 16-byte stack alignment */
663 pushl %ecx /* save pointer to old stack */
664 movl %ecx,%gs:CPU_INT_STATE /* save intr state */
666 TIME_INT_ENTRY /* do timing */
668 movl %gs:CPU_ACTIVE_THREAD,%ecx
669 movl ACT_TASK(%ecx),%ebx
671 /* Check for active vtimers in the current task */
672 cmpl $0,TASK_VTIMERS(%ebx)
675 /* Set a pending AST */
676 orl $(AST_BSD),%gs:CPU_PENDING_AST
678 /* Set a thread AST (atomic) */
680 orl $(AST_BSD),ACT_AST(%ecx)
683 incl %gs:CPU_PREEMPTION_LEVEL
684 incl %gs:CPU_INTERRUPT_LEVEL
686 movl %gs:CPU_INT_STATE, %eax
687 CCALL1(PE_incoming_interrupt, %eax) /* call generic interrupt routine */
689 cli /* just in case we returned with intrs enabled */
691 movl %eax,%gs:CPU_INT_STATE /* clear intr state pointer */
693 decl %gs:CPU_INTERRUPT_LEVEL
694 decl %gs:CPU_PREEMPTION_LEVEL
696 TIME_INT_EXIT /* do timing */
698 movl %gs:CPU_ACTIVE_THREAD,%eax
699 movl ACT_PCB(%eax),%eax /* get act`s PCB */
700 movl PCB_FPS(%eax),%eax /* get pcb's ims.ifps */
701 cmpl $0,%eax /* Is there a context */
702 je 1f /* Branch if not */
703 movl FP_VALID(%eax),%eax /* Load fp_valid */
704 cmpl $0,%eax /* Check if valid */
705 jne 1f /* Branch if valid */
709 movl %cr0,%eax /* get cr0 */
710 orl $(CR0_TS),%eax /* or in TS bit */
711 movl %eax,%cr0 /* set cr0 */
713 popl %esp /* switch back to old stack */
715 /* Load interrupted code segment into %eax */
716 movl R_CS(%esp),%eax /* assume 32-bit state */
717 cmpl $(SS_64),SS_FLAVOR(%esp)/* 64-bit? */
719 movl R64_CS(%esp),%eax /* 64-bit user mode */
721 testb $3,%al /* user mode, */
722 jnz ast_from_interrupt_user /* go handle potential ASTs */
724 * we only want to handle preemption requests if
725 * the interrupt fell in the kernel context
726 * and preemption isn't disabled
728 movl %gs:CPU_PENDING_AST,%eax
729 testl $ AST_URGENT,%eax /* any urgent requests? */
730 je ret_to_kernel /* no, nothing to do */
732 cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */
733 jne ret_to_kernel /* yes, skip it */
735 movl %gs:CPU_KERNEL_STACK,%eax
738 andl $(-KERNEL_STACK_SIZE),%ecx
739 testl %ecx,%ecx /* are we on the kernel stack? */
740 jne ret_to_kernel /* no, skip it */
743 * Take an AST from kernel space. We don't need (and don't want)
744 * to do as much as the case where the interrupt came from user space.
747 CCALL1(i386_astintr, $1)
753 * nested int - simple path, can't preempt etc on way out
756 incl %gs:CPU_PREEMPTION_LEVEL
757 incl %gs:CPU_INTERRUPT_LEVEL
759 movl %esp, %edx /* x86_saved_state */
760 CCALL1(PE_incoming_interrupt, %edx)
762 decl %gs:CPU_INTERRUPT_LEVEL
763 decl %gs:CPU_PREEMPTION_LEVEL
768 * Take an AST from an interrupted user
770 ast_from_interrupt_user:
771 movl %gs:CPU_PENDING_AST,%eax
772 testl %eax,%eax /* pending ASTs? */
773 je EXT(ret_to_user) /* no, nothing to do */
777 jmp EXT(return_from_trap) /* return */
780 /*******************************************************************************************************
783 * System call entries via INTR_GATE or sysenter:
785 * esp -> x86_saved_state32_t
786 * cr3 -> kernel directory
787 * esp -> low based stack
790 * ss/ds/es -> KERNEL_DS
792 * interrupts disabled
793 * direction flag cleared
798 * We can be here either for a mach syscall or a unix syscall,
799 * as indicated by the sign of the code:
801 movl R_EAX(%esp),%eax
803 js EXT(lo_mach_scall) /* < 0 => mach */
809 movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */
810 movl ACT_TASK(%ecx),%ebx /* point to current task */
811 addl $1,TASK_SYSCALLS_UNIX(%ebx) /* increment call count */
813 /* Check for active vtimers in the current task */
814 cmpl $0,TASK_VTIMERS(%ebx)
817 /* Set a pending AST */
818 orl $(AST_BSD),%gs:CPU_PENDING_AST
820 /* Set a thread AST (atomic) */
822 orl $(AST_BSD),ACT_AST(%ecx)
825 movl %gs:CPU_KERNEL_STACK,%ebx
826 xchgl %ebx,%esp /* switch to kernel stack */
830 CCALL1(unix_syscall, %ebx)
832 * always returns through thread_exception_return
839 movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */
840 movl ACT_TASK(%ecx),%ebx /* point to current task */
841 addl $1,TASK_SYSCALLS_MACH(%ebx) /* increment call count */
843 /* Check for active vtimers in the current task */
844 cmpl $0,TASK_VTIMERS(%ebx)
847 /* Set a pending AST */
848 orl $(AST_BSD),%gs:CPU_PENDING_AST
850 /* Set a thread AST (atomic) */
852 orl $(AST_BSD),ACT_AST(%ecx)
855 movl %gs:CPU_KERNEL_STACK,%ebx
856 xchgl %ebx,%esp /* switch to kernel stack */
860 CCALL1(mach_call_munger, %ebx)
862 * always returns through thread_exception_return
869 movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */
870 movl ACT_TASK(%ecx),%ebx /* point to current task */
872 /* Check for active vtimers in the current task */
873 cmpl $0,TASK_VTIMERS(%ebx)
876 /* Set a pending AST */
877 orl $(AST_BSD),%gs:CPU_PENDING_AST
879 /* Set a thread AST (atomic) */
881 orl $(AST_BSD),ACT_AST(%ecx)
884 movl %gs:CPU_KERNEL_STACK,%ebx
885 xchgl %ebx,%esp /* switch to kernel stack */
889 CCALL1(machdep_syscall, %ebx)
891 * always returns through thread_exception_return
898 movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */
899 movl ACT_TASK(%ecx),%ebx /* point to current task */
901 /* Check for active vtimers in the current task */
902 cmpl $0,TASK_VTIMERS(%ebx)
905 /* Set a pending AST */
906 orl $(AST_BSD),%gs:CPU_PENDING_AST
908 /* Set a thread AST (atomic) */
910 orl $(AST_BSD),ACT_AST(%ecx)
913 movl %gs:CPU_KERNEL_STACK,%ebx // Get the address of the kernel stack
914 xchgl %ebx,%esp // Switch to it, saving the previous
916 CCALL1(diagCall, %ebx) // Call diagnostics
918 cmpl $0,%eax // What kind of return is this?
920 cli // Disable interruptions just in case they were enabled
921 popl %esp // Get back the original stack
922 jmp EXT(return_to_user) // Normal return, do not check asts...
924 CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
925 // pass what would be the diag syscall
926 // error return - cause an exception
931 /*******************************************************************************************************
934 * System call entries via syscall only:
936 * esp -> x86_saved_state64_t
937 * cr3 -> kernel directory
938 * esp -> low based stack
941 * ss/ds/es -> KERNEL_DS
943 * interrupts disabled
944 * direction flag cleared
949 * We can be here either for a mach, unix, machdep or diag syscall,
950 * as indicated by the syscall class:
952 movl R64_RAX(%esp), %eax /* syscall number/class */
954 andl $(SYSCALL_CLASS_MASK), %ebx /* syscall class */
955 cmpl $(SYSCALL_CLASS_MACH<<SYSCALL_CLASS_SHIFT), %ebx
956 je EXT(lo64_mach_scall)
957 cmpl $(SYSCALL_CLASS_UNIX<<SYSCALL_CLASS_SHIFT), %ebx
958 je EXT(lo64_unix_scall)
959 cmpl $(SYSCALL_CLASS_MDEP<<SYSCALL_CLASS_SHIFT), %ebx
960 je EXT(lo64_mdep_scall)
961 cmpl $(SYSCALL_CLASS_DIAG<<SYSCALL_CLASS_SHIFT), %ebx
962 je EXT(lo64_diag_scall)
964 movl %gs:CPU_KERNEL_STACK,%ebx
965 xchgl %ebx,%esp /* switch to kernel stack */
969 /* Syscall class unknown */
970 CCALL3(i386_exception, $(EXC_SYSCALL), %eax, $1)
974 Entry(lo64_unix_scall)
977 movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */
978 movl ACT_TASK(%ecx),%ebx /* point to current task */
979 addl $1,TASK_SYSCALLS_UNIX(%ebx) /* increment call count */
981 /* Check for active vtimers in the current task */
982 cmpl $0,TASK_VTIMERS(%ebx)
985 /* Set a pending AST */
986 orl $(AST_BSD),%gs:CPU_PENDING_AST
988 /* Set a thread AST (atomic) */
990 orl $(AST_BSD),ACT_AST(%ecx)
993 movl %gs:CPU_KERNEL_STACK,%ebx
994 xchgl %ebx,%esp /* switch to kernel stack */
998 CCALL1(unix_syscall64, %ebx)
1000 * always returns through thread_exception_return
1004 Entry(lo64_mach_scall)
1007 movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */
1008 movl ACT_TASK(%ecx),%ebx /* point to current task */
1009 addl $1,TASK_SYSCALLS_MACH(%ebx) /* increment call count */
1011 /* Check for active vtimers in the current task */
1012 cmpl $0,TASK_VTIMERS(%ebx)
1015 /* Set a pending AST */
1016 orl $(AST_BSD),%gs:CPU_PENDING_AST
1019 orl $(AST_BSD),ACT_AST(%ecx)
1022 movl %gs:CPU_KERNEL_STACK,%ebx
1023 xchgl %ebx,%esp /* switch to kernel stack */
1027 CCALL1(mach_call_munger64, %ebx)
1029 * always returns through thread_exception_return
1034 Entry(lo64_mdep_scall)
1037 movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */
1038 movl ACT_TASK(%ecx),%ebx /* point to current task */
1040 /* Check for active vtimers in the current task */
1041 cmpl $0,TASK_VTIMERS(%ebx)
1044 /* Set a pending AST */
1045 orl $(AST_BSD),%gs:CPU_PENDING_AST
1047 /* Set a thread AST (atomic) */
1049 orl $(AST_BSD),ACT_AST(%ecx)
1052 movl %gs:CPU_KERNEL_STACK,%ebx
1053 xchgl %ebx,%esp /* switch to kernel stack */
1057 CCALL1(machdep_syscall64, %ebx)
1059 * always returns through thread_exception_return
1063 Entry(lo64_diag_scall)
1066 movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */
1067 movl ACT_TASK(%ecx),%ebx /* point to current task */
1069 /* Check for active vtimers in the current task */
1070 cmpl $0,TASK_VTIMERS(%ebx)
1073 /* Set a pending AST */
1074 orl $(AST_BSD),%gs:CPU_PENDING_AST
1076 /* Set a thread AST (atomic) */
1078 orl $(AST_BSD),ACT_AST(%ecx)
1081 movl %gs:CPU_KERNEL_STACK,%ebx // Get the address of the kernel stack
1082 xchgl %ebx,%esp // Switch to it, saving the previous
1084 CCALL1(diagCall64, %ebx) // Call diagnostics
1086 cmpl $0,%eax // What kind of return is this?
1088 cli // Disable interruptions just in case they were enabled
1089 popl %esp // Get back the original stack
1090 jmp EXT(return_to_user) // Normal return, do not check asts...
1092 CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
1093 // pass what would be the diag syscall
1094 // error return - cause an exception
1104 * Copy from user/kernel address space.
1105 * arg0: window offset or kernel address
1106 * arg1: kernel address
1109 Entry(copyinphys_user)
1110 movl $(USER_WINDOW_SEL),%ecx /* user data segment access through kernel window */
1113 Entry(copyinphys_kern)
1114 movl $(PHYS_WINDOW_SEL),%ecx /* physical access through kernel window */
1119 movl $(USER_WINDOW_SEL),%ecx /* user data segment access through kernel window */
1126 pushl %edi /* save registers */
1128 movl 8+S_ARG0,%esi /* get source - window offset or kernel address */
1129 movl 8+S_ARG1,%edi /* get destination - kernel address */
1130 movl 8+S_ARG2,%edx /* get count */
1133 movl %edx,%ecx /* move by longwords first */
1136 RECOVER(copyin_fail)
1138 movsl /* move longwords */
1139 movl %edx,%ecx /* now move remaining bytes */
1142 RECOVER(copyin_fail)
1145 xorl %eax,%eax /* return 0 for success */
1147 mov %ss,%cx /* restore kernel data and extended segments */
1151 popl %edi /* restore registers */
1153 ret /* and return */
1156 movl $(EFAULT),%eax /* return error for failure */
1157 jmp copyin_ret /* pop frame and return */
1162 * Copy string from user/kern address space.
1163 * arg0: window offset or kernel address
1164 * arg1: kernel address
1165 * arg2: max byte count
1166 * arg3: actual byte count (OUT)
1168 Entry(copyinstr_kern)
1170 jmp copyinstr_common
1172 Entry(copyinstr_user)
1173 movl $(USER_WINDOW_SEL),%ecx /* user data segment access through kernel window */
1179 pushl %edi /* save registers */
1181 movl 8+S_ARG0,%esi /* get source - window offset or kernel address */
1182 movl 8+S_ARG1,%edi /* get destination - kernel address */
1183 movl 8+S_ARG2,%edx /* get count */
1185 xorl %eax,%eax /* set to 0 here so that the high 24 bits */
1186 /* are 0 for the cmpl against 0 */
1189 RECOVER(copystr_fail) /* copy bytes... */
1192 testl %edi,%edi /* if kernel address is ... */
1193 jz 3f /* not NULL */
1194 movb %al,(%edi) /* copy the byte */
1197 testl %eax,%eax /* did we just stuff the 0-byte? */
1198 jz 4f /* yes, return 0 status already in %eax */
1199 decl %edx /* decrement #bytes left in buffer */
1200 jnz 2b /* buffer not full so copy in another byte */
1201 movl $(ENAMETOOLONG),%eax /* buffer full but no 0-byte: ENAMETOOLONG */
1203 movl 8+S_ARG3,%edi /* get OUT len ptr */
1205 jz copystr_ret /* if null, just return */
1207 movl %esi,(%edi) /* else set OUT arg to xfer len */
1209 popl %edi /* restore registers */
1211 ret /* and return */
1214 movl $(EFAULT),%eax /* return error for failure */
1215 jmp copystr_ret /* pop frame and return */
1219 * Copy to user/kern address space.
1220 * arg0: kernel address
1221 * arg1: window offset or kernel address
1224 ENTRY(copyoutphys_user)
1225 movl $(USER_WINDOW_SEL),%ecx /* user data segment access through kernel window */
1228 ENTRY(copyoutphys_kern)
1229 movl $(PHYS_WINDOW_SEL),%ecx /* physical access through kernel window */
1234 movl $(USER_WINDOW_SEL),%ecx /* user data segment access through kernel window */
1241 pushl %edi /* save registers */
1243 movl 8+S_ARG0,%esi /* get source - kernel address */
1244 movl 8+S_ARG1,%edi /* get destination - window offset or kernel address */
1245 movl 8+S_ARG2,%edx /* get count */
1248 movl %edx,%ecx /* move by longwords first */
1251 RECOVER(copyout_fail)
1254 movl %edx,%ecx /* now move remaining bytes */
1257 RECOVER(copyout_fail)
1260 xorl %eax,%eax /* return 0 for success */
1262 mov %ss,%cx /* restore kernel segment */
1266 popl %edi /* restore registers */
1268 ret /* and return */
1271 movl $(EFAULT),%eax /* return error for failure */
1272 jmp copyout_ret /* pop frame and return */
1275 * io register must not be used on slaves (no AT bus)
1277 #define ILL_ON_SLAVE
1285 #define PUSH_FRAME FRAME
1286 #define POP_FRAME EMARF
1288 #else /* MACH_ASSERT */
1296 #endif /* MACH_ASSERT */
1299 #if MACH_KDB || MACH_ASSERT
1302 * Following routines are also defined as macros in i386/pio.h
1303 * Compile then when MACH_KDB is configured so that they
1304 * can be invoked from the debugger.
1308 * void outb(unsigned char *io_port,
1309 * unsigned char byte)
1311 * Output a byte to an IO port.
1316 movl ARG0,%edx /* IO port address */
1317 movl ARG1,%eax /* data to output */
1318 outb %al,%dx /* send it out */
1323 * unsigned char inb(unsigned char *io_port)
1325 * Input a byte from an IO port.
1330 movl ARG0,%edx /* IO port address */
1331 xor %eax,%eax /* clear high bits of register */
1332 inb %dx,%al /* get the byte */
1337 * void outw(unsigned short *io_port,
1338 * unsigned short word)
1340 * Output a word to an IO port.
1345 movl ARG0,%edx /* IO port address */
1346 movl ARG1,%eax /* data to output */
1347 outw %ax,%dx /* send it out */
1352 * unsigned short inw(unsigned short *io_port)
1354 * Input a word from an IO port.
1359 movl ARG0,%edx /* IO port address */
1360 xor %eax,%eax /* clear high bits of register */
1361 inw %dx,%ax /* get the word */
1366 * void outl(unsigned int *io_port,
1367 * unsigned int data)
1369 * Output an int to an IO port.
1374 movl ARG0,%edx /* %edx = IO port; outl addresses it through %dx */
1375 movl ARG1,%eax /* %eax = 32-bit value to output */
1376 outl %eax,%dx /* write %eax to the port in %dx */
1381 * unsigned int inl(unsigned int *io_port)
1383 * Input an int from an IO port.
1388 movl ARG0,%edx /* %edx = IO port; inl addresses it through %dx */
1389 inl %dx,%eax /* read 32 bits from the port into %eax */
1393 #endif /* MACH_KDB || MACH_ASSERT*/
1396 * void loutb(unsigned char *io_port,
1397 * unsigned char *data,
1398 * unsigned int count)
1400 * Output an array of bytes to an IO port.
1406 movl %esi,%eax /* stash caller's %esi in %eax; it is reloaded below */
1407 movl ARG0,%edx /* %edx = IO port number */
1408 movl ARG1,%esi /* %esi = address of the bytes to output */
1409 movl ARG2,%ecx /* %ecx = number of bytes */
1413 movl %eax,%esi /* put caller's %esi back */
1419 * void loutw(unsigned short *io_port,
1420 * unsigned short *data,
1421 * unsigned int count)
1423 * Output an array of shorts to an IO port.
1429 movl %esi,%eax /* stash caller's %esi in %eax; it is reloaded below */
1430 movl ARG0,%edx /* %edx = IO port number */
1431 movl ARG1,%esi /* %esi = address of the shorts to output */
1432 movl ARG2,%ecx /* %ecx = number of shorts */
1436 movl %eax,%esi /* put caller's %esi back */
1441 * void loutl(unsigned short io_port,
1442 * unsigned int *data,
1443 * unsigned int count)
1445 * Output an array of longs to an IO port.
1451 movl %esi,%eax /* stash caller's %esi in %eax; it is reloaded below */
1452 movl ARG0,%edx /* %edx = IO port number */
1453 movl ARG1,%esi /* %esi = address of the longs to output */
1454 movl ARG2,%ecx /* %ecx = number of longs */
1458 movl %eax,%esi /* put caller's %esi back */
1464 * void linb(unsigned char *io_port,
1465 * unsigned char *data,
1466 * unsigned int count)
1468 * Input an array of bytes from an IO port.
1474 movl %edi,%eax /* stash caller's %edi in %eax; it is reloaded below */
1475 movl ARG0,%edx /* %edx = IO port number */
1476 movl ARG1,%edi /* %edi = address of the buffer receiving the bytes */
1477 movl ARG2,%ecx /* %ecx = number of bytes */
1481 movl %eax,%edi /* put caller's %edi back */
1487 * void linw(unsigned short *io_port,
1488 * unsigned short *data,
1489 * unsigned int count)
1491 * Input an array of shorts from an IO port.
1497 movl %edi,%eax /* stash caller's %edi in %eax; it is reloaded below */
1498 movl ARG0,%edx /* %edx = IO port number */
1499 movl ARG1,%edi /* %edi = address of the buffer receiving the shorts */
1500 movl ARG2,%ecx /* %ecx = number of shorts */
1504 movl %eax,%edi /* put caller's %edi back */
1510 * void linl(unsigned short io_port,
1511 * unsigned int *data,
1512 * unsigned int count)
1514 * Input an array of longs from an IO port.
1520 movl %edi,%eax /* stash caller's %edi in %eax; it is reloaded below */
1521 movl ARG0,%edx /* %edx = IO port number */
1522 movl ARG1,%edi /* %edi = address of the buffer receiving the longs */
1523 movl ARG2,%ecx /* %ecx = number of longs */
1527 movl %eax,%edi /* put caller's %edi back */
1532 * int rdmsr_carefully(uint32_t msr, uint32_t *lo, uint32_t *hi)
1534 ENTRY(rdmsr_carefully)
1551 * Done with recovery table.
1591 lidt null_idtr /* load IDTR from null_idtr so no interrupt handler can be dispatched */
1592 xor %ecx,%ecx /* %ecx = 0, the divisor used below */
1593 div %ecx,%eax /* divide by zero; with handlers disabled this fault is what reboots */
1594 ret /* not expected to be reached */
1596 #endif /* SYMMETRY */
1600 * setbit(int bitno, int *s) - set bit in bit string
1603 movl S_ARG0, %ecx /* %ecx = bitno */
1604 movl S_ARG1, %eax /* %eax = s, base address of the bit string */
1605 btsl %ecx, (%eax) /* set bit number %ecx, counted from (%eax) */
1609 * clrbit(int bitno, int *s) - clear bit in bit string
1612 movl S_ARG0, %ecx /* %ecx = bitno */
1613 movl S_ARG1, %eax /* %eax = s, base address of the bit string */
1614 btrl %ecx, (%eax) /* clear bit number %ecx, counted from (%eax) */
1618 * ffsbit(int *s) - find first set bit in bit string
1621 movl S_ARG0, %ecx /* %ecx = s, address of the word being scanned */
1622 movl $0, %edx /* %edx = bit offset of that word from the start */
1624 bsfl (%ecx), %eax /* %eax = index of lowest set bit; ZF=1 if the word is 0 */
1625 jnz 1f /* word was non-zero: bit found */
1626 addl $4, %ecx /* advance to the next 32-bit word */
1627 addl $32, %edx /* account for the 32 bits just skipped */
1628 jmp 0b /* rescan; NOTE(review): no length bound is visible, so s presumably must contain a set bit */
1630 addl %edx, %eax /* result = bits skipped + index within the word */
1634 * testbit(int nr, volatile void *array)
1636 * Test to see if the bit is set within the bit string
1640 movl S_ARG0,%eax /* %eax = nr, index of the bit to test */
1641 movl S_ARG1,%ecx /* %ecx = array, base address of the bit string */
1654 * jail: set the EIP to "jail" to block a kernel thread.
1655 * Useful to debug synchronization problems on MPs.
1662 * div_scale(unsigned int dividend,
1663 * unsigned int divisor,
1664 * unsigned int *scale)
1666 * This function returns (dividend << *scale) / divisor where *scale
1667 * is the largest possible value before overflow. This is used in
1668 * computation where precision must be achieved in order to avoid
1669 * floating point usage.
1673 * while (((dividend >> *scale) >= divisor))
1675 * *scale = 32 - *scale;
1676 * return ((dividend << *scale) / divisor);
1680 xorl %ecx, %ecx /* %ecx = number of right shifts, starts at 0 */
1682 movl ARG0, %edx /* %edx = dividend (high half of %edx:%eax) */
1684 cmpl ARG1, %edx /* dividend vs divisor; divisor must be non-zero or the loop cannot exit */
1685 jb 1f /* unsigned dividend < divisor: quotient fits in 32 bits, go divide */
1686 addl $1, %ecx /* count one more shift */
1687 shrdl $1, %edx, %eax /* low half >>= 1, pulling in bit 0 of %edx */
1688 shrl $1, %edx /* high half >>= 1 */
1689 jmp 0b /* re-test the shifted dividend */
1691 divl ARG1 /* %eax = %edx:%eax / divisor = (dividend << (32 - shifts)) / divisor */
1692 movl ARG2, %edx /* %edx = scale pointer */
1693 movl $32, (%edx) /* *scale = 32 */
1694 subl %ecx, (%edx) /* *scale = 32 - shifts */
1700 * mul_scale(unsigned int multiplicand,
1701 * unsigned int multiplier,
1702 * unsigned int *scale)
1704 * This function returns ((multiplicand * multiplier) >> *scale) where
1705 * scale is the largest possible value before overflow. This is used in
1706 * computation where precision must be achieved in order to avoid
1707 * floating point usage.
1711 * while (overflow((multiplicand * multiplier) >> *scale))
1713 * return ((multiplicand * multiplier) >> *scale);
1717 xorl %ecx, %ecx /* %ecx = number of right shifts, starts at 0 */
1718 movl ARG0, %eax /* %eax = multiplicand */
1719 mull ARG1 /* %edx:%eax = multiplicand * multiplier (64-bit product) */
1721 cmpl $0, %edx /* high half zero means the product fits in 32 bits */
1723 addl $1, %ecx /* count one more shift */
1724 shrdl $1, %edx, %eax /* low half >>= 1, pulling in bit 0 of %edx */
1725 shrl $1, %edx /* high half >>= 1 */
1728 movl ARG2, %edx /* %edx = scale pointer */
1729 movl %ecx, (%edx) /* *scale = number of shifts performed */
1736 * Double-fault exception handler task. The last gasp...
1738 Entry(df_task_start)
1739 CCALL1(panic_double_fault, $(T_DOUBLE_FAULT)) /* hand T_DOUBLE_FAULT to panic_double_fault; CCALL1 is presumably a one-argument C call macro */
1744 * machine-check handler task. The last gasp...
1746 Entry(mc_task_start)
1747 CCALL1(panic_machine_check, $(T_MACHINE_CHECK)) /* hand T_MACHINE_CHECK to panic_machine_check; CCALL1 is presumably a one-argument C call macro */
1751 * Compatibility mode's last gasp...
1755 CCALL1(panic_double_fault64, %eax)
1760 CCALL1(panic_machine_check64, %eax)