bsd/dev/i386/dtrace_isa.c

   1 /*
   2  * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 #define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
  30 #include <kern/thread.h>
  31 #include <mach/thread_status.h>
  32
  33 typedef x86_saved_state_t savearea_t;
  34
  35 #include <stdarg.h>
  36 #include <string.h>
  37 #include <sys/malloc.h>
  38 #include <sys/time.h>
  39 #include <sys/systm.h>
  40 #include <sys/proc.h>
  41 #include <sys/proc_internal.h>
  42 #include <sys/kauth.h>
  43 #include <sys/dtrace.h>
  44 #include <sys/dtrace_impl.h>
  45 #include <libkern/OSAtomic.h>
  46 #include <kern/thread_call.h>
  47 #include <kern/task.h>
  48 #include <kern/sched_prim.h>
  49 #include <miscfs/devfs/devfs.h>
  50 #include <mach/vm_param.h>
  51 #include <machine/pal_routines.h>
  52 #include <i386/mp.h>
  53
  54 /*
  55  * APPLE NOTE:  The regmap is used to decode which 64bit uregs[] register
  56  * is being accessed when passed the 32bit uregs[] constant (based on
  57  * the reg.d translator file). The dtrace_getreg() is smart enough to handle
  58  * the register mappings.   The register set definitions are the same as
  59  * those used by the fasttrap_getreg code.
  60  */
  61 #include "fasttrap_regset.h"
  62 static const uint8_t regmap[19] = {
  63     REG_GS,             /* GS */
  64     REG_FS,             /* FS */
  65     REG_ES,             /* ES */
  66     REG_DS,             /* DS */
  67     REG_RDI,            /* EDI */
  68     REG_RSI,            /* ESI */
  69     REG_RBP,            /* EBP, REG_FP  */
  70     REG_RSP,            /* ESP */
  71     REG_RBX,            /* EBX */
  72     REG_RDX,            /* EDX, REG_R1  */
  73     REG_RCX,            /* ECX */
  74     REG_RAX,            /* EAX, REG_R0  */
  75     REG_TRAPNO,         /* TRAPNO */
  76     REG_ERR,            /* ERR */
  77     REG_RIP,            /* EIP, REG_PC  */
  78     REG_CS,             /* CS */
  79     REG_RFL,            /* EFL, REG_PS  */
  80     REG_RSP,            /* UESP, REG_SP */
  81     REG_SS              /* SS */
  82 };
  83
  84 extern dtrace_id_t      dtrace_probeid_error;   /* special ERROR probe */
  85
  86 void
  87 dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which,
  88     int fltoffs, int fault, uint64_t illval)
  89 {
  90     /*
  91      * For the case of the error probe firing lets
  92      * stash away "illval" here, and special-case retrieving it in DIF_VARIABLE_ARG.
  93      */
  94     state->dts_arg_error_illval = illval;
  95     dtrace_probe( dtrace_probeid_error, (uint64_t)(uintptr_t)state, epid, which, fltoffs, fault );
  96 }
  97
  98 /*
  99  * Atomicity and synchronization
 100  */
 101 void
 102 dtrace_membar_producer(void)
 103 {
 104         __asm__ volatile("sfence");
 105 }
 106
 107 void
 108 dtrace_membar_consumer(void)
 109 {
 110         __asm__ volatile("lfence");
 111 }
 112
 113 /*
 114  * Interrupt manipulation
 115  * XXX dtrace_getipl() can be called from probe context.
 116  */
 117 int
 118 dtrace_getipl(void)
 119 {
 120         /*
 121          * XXX Drat, get_interrupt_level is MACH_KERNEL_PRIVATE
 122          * in osfmk/kern/cpu_data.h
 123          */
 124         /* return get_interrupt_level(); */
 125         return (ml_at_interrupt_context() ? 1: 0);
 126 }
 127
 128 /*
 129  * MP coordination
 130  */
 131 typedef struct xcArg {
 132         processorid_t cpu;
 133         dtrace_xcall_t f;
 134         void *arg;
 135 } xcArg_t;
 136
 137 static void
 138 xcRemote( void *foo )
 139 {
 140         xcArg_t *pArg = (xcArg_t *)foo;
 141
 142         if ( pArg->cpu == CPU->cpu_id || pArg->cpu == DTRACE_CPUALL ) {
 143                 (pArg->f)(pArg->arg);
 144         }
 145 }
 146
 147
 148 /*
 149  * dtrace_xcall() is not called from probe context.
 150  */
 151 void
 152 dtrace_xcall(processorid_t cpu, dtrace_xcall_t f, void *arg)
 153 {
 154         xcArg_t xcArg;
 155
 156         xcArg.cpu = cpu;
 157         xcArg.f = f;
 158         xcArg.arg = arg;
 159
 160         if (cpu == DTRACE_CPUALL) {
 161                 mp_cpus_call (CPUMASK_ALL, SYNC, xcRemote, (void*)&xcArg);
 162         }
 163         else {
 164                 mp_cpus_call (cpu_to_cpumask((cpu_t)cpu), SYNC, xcRemote, (void*)&xcArg);
 165         }
 166 }
 167
 168 /*
 169  * Runtime and ABI
 170  */
 171 uint64_t
 172 dtrace_getreg(struct regs *savearea, uint_t reg)
 173 {
 174         boolean_t is64Bit = proc_is64bit(current_proc());
 175         x86_saved_state_t *regs = (x86_saved_state_t *)savearea;
 176
 177         if (is64Bit) {
 178             if (reg <= SS) {
 179                 reg = regmap[reg];
 180             } else {
 181                 reg -= (SS + 1);
 182             }
 183
 184             switch (reg) {
 185             case REG_RDI:
 186                 return (uint64_t)(regs->ss_64.rdi);
 187             case REG_RSI:
 188                 return (uint64_t)(regs->ss_64.rsi);
 189             case REG_RDX:
 190                 return (uint64_t)(regs->ss_64.rdx);
 191             case REG_RCX:
 192                 return (uint64_t)(regs->ss_64.rcx);
 193             case REG_R8:
 194                 return (uint64_t)(regs->ss_64.r8);
 195             case REG_R9:
 196                 return (uint64_t)(regs->ss_64.r9);
 197             case REG_RAX:
 198                 return (uint64_t)(regs->ss_64.rax);
 199             case REG_RBX:
 200                 return (uint64_t)(regs->ss_64.rbx);
 201             case REG_RBP:
 202                 return (uint64_t)(regs->ss_64.rbp);
 203             case REG_R10:
 204                 return (uint64_t)(regs->ss_64.r10);
 205             case REG_R11:
 206                 return (uint64_t)(regs->ss_64.r11);
 207             case REG_R12:
 208                 return (uint64_t)(regs->ss_64.r12);
 209             case REG_R13:
 210                 return (uint64_t)(regs->ss_64.r13);
 211             case REG_R14:
 212                 return (uint64_t)(regs->ss_64.r14);
 213             case REG_R15:
 214                 return (uint64_t)(regs->ss_64.r15);
 215             case REG_FS:
 216                 return (uint64_t)(regs->ss_64.fs);
 217             case REG_GS:
 218                 return (uint64_t)(regs->ss_64.gs);
 219             case REG_TRAPNO:
 220                 return (uint64_t)(regs->ss_64.isf.trapno);
 221             case REG_ERR:
 222                 return (uint64_t)(regs->ss_64.isf.err);
 223             case REG_RIP:
 224                 return (uint64_t)(regs->ss_64.isf.rip);
 225             case REG_CS:
 226                 return (uint64_t)(regs->ss_64.isf.cs);
 227             case REG_SS:
 228                 return (uint64_t)(regs->ss_64.isf.ss);
 229             case REG_RFL:
 230                 return (uint64_t)(regs->ss_64.isf.rflags);
 231             case REG_RSP:
 232                 return (uint64_t)(regs->ss_64.isf.rsp);
 233             case REG_DS:
 234             case REG_ES:
 235             default:
 236                 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
 237                 return (0);
 238             }
 239
 240         } else {   /* is 32bit user */
 241                 /* beyond register SS */
 242                 if (reg > x86_SAVED_STATE32_COUNT - 1) {
 243                         DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
 244                         return (0);
 245                 }
 246                 return (uint64_t)((unsigned int *)(&(regs->ss_32.gs)))[reg];
 247         }
 248 }
 249
 250 #define RETURN_OFFSET 4
 251 #define RETURN_OFFSET64 8
 252
 253 static int
 254 dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, user_addr_t pc,
 255     user_addr_t sp)
 256 {
 257 #if 0
 258         volatile uint16_t *flags =
 259             (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
 260
 261         uintptr_t oldcontext = lwp->lwp_oldcontext; /* XXX signal stack crawl */
 262         size_t s1, s2;
 263 #endif
 264         int ret = 0;
 265         boolean_t is64Bit = proc_is64bit(current_proc());
 266
 267         ASSERT(pcstack == NULL || pcstack_limit > 0);
 268
 269 #if 0 /* XXX signal stack crawl */
 270         if (p->p_model == DATAMODEL_NATIVE) {
 271                 s1 = sizeof (struct frame) + 2 * sizeof (long);
 272                 s2 = s1 + sizeof (siginfo_t);
 273         } else {
 274                 s1 = sizeof (struct frame32) + 3 * sizeof (int);
 275                 s2 = s1 + sizeof (siginfo32_t);
 276         }
 277 #endif
 278
 279         while (pc != 0) {
 280                 ret++;
 281                 if (pcstack != NULL) {
 282                         *pcstack++ = (uint64_t)pc;
 283                         pcstack_limit--;
 284                         if (pcstack_limit <= 0)
 285                                 break;
 286                 }
 287
 288                 if (sp == 0)
 289                         break;
 290
 291 #if 0 /* XXX signal stack crawl */
 292                 if (oldcontext == sp + s1 || oldcontext == sp + s2) {
 293                         if (p->p_model == DATAMODEL_NATIVE) {
 294                                 ucontext_t *ucp = (ucontext_t *)oldcontext;
 295                                 greg_t *gregs = ucp->uc_mcontext.gregs;
 296
 297                                 sp = dtrace_fulword(&gregs[REG_FP]);
 298                                 pc = dtrace_fulword(&gregs[REG_PC]);
 299
 300                                 oldcontext = dtrace_fulword(&ucp->uc_link);
 301                         } else {
 302                                 ucontext32_t *ucp = (ucontext32_t *)oldcontext;
 303                                 greg32_t *gregs = ucp->uc_mcontext.gregs;
 304
 305                                 sp = dtrace_fuword32(&gregs[EBP]);
 306                                 pc = dtrace_fuword32(&gregs[EIP]);
 307
 308                                 oldcontext = dtrace_fuword32(&ucp->uc_link);
 309                         }
 310                 }
 311                 else
 312 #endif
 313                 {
 314                         if (is64Bit) {
 315                                 pc = dtrace_fuword64((sp + RETURN_OFFSET64));
 316                                 sp = dtrace_fuword64(sp);
 317                         } else {
 318                                 pc = dtrace_fuword32((sp + RETURN_OFFSET));
 319                                 sp = dtrace_fuword32(sp);
 320                         }
 321                 }
 322
 323 #if 0 /* XXX */
 324                 /*
 325                  * This is totally bogus:  if we faulted, we're going to clear
 326                  * the fault and break.  This is to deal with the apparently
 327                  * broken Java stacks on x86.
 328                  */
 329                 if (*flags & CPU_DTRACE_FAULT) {
 330                         *flags &= ~CPU_DTRACE_FAULT;
 331                         break;
 332                 }
 333 #endif
 334         }
 335
 336         return (ret);
 337 }
 338
 339
 340 /*
 341  * The return value indicates if we've modified the stack.
 342  */
 343 static int
 344 dtrace_adjust_stack(uint64_t **pcstack, int *pcstack_limit, user_addr_t *pc,
 345                     user_addr_t sp)
 346 {
 347     int64_t missing_tos;
 348     int rc = 0;
 349     boolean_t is64Bit = proc_is64bit(current_proc());
 350
 351     ASSERT(pc != NULL);
 352
 353     if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
 354         /*
 355          * If we found ourselves in an entry probe, the frame pointer has not
 356          * yet been pushed (that happens in the
 357          * function prologue).  The best approach is to
 358          * add the current pc as a missing top of stack,
 359          * and back the pc up to the caller, which is stored  at the
 360          * current stack pointer address since the call
 361          * instruction puts it there right before
 362          * the branch.
 363          */
 364
 365         missing_tos = *pc;
 366
 367         if (is64Bit)
 368             *pc = dtrace_fuword64(sp);
 369         else
 370             *pc = dtrace_fuword32(sp);
 371     } else {
 372         /*
 373          * We might have a top of stack override, in which case we just
 374          * add that frame without question to the top.  This
 375          * happens in return probes where you have a valid
 376          * frame pointer, but it's for the callers frame
 377          * and you'd like to add the pc of the return site
 378          * to the frame.
 379          */
 380         missing_tos = cpu_core[CPU->cpu_id].cpuc_missing_tos;
 381     }
 382
 383     if (missing_tos != 0) {
 384         if (pcstack != NULL && pcstack_limit != NULL) {
 385             /*
 386              * If the missing top of stack has been filled out, then
 387              * we add it and adjust the size.
 388              */
 389             *(*pcstack)++ = missing_tos;
 390             (*pcstack_limit)--;
 391         }
 392         /*
 393          * return 1 because we would have changed the
 394          * stack whether or not it was passed in.  This
 395          * ensures the stack count is correct
 396          */
 397          rc = 1;
 398     }
 399     return rc;
 400 }
 401
 402 void
 403 dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
 404 {
 405         thread_t thread = current_thread();
 406         x86_saved_state_t *regs;
 407         user_addr_t pc, sp, fp;
 408         volatile uint16_t *flags =
 409             (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
 410         int n;
 411         boolean_t is64Bit = proc_is64bit(current_proc());
 412
 413         if (*flags & CPU_DTRACE_FAULT)
 414                 return;
 415
 416         if (pcstack_limit <= 0)
 417                 return;
 418
 419         /*
 420          * If there's no user context we still need to zero the stack.
 421          */
 422         if (thread == NULL)
 423                 goto zero;
 424
 425         pal_register_cache_state(thread, VALID);
 426         regs = (x86_saved_state_t *)find_user_regs(thread);
 427         if (regs == NULL)
 428                 goto zero;
 429
 430         *pcstack++ = (uint64_t)proc_selfpid();
 431         pcstack_limit--;
 432
 433         if (pcstack_limit <= 0)
 434                 return;
 435
 436         if (is64Bit) {
 437                 pc = regs->ss_64.isf.rip;
 438                 sp = regs->ss_64.isf.rsp;
 439                 fp = regs->ss_64.rbp;
 440         } else {
 441                 pc = regs->ss_32.eip;
 442                 sp = regs->ss_32.uesp;
 443                 fp = regs->ss_32.ebp;
 444         }
 445
 446         /*
 447          * The return value indicates if we've modified the stack.
 448          * Since there is nothing else to fix up in either case,
 449          * we can safely ignore it here.
 450          */
 451         (void)dtrace_adjust_stack(&pcstack, &pcstack_limit, &pc, sp);
 452
 453         if(pcstack_limit <= 0)
 454             return;
 455
 456         /*
 457          * Note that unlike ppc, the x86 code does not use
 458          * CPU_DTRACE_USTACK_FP. This is because x86 always
 459          * traces from the fp, even in syscall/profile/fbt
 460          * providers.
 461          */
 462         n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp);
 463         ASSERT(n >= 0);
 464         ASSERT(n <= pcstack_limit);
 465
 466         pcstack += n;
 467         pcstack_limit -= n;
 468
 469 zero:
 470         while (pcstack_limit-- > 0)
 471                 *pcstack++ = 0;
 472 }
 473
 474 int
 475 dtrace_getustackdepth(void)
 476 {
 477         thread_t thread = current_thread();
 478         x86_saved_state_t *regs;
 479         user_addr_t pc, sp, fp;
 480         int n = 0;
 481         boolean_t is64Bit = proc_is64bit(current_proc());
 482
 483         if (thread == NULL)
 484                 return 0;
 485
 486         if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
 487                 return (-1);
 488
 489         pal_register_cache_state(thread, VALID);
 490         regs = (x86_saved_state_t *)find_user_regs(thread);
 491         if (regs == NULL)
 492                 return 0;
 493
 494         if (is64Bit) {
 495                 pc = regs->ss_64.isf.rip;
 496                 sp = regs->ss_64.isf.rsp;
 497                 fp = regs->ss_64.rbp;
 498         } else {
 499                 pc = regs->ss_32.eip;
 500                 sp = regs->ss_32.uesp;
 501                 fp = regs->ss_32.ebp;
 502         }
 503
 504         if (dtrace_adjust_stack(NULL, NULL, &pc, sp) == 1) {
 505             /*
 506              * we would have adjusted the stack if we had
 507              * supplied one (that is what rc == 1 means).
 508              * Also, as a side effect, the pc might have
 509              * been fixed up, which is good for calling
 510              * in to dtrace_getustack_common.
 511              */
 512             n++;
 513         }
 514
 515         /*
 516          * Note that unlike ppc, the x86 code does not use
 517          * CPU_DTRACE_USTACK_FP. This is because x86 always
 518          * traces from the fp, even in syscall/profile/fbt
 519          * providers.
 520          */
 521
 522         n += dtrace_getustack_common(NULL, 0, pc, fp);
 523
 524         return (n);
 525 }
 526
 527 void
 528 dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit)
 529 {
 530         thread_t thread = current_thread();
 531         savearea_t *regs;
 532         user_addr_t pc, sp;
 533         volatile uint16_t *flags =
 534             (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
 535 #if 0
 536         uintptr_t oldcontext;
 537         size_t s1, s2;
 538 #endif
 539         boolean_t is64Bit = proc_is64bit(current_proc());
 540
 541         if (*flags & CPU_DTRACE_FAULT)
 542                 return;
 543
 544         if (pcstack_limit <= 0)
 545                 return;
 546
 547         /*
 548          * If there's no user context we still need to zero the stack.
 549          */
 550         if (thread == NULL)
 551                 goto zero;
 552
 553         regs = (savearea_t *)find_user_regs(thread);
 554         if (regs == NULL)
 555                 goto zero;
 556
 557         *pcstack++ = (uint64_t)proc_selfpid();
 558         pcstack_limit--;
 559
 560         if (pcstack_limit <= 0)
 561                 return;
 562
 563         pc = regs->ss_32.eip;
 564         sp = regs->ss_32.ebp;
 565
 566 #if 0 /* XXX signal stack crawl */
 567         oldcontext = lwp->lwp_oldcontext;
 568
 569         if (p->p_model == DATAMODEL_NATIVE) {
 570                 s1 = sizeof (struct frame) + 2 * sizeof (long);
 571                 s2 = s1 + sizeof (siginfo_t);
 572         } else {
 573                 s1 = sizeof (struct frame32) + 3 * sizeof (int);
 574                 s2 = s1 + sizeof (siginfo32_t);
 575         }
 576 #endif
 577
 578         if(dtrace_adjust_stack(&pcstack, &pcstack_limit, &pc, sp) == 1) {
 579             /*
 580              * we made a change.
 581              */
 582             *fpstack++ = 0;
 583             if (pcstack_limit <= 0)
 584                 return;
 585         }
 586
 587         while (pc != 0) {
 588                 *pcstack++ = (uint64_t)pc;
 589                 *fpstack++ = sp;
 590                 pcstack_limit--;
 591                 if (pcstack_limit <= 0)
 592                         break;
 593
 594                 if (sp == 0)
 595                         break;
 596
 597 #if 0 /* XXX signal stack crawl */
 598                 if (oldcontext == sp + s1 || oldcontext == sp + s2) {
 599                         if (p->p_model == DATAMODEL_NATIVE) {
 600                                 ucontext_t *ucp = (ucontext_t *)oldcontext;
 601                                 greg_t *gregs = ucp->uc_mcontext.gregs;
 602
 603                                 sp = dtrace_fulword(&gregs[REG_FP]);
 604                                 pc = dtrace_fulword(&gregs[REG_PC]);
 605
 606                                 oldcontext = dtrace_fulword(&ucp->uc_link);
 607                         } else {
 608                                 ucontext_t *ucp = (ucontext_t *)oldcontext;
 609                                 greg_t *gregs = ucp->uc_mcontext.gregs;
 610
 611                                 sp = dtrace_fuword32(&gregs[EBP]);
 612                                 pc = dtrace_fuword32(&gregs[EIP]);
 613
 614                                 oldcontext = dtrace_fuword32(&ucp->uc_link);
 615                         }
 616                 }
 617                 else
 618 #endif
 619                 {
 620                         if (is64Bit) {
 621                                 pc = dtrace_fuword64((sp + RETURN_OFFSET64));
 622                                 sp = dtrace_fuword64(sp);
 623                         } else {
 624                                 pc = dtrace_fuword32((sp + RETURN_OFFSET));
 625                                 sp = dtrace_fuword32(sp);
 626                         }
 627                 }
 628
 629 #if 0 /* XXX */
 630                 /*
 631                  * This is totally bogus:  if we faulted, we're going to clear
 632                  * the fault and break.  This is to deal with the apparently
 633                  * broken Java stacks on x86.
 634                  */
 635                 if (*flags & CPU_DTRACE_FAULT) {
 636                         *flags &= ~CPU_DTRACE_FAULT;
 637                         break;
 638                 }
 639 #endif
 640         }
 641
 642 zero:
 643         while (pcstack_limit-- > 0)
 644                 *pcstack++ = 0;
 645 }
 646
 647 void
 648 dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
 649                   uint32_t *intrpc)
 650 {
 651         struct frame *fp = (struct frame *)__builtin_frame_address(0);
 652         struct frame *nextfp, *minfp, *stacktop;
 653         int depth = 0;
 654         int last = 0;
 655         uintptr_t pc;
 656         uintptr_t caller = CPU->cpu_dtrace_caller;
 657         int on_intr;
 658
 659         if ((on_intr = CPU_ON_INTR(CPU)) != 0)
 660                 stacktop = (struct frame *)dtrace_get_cpu_int_stack_top();
 661         else
 662                 stacktop = (struct frame *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size);
 663
 664         minfp = fp;
 665
 666         aframes++;
 667
 668         if (intrpc != NULL && depth < pcstack_limit)
 669                 pcstack[depth++] = (pc_t)intrpc;
 670
 671         while (depth < pcstack_limit) {
 672                 nextfp = *(struct frame **)fp;
 673 #if defined(__x86_64__)
 674                 pc = *(uintptr_t *)(((uintptr_t)fp) + RETURN_OFFSET64);
 675 #else
 676                 pc = *(uintptr_t *)(((uintptr_t)fp) + RETURN_OFFSET);
 677 #endif
 678
 679                 if (nextfp <= minfp || nextfp >= stacktop) {
 680                         if (on_intr) {
 681                                 /*
 682                                  * Hop from interrupt stack to thread stack.
 683                                  */
 684                                 vm_offset_t kstack_base = dtrace_get_kernel_stack(current_thread());
 685
 686                                 minfp = (struct frame *)kstack_base;
 687                                 stacktop = (struct frame *)(kstack_base + kernel_stack_size);
 688
 689                                 on_intr = 0;
 690                                 continue;
 691                         }
 692                         /*
 693                          * This is the last frame we can process; indicate
 694                          * that we should return after processing this frame.
 695                          */
 696                         last = 1;
 697                 }
 698
 699                 if (aframes > 0) {
 700                         if (--aframes == 0 && caller != 0) {
 701                                 /*
 702                                  * We've just run out of artificial frames,
 703                                  * and we have a valid caller -- fill it in
 704                                  * now.
 705                                  */
 706                                 ASSERT(depth < pcstack_limit);
 707                                 pcstack[depth++] = (pc_t)caller;
 708                                 caller = 0;
 709                         }
 710                 } else {
 711                         if (depth < pcstack_limit)
 712                                 pcstack[depth++] = (pc_t)pc;
 713                 }
 714
 715                 if (last) {
 716                         while (depth < pcstack_limit)
 717                                 pcstack[depth++] = 0;
 718                         return;
 719                 }
 720
 721                 fp = nextfp;
 722                 minfp = fp;
 723         }
 724 }
 725
 726 struct frame {
 727         struct frame *backchain;
 728         uintptr_t retaddr;
 729 };
 730
 731 uint64_t
 732 dtrace_getarg(int arg, int aframes)
 733 {
 734         uint64_t val;
 735         struct frame *fp = (struct frame *)__builtin_frame_address(0);
 736         uintptr_t *stack;
 737         uintptr_t pc;
 738         int i;
 739
 740
 741 #if defined(__x86_64__)
 742     /*
 743      * A total of 6 arguments are passed via registers; any argument with
 744      * index of 5 or lower is therefore in a register.
 745      */
 746     int inreg = 5;
 747 #endif
 748
 749         for (i = 1; i <= aframes; i++) {
 750                 fp = fp->backchain;
 751                 pc = fp->retaddr;
 752
 753                 if (dtrace_invop_callsite_pre != NULL
 754                         && pc  >  (uintptr_t)dtrace_invop_callsite_pre
 755                         && pc  <= (uintptr_t)dtrace_invop_callsite_post) {
 756 #if defined(__i386__)
 757                         /*
 758                          * If we pass through the invalid op handler, we will
 759                          * use the pointer that it passed to the stack as the
 760                          * second argument to dtrace_invop() as the pointer to
 761                          * the frame we're hunting for.
 762                          */
 763
 764                         stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */
 765                         fp = (struct frame *)stack[1]; /* Grab *second* argument */
 766                         stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */
 767 #elif defined(__x86_64__)
 768                         /*
 769                          * In the case of x86_64, we will use the pointer to the
 770                          * save area structure that was pushed when we took the
 771                          * trap.  To get this structure, we must increment
 772                          * beyond the frame structure. If the
 773                          * argument that we're seeking is passed on the stack,
 774                          * we'll pull the true stack pointer out of the saved
 775                          * registers and decrement our argument by the number
 776                          * of arguments passed in registers; if the argument
 777                          * we're seeking is passed in regsiters, we can just
 778                          * load it directly.
 779                          */
 780
 781                         /* fp points to frame of dtrace_invop() activation. */
 782                         fp = fp->backchain; /* to fbt_perfcallback() activation. */
 783                         fp = fp->backchain; /* to kernel_trap() activation. */
 784                         fp = fp->backchain; /* to trap_from_kernel() activation. */
 785
 786                         x86_saved_state_t   *tagged_regs = (x86_saved_state_t *)&fp[1];
 787                         x86_saved_state64_t *saved_state = saved_state64(tagged_regs);
 788
 789                         if (arg <= inreg) {
 790                                 stack = (uintptr_t *)&saved_state->rdi;
 791                         } else {
 792                                 fp = (struct frame *)(saved_state->isf.rsp);
 793                                 stack = (uintptr_t *)&fp[1]; /* Find marshalled
 794                                                                 arguments */
 795                                 arg -= inreg + 1;
 796                         }
 797 #else
 798 #error Unknown arch
 799 #endif
 800                         goto load;
 801                 }
 802         }
 803
 804         /*
 805          * We know that we did not come through a trap to get into
 806          * dtrace_probe() --  We arrive here when the provider has
 807          * called dtrace_probe() directly.
 808          * The probe ID is the first argument to dtrace_probe().
 809          * We must advance beyond that to get the argX.
 810          */
 811         arg++; /* Advance past probeID */
 812
 813 #if defined(__x86_64__)
 814         if (arg <= inreg) {
 815                 /*
 816                  * This shouldn't happen.  If the argument is passed in a
 817                  * register then it should have been, well, passed in a
 818                  * register...
 819                  */
 820                 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
 821                 return (0);
 822         }
 823
 824         arg -= (inreg + 1);
 825 #endif
 826         stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */
 827
 828 load:
 829         DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
 830         /* dtrace_probe arguments arg0 ... arg4 are 64bits wide */
 831         val = (uint64_t)(*(((uintptr_t *)stack) + arg));
 832         DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
 833
 834         return (val);
 835 }
 836
 837 /*
 838  * Load/Store Safety
 839  */
 840 void
 841 dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
 842 {
 843         /*
 844          * "base" is the smallest toxic address in the range, "limit" is the first
 845          * VALID address greater than "base".
 846          */
 847         func(0x0, VM_MIN_KERNEL_AND_KEXT_ADDRESS);
 848         if (VM_MAX_KERNEL_ADDRESS < ~(uintptr_t)0)
 849                         func(VM_MAX_KERNEL_ADDRESS + 1, ~(uintptr_t)0);
 850 }
 851