]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/kern/kern_time.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / bsd / kern / kern_time.c
index d41be2383098022f463968f454ac56b714e0f9fb..f678647974b60a62f20fb192d80740f25cc31de4 100644 (file)
@@ -1,23 +1,29 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
- * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
- * 
- * @APPLE_LICENSE_HEADER_END@
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
 /*
  *
  *     @(#)kern_time.c 8.4 (Berkeley) 5/26/95
  */
+/*
+ * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
+ * support for mandatory and extensible security protections.  This notice
+ * is included in support of clause 2.2 (b) of the Apple Public License,
+ * Version 2.0.
+ */
 
 #include <sys/param.h>
 #include <sys/resourcevar.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
-#include <sys/proc.h>
+#include <sys/proc_internal.h>
+#include <sys/kauth.h>
 #include <sys/vnode.h>
+#include <sys/time.h>
+#include <sys/priv.h>
 
-#include <sys/mount.h>
+#include <sys/mount_internal.h>
+#include <sys/sysproto.h>
+#include <sys/signalvar.h>
+#include <sys/protosw.h> /* for net_uptime2timeval() */
 
 #include <kern/clock.h>
+#include <kern/task.h>
+#include <kern/thread_call.h>
+#if CONFIG_MACF
+#include <security/mac_framework.h>
+#endif
+#include <IOKit/IOBSD.h>
+#include <sys/time.h>
+#include <kern/remote_time.h>
 
-#define HZ     100     /* XXX */
+#define HZ      100     /* XXX */
 
-volatile struct timeval                time;
 /* simple lock used to access timezone, tz structure */
-decl_simple_lock_data(, tz_slock);
-/* 
+static LCK_GRP_DECLARE(tz_slock_grp, "tzlock");
+static LCK_SPIN_DECLARE(tz_slock, &tz_slock_grp);
+
+static void             setthetime(
+       struct timeval  *tv);
+
+static boolean_t timeval_fixusec(struct timeval *t1);
+
+/*
  * Time of day and interval timer support.
  *
  * These routines provide the kernel entry points to get and set
@@ -80,195 +112,179 @@ decl_simple_lock_data(, tz_slock);
  * and decrementing interval timers, optionally reloading the interval
  * timers when they expire.
  */
-struct gettimeofday_args{
-       struct timeval *tp;
-       struct timezone *tzp;
-};
 /* ARGSUSED */
 int
-gettimeofday(p, uap, retval)
-       struct proc *p;
-       register struct gettimeofday_args *uap;
-       register_t *retval;
+gettimeofday(
+       struct proc     *p,
+       struct gettimeofday_args *uap,
+       __unused int32_t *retval)
 {
-       struct timeval atv;
        int error = 0;
-       extern simple_lock_data_t tz_slock;
        struct timezone ltz; /* local copy */
+       clock_sec_t secs;
+       clock_usec_t usecs;
+       uint64_t mach_time;
 
-/*  NOTE THIS implementation is for non ppc architectures only */
+       if (uap->tp || uap->mach_absolute_time) {
+               clock_gettimeofday_and_absolute_time(&secs, &usecs, &mach_time);
+       }
 
        if (uap->tp) {
-               microtime(&atv);
-               if (error = copyout((caddr_t)&atv, (caddr_t)uap->tp,
-                       sizeof (atv)))
-                       return(error);
+               /* Casting secs through a uint32_t to match arm64 commpage */
+               if (IS_64BIT_PROCESS(p)) {
+                       struct user64_timeval user_atv = {};
+                       user_atv.tv_sec = (uint32_t)secs;
+                       user_atv.tv_usec = usecs;
+                       error = copyout(&user_atv, uap->tp, sizeof(user_atv));
+               } else {
+                       struct user32_timeval user_atv = {};
+                       user_atv.tv_sec = (uint32_t)secs;
+                       user_atv.tv_usec = usecs;
+                       error = copyout(&user_atv, uap->tp, sizeof(user_atv));
+               }
+               if (error) {
+                       return error;
+               }
        }
-       
+
        if (uap->tzp) {
-               usimple_lock(&tz_slock);
+               lck_spin_lock(&tz_slock);
                ltz = tz;
-               usimple_unlock(&tz_slock);
-               error = copyout((caddr_t)&ltz, (caddr_t)uap->tzp,
-                   sizeof (tz));
+               lck_spin_unlock(&tz_slock);
+
+               error = copyout((caddr_t)&ltz, CAST_USER_ADDR_T(uap->tzp), sizeof(tz));
        }
 
-       return(error);
+       if (error == 0 && uap->mach_absolute_time) {
+               error = copyout(&mach_time, uap->mach_absolute_time, sizeof(mach_time));
+       }
+
+       return error;
 }
 
-struct settimeofday_args {
-       struct timeval *tv;
-       struct timezone *tzp;
-};
+/*
+ * XXX Y2038 bug because of setthetime() argument
+ */
 /* ARGSUSED */
 int
-settimeofday(p, uap, retval)
-       struct proc *p;
-       struct settimeofday_args  *uap;
-       register_t *retval;
+settimeofday(__unused struct proc *p, struct settimeofday_args  *uap, __unused int32_t *retval)
 {
        struct timeval atv;
        struct timezone atz;
-       int error, s;
-       extern simple_lock_data_t tz_slock;
-
-       if (error = suser(p->p_ucred, &p->p_acflag))
-               return (error);
-       /* Verify all parameters before changing time. */
-       if (uap->tv && (error = copyin((caddr_t)uap->tv,
-           (caddr_t)&atv, sizeof(atv))))
-               return (error);
-       if (uap->tzp && (error = copyin((caddr_t)uap->tzp,
-           (caddr_t)&atz, sizeof(atz))))
-               return (error);
-       if (uap->tv)
+       int error;
+
+       bzero(&atv, sizeof(atv));
+
+       /* Check that this task is entitled to set the time or it is root */
+       if (!IOTaskHasEntitlement(current_task(), SETTIME_ENTITLEMENT)) {
+#if CONFIG_MACF
+               error = mac_system_check_settime(kauth_cred_get());
+               if (error) {
+                       return error;
+               }
+#endif
+#if defined(XNU_TARGET_OS_OSX)
+               if ((error = suser(kauth_cred_get(), &p->p_acflag))) {
+                       return error;
+               }
+#endif
+       }
+
+       /* Verify all parameters before changing time */
+       if (uap->tv) {
+               if (IS_64BIT_PROCESS(p)) {
+                       struct user64_timeval user_atv;
+                       error = copyin(uap->tv, &user_atv, sizeof(user_atv));
+                       atv.tv_sec = (__darwin_time_t)user_atv.tv_sec;
+                       atv.tv_usec = user_atv.tv_usec;
+               } else {
+                       struct user32_timeval user_atv;
+                       error = copyin(uap->tv, &user_atv, sizeof(user_atv));
+                       atv.tv_sec = user_atv.tv_sec;
+                       atv.tv_usec = user_atv.tv_usec;
+               }
+               if (error) {
+                       return error;
+               }
+       }
+       if (uap->tzp && (error = copyin(uap->tzp, (caddr_t)&atz, sizeof(atz)))) {
+               return error;
+       }
+       if (uap->tv) {
+               /* only positive values of sec/usec are accepted */
+               if (atv.tv_sec < 0 || atv.tv_usec < 0) {
+                       return EPERM;
+               }
+               if (!timeval_fixusec(&atv)) {
+                       return EPERM;
+               }
                setthetime(&atv);
+       }
        if (uap->tzp) {
-               usimple_lock(&tz_slock);
+               lck_spin_lock(&tz_slock);
                tz = atz;
-               usimple_unlock(&tz_slock);
+               lck_spin_unlock(&tz_slock);
        }
-       return (0);
-}
-
-setthetime(tv)
-       struct timeval *tv;
-{
-       long delta = tv->tv_sec - time.tv_sec;
-       mach_timespec_t now;
-
-       now.tv_sec = tv->tv_sec;
-       now.tv_nsec = tv->tv_usec * NSEC_PER_USEC;
-
-       clock_set_calendar_value(now);
-       boottime.tv_sec += delta;
-#if NFSCLIENT || NFSSERVER
-       lease_updatetime(delta);
-#endif
+       return 0;
 }
 
-#define tickadj                (40 * NSEC_PER_USEC)    /* "standard" skew, ns / 10 ms */
-#define        bigadj          (1 * NSEC_PER_SEC)              /* use 10x skew above bigadj ns */
-
-struct adjtime_args {
-       struct timeval *delta;
-       struct timeval *olddelta;
-};
-/* ARGSUSED */
-int
-adjtime(p, uap, retval)
-       struct proc *p;
-       register struct adjtime_args *uap;
-       register_t *retval;
+static void
+setthetime(
+       struct timeval  *tv)
 {
-       struct timeval atv;
-       int64_t total;
-       uint32_t delta;
-       int error;
-
-       if (error = suser(p->p_ucred, &p->p_acflag))
-               return (error);
-       if (error = copyin((caddr_t)uap->delta,
-                                                       (caddr_t)&atv, sizeof (struct timeval)))
-               return (error);
-               
-    /*
-     * Compute the total correction and the rate at which to apply it.
-     */
-       total = (int64_t)atv.tv_sec * NSEC_PER_SEC + atv.tv_usec * NSEC_PER_USEC;
-       if (total > bigadj || total < -bigadj)
-               delta = 10 * tickadj;
-       else
-               delta = tickadj;
-
-       total = clock_set_calendar_adjtime(total, delta);
-
-       if (uap->olddelta) {
-               atv.tv_sec = total / NSEC_PER_SEC;
-               atv.tv_usec = (total / NSEC_PER_USEC) % USEC_PER_SEC;
-               (void) copyout((caddr_t)&atv,
-                                                       (caddr_t)uap->olddelta, sizeof (struct timeval));
-       }
-
-       return (0);
+       clock_set_calendar_microtime(tv->tv_sec, tv->tv_usec);
 }
 
 /*
- * Initialze the time of day register. 
- * Trust the RTC except for the case where it is set before 
- * the UNIX epoch. In that case use the the UNIX epoch.
- * The argument passed in is ignored.
+ *     Verify the calendar value.  If negative,
+ *     reset to zero (the epoch).
  */
 void
-inittodr(base)
-       time_t base;
+inittodr(
+       __unused time_t base)
 {
+       struct timeval  tv;
+
        /*
         * Assertion:
         * The calendar has already been
-        * set up from the battery clock.
+        * set up from the platform clock.
         *
         * The value returned by microtime()
         * is gotten from the calendar.
         */
-       microtime(&time);
-
-       /*
-        * This variable still exists to keep
-        * 'w' happy.  It should only be considered
-        * an approximation.
-        */
-       boottime.tv_sec = time.tv_sec;
-       boottime.tv_usec = 0;
+       microtime(&tv);
 
-       /*
-        * If the RTC does not have acceptable value, i.e. time before
-        * the UNIX epoch, set it to the UNIX epoch
-        */
-       if (time.tv_sec < 0) {
-               printf ("WARNING: preposterous time in Real Time Clock");
-               time.tv_sec = 0;        /* the UNIX epoch */
-               time.tv_usec = 0;
-               setthetime(&time);
-               boottime = time;
+       if (tv.tv_sec < 0 || tv.tv_usec < 0) {
+               printf("WARNING: preposterous time in Real Time Clock");
+               tv.tv_sec = 0;          /* the UNIX epoch */
+               tv.tv_usec = 0;
+               setthetime(&tv);
                printf(" -- CHECK AND RESET THE DATE!\n");
        }
+}
 
-       return;
+time_t
+boottime_sec(void)
+{
+       clock_sec_t     whole_secs;
+       clock_nsec_t    frac_nsecs;     /* filled in by the call, then discarded */
+
+       clock_get_boottime_nanotime(&whole_secs, &frac_nsecs);
+       return whole_secs;              /* seconds part only */
+}
 
-void   timevaladd(
-                       struct timeval  *t1,
-                       struct timeval  *t2);
-void   timevalsub(
-                       struct timeval  *t1,
-                       struct timeval  *t2);
-void   timevalfix(
-                       struct timeval  *t1);
+void
+boottime_timeval(struct timeval *tv)
+{
+       clock_sec_t             secs;
+       clock_usec_t    microsecs;
 
-uint64_t
-               tvtoabstime(
-                       struct timeval  *tvp);
+       clock_get_boottime_microtime(&secs, &microsecs);
+
+       tv->tv_sec = secs;
+       tv->tv_usec = microsecs;
+}
 
 /*
  * Get value of an interval timer.  The process virtual and
@@ -279,33 +295,34 @@ uint64_t
  * is kept as an absolute time rather than as a delta, so that
  * it is easy to keep periodic real-time signals from drifting.
  *
- * Virtual time timers are processed in the hardclock() routine of
- * kern_clock.c.  The real time timer is processed by a callout
- * routine.  Since a callout may be delayed in real time due to
+ * The real time timer is processed by a callout routine.
+ * Since a callout may be delayed in real time due to
  * other processing in the system, it is possible for the real
  * time callout routine (realitexpire, given below), to be delayed
  * in real time past when it is supposed to occur.  It does not
  * suffice, therefore, to reload the real time .it_value from the
  * real time .it_interval.  Rather, we compute the next time in
  * absolute time when the timer should go off.
+ *
+ * Returns:    0                       Success
+ *             EINVAL                  Invalid argument
+ *     copyout:EFAULT                  Bad address
  */
-struct getitimer_args {
-       u_int   which;
-       struct itimerval *itv;
-}; 
 /* ARGSUSED */
 int
-getitimer(p, uap, retval)
-       struct proc *p;
-       register struct getitimer_args *uap;
-       register_t *retval;
+getitimer(struct proc *p, struct getitimer_args *uap, __unused int32_t *retval)
 {
        struct itimerval aitv;
 
-       if (uap->which > ITIMER_PROF)
-               return(EINVAL);
-       if (uap->which == ITIMER_REAL) {
+       if (uap->which > ITIMER_PROF) {
+               return EINVAL;
+       }
+
+       bzero(&aitv, sizeof(aitv));
+
+       proc_spinlock(p);
+       switch (uap->which) {
+       case ITIMER_REAL:
                /*
                 * If time for real time timer has passed return 0,
                 * else return difference between current time and
@@ -313,72 +330,150 @@ getitimer(p, uap, retval)
                 */
                aitv = p->p_realtimer;
                if (timerisset(&p->p_rtime)) {
-                       struct timeval          now;
+                       struct timeval          now;
 
                        microuptime(&now);
-                       if (timercmp(&p->p_rtime, &now, <))
+                       if (timercmp(&p->p_rtime, &now, <)) {
                                timerclear(&aitv.it_value);
-                       else {
+                       else {
                                aitv.it_value = p->p_rtime;
                                timevalsub(&aitv.it_value, &now);
                        }
-               }
-               else
+               } else {
                        timerclear(&aitv.it_value);
+               }
+               break;
+
+       case ITIMER_VIRTUAL:
+               aitv = p->p_vtimer_user;
+               break;
+
+       case ITIMER_PROF:
+               aitv = p->p_vtimer_prof;
+               break;
        }
-       else
-               aitv = p->p_stats->p_timer[uap->which];
 
-       return (copyout((caddr_t)&aitv,
-                                               (caddr_t)uap->itv, sizeof (struct itimerval)));
+       proc_spinunlock(p);
+
+       if (IS_64BIT_PROCESS(p)) {
+               struct user64_itimerval user_itv;
+               bzero(&user_itv, sizeof(user_itv));
+               user_itv.it_interval.tv_sec = aitv.it_interval.tv_sec;
+               user_itv.it_interval.tv_usec = aitv.it_interval.tv_usec;
+               user_itv.it_value.tv_sec = aitv.it_value.tv_sec;
+               user_itv.it_value.tv_usec = aitv.it_value.tv_usec;
+               return copyout((caddr_t)&user_itv, uap->itv, sizeof(user_itv));
+       } else {
+               struct user32_itimerval user_itv;
+               bzero(&user_itv, sizeof(user_itv));
+               user_itv.it_interval.tv_sec = (user32_time_t)aitv.it_interval.tv_sec;
+               user_itv.it_interval.tv_usec = aitv.it_interval.tv_usec;
+               user_itv.it_value.tv_sec = (user32_time_t)aitv.it_value.tv_sec;
+               user_itv.it_value.tv_usec = aitv.it_value.tv_usec;
+               return copyout((caddr_t)&user_itv, uap->itv, sizeof(user_itv));
+       }
 }
 
-struct setitimer_args {
-       u_int   which;
-       struct  itimerval *itv;
-       struct  itimerval *oitv;
-};
+/*
+ * Returns:    0                       Success
+ *             EINVAL                  Invalid argument
+ *     copyin:EFAULT                   Bad address
+ *     getitimer:EINVAL                Invalid argument
+ *     getitimer:EFAULT                Bad address
+ */
 /* ARGSUSED */
 int
-setitimer(p, uap, retval)
-       struct proc *p;
-       register struct setitimer_args *uap;
-       register_t *retval;
+setitimer(struct proc *p, struct setitimer_args *uap, int32_t *retval)
 {
        struct itimerval aitv;
-       register struct itimerval *itvp;
+       user_addr_t itvp;
        int error;
 
-       if (uap->which > ITIMER_PROF)
-               return (EINVAL);
-       if ((itvp = uap->itv) &&
-               (error = copyin((caddr_t)itvp,
-                                                       (caddr_t)&aitv, sizeof (struct itimerval))))
-               return (error);
-       if ((uap->itv = uap->oitv) && (error = getitimer(p, uap, retval)))
-               return (error);
-       if (itvp == 0)
-               return (0);
-       if (itimerfix(&aitv.it_value) || itimerfix(&aitv.it_interval))
-               return (EINVAL);
-       if (uap->which == ITIMER_REAL) {
-               thread_call_func_cancel(realitexpire, (void *)p->p_pid, FALSE);
+       bzero(&aitv, sizeof(aitv));
+
+       if (uap->which > ITIMER_PROF) {
+               return EINVAL;
+       }
+       if ((itvp = uap->itv)) {
+               if (IS_64BIT_PROCESS(p)) {
+                       struct user64_itimerval user_itv;
+                       if ((error = copyin(itvp, (caddr_t)&user_itv, sizeof(user_itv)))) {
+                               return error;
+                       }
+                       aitv.it_interval.tv_sec = (__darwin_time_t)user_itv.it_interval.tv_sec;
+                       aitv.it_interval.tv_usec = user_itv.it_interval.tv_usec;
+                       aitv.it_value.tv_sec = (__darwin_time_t)user_itv.it_value.tv_sec;
+                       aitv.it_value.tv_usec = user_itv.it_value.tv_usec;
+               } else {
+                       struct user32_itimerval user_itv;
+                       if ((error = copyin(itvp, (caddr_t)&user_itv, sizeof(user_itv)))) {
+                               return error;
+                       }
+                       aitv.it_interval.tv_sec = user_itv.it_interval.tv_sec;
+                       aitv.it_interval.tv_usec = user_itv.it_interval.tv_usec;
+                       aitv.it_value.tv_sec = user_itv.it_value.tv_sec;
+                       aitv.it_value.tv_usec = user_itv.it_value.tv_usec;
+               }
+       }
+       if ((uap->itv = uap->oitv) && (error = getitimer(p, (struct getitimer_args *)uap, retval))) {
+               return error;
+       }
+       if (itvp == 0) {
+               return 0;
+       }
+       if (itimerfix(&aitv.it_value) || itimerfix(&aitv.it_interval)) {
+               return EINVAL;
+       }
+
+       switch (uap->which) {
+       case ITIMER_REAL:
+               proc_spinlock(p);
                if (timerisset(&aitv.it_value)) {
                        microuptime(&p->p_rtime);
                        timevaladd(&p->p_rtime, &aitv.it_value);
-                       thread_call_func_delayed(
-                                                               realitexpire, (void *)p->p_pid,
-                                                                               tvtoabstime(&p->p_rtime));
-               }
-               else
+                       p->p_realtimer = aitv;
+                       if (!thread_call_enter_delayed_with_leeway(p->p_rcall, NULL,
+                           tvtoabstime(&p->p_rtime), 0, THREAD_CALL_DELAY_USER_NORMAL)) {
+                               p->p_ractive++;
+                       }
+               } else {
                        timerclear(&p->p_rtime);
+                       p->p_realtimer = aitv;
+                       if (thread_call_cancel(p->p_rcall)) {
+                               p->p_ractive--;
+                       }
+               }
+               proc_spinunlock(p);
+
+               break;
+
+
+       case ITIMER_VIRTUAL:
+               if (timerisset(&aitv.it_value)) {
+                       task_vtimer_set(p->task, TASK_VTIMER_USER);
+               } else {
+                       task_vtimer_clear(p->task, TASK_VTIMER_USER);
+               }
+
+               proc_spinlock(p);
+               p->p_vtimer_user = aitv;
+               proc_spinunlock(p);
+               break;
 
-               p->p_realtimer = aitv;
+       case ITIMER_PROF:
+               if (timerisset(&aitv.it_value)) {
+                       task_vtimer_set(p->task, TASK_VTIMER_PROF);
+               } else {
+                       task_vtimer_clear(p->task, TASK_VTIMER_PROF);
+               }
+
+               proc_spinlock(p);
+               p->p_vtimer_prof = aitv;
+               proc_spinunlock(p);
+               break;
        }
-       else
-               p->p_stats->p_timer[uap->which] = aitv;
 
-       return (0);
+       return 0;
 }
 
 /*
@@ -391,66 +486,155 @@ setitimer(p, uap, retval)
  */
 void
 realitexpire(
-       void            *pid)
+       struct proc *p)
 {
-       register struct proc *p;
-       struct timeval  now;
-       boolean_t               funnel_state = thread_funnel_set(kernel_flock, TRUE);
+       struct proc *r;
+       struct timeval t;
+
+       r = proc_find(p->p_pid);
+
+       proc_spinlock(p);
 
-       p = pfind((pid_t)pid);
-       if (p == NULL) {
-               (void) thread_funnel_set(kernel_flock, FALSE);
+       assert(p->p_ractive > 0);
+
+       if (--p->p_ractive > 0 || r != p) {
+               /*
+                * bail, because either proc is exiting
+                * or there's another active thread call
+                */
+               proc_spinunlock(p);
+
+               if (r != NULL) {
+                       proc_rele(r);
+               }
                return;
        }
 
        if (!timerisset(&p->p_realtimer.it_interval)) {
+               /*
+                * p_realtimer was cleared while this call was pending,
+                * send one last SIGALRM, but don't re-arm
+                */
                timerclear(&p->p_rtime);
+               proc_spinunlock(p);
+
                psignal(p, SIGALRM);
+               proc_rele(p);
+               return;
+       }
+
+       proc_spinunlock(p);
+
+       /*
+        * Send the signal before re-arming the next thread call,
+        * so in case psignal blocks, we won't create yet another thread call.
+        */
+
+       psignal(p, SIGALRM);
+
+       proc_spinlock(p);
+
+       /* Should we still re-arm the next thread call? */
+       if (!timerisset(&p->p_realtimer.it_interval)) {
+               timerclear(&p->p_rtime);
+               proc_spinunlock(p);
 
-               (void) thread_funnel_set(kernel_flock, FALSE);
+               proc_rele(p);
                return;
        }
 
-       microuptime(&now);
+       microuptime(&t);
        timevaladd(&p->p_rtime, &p->p_realtimer.it_interval);
-       if (timercmp(&p->p_rtime, &now, <=)) {
-               if ((p->p_rtime.tv_sec + 2) >= now.tv_sec) {
+
+       if (timercmp(&p->p_rtime, &t, <=)) {
+               if ((p->p_rtime.tv_sec + 2) >= t.tv_sec) {
                        for (;;) {
                                timevaladd(&p->p_rtime, &p->p_realtimer.it_interval);
-                               if (timercmp(&p->p_rtime, &now, >))
+                               if (timercmp(&p->p_rtime, &t, >)) {
                                        break;
+                               }
                        }
-               }
-               else {
+               } else {
                        p->p_rtime = p->p_realtimer.it_interval;
-                       timevaladd(&p->p_rtime, &now);
+                       timevaladd(&p->p_rtime, &t);
                }
        }
 
-       thread_call_func_delayed(realitexpire, pid, tvtoabstime(&p->p_rtime));
+       assert(p->p_rcall != NULL);
 
-       psignal(p, SIGALRM);
+       if (!thread_call_enter_delayed_with_leeway(p->p_rcall, NULL, tvtoabstime(&p->p_rtime), 0,
+           THREAD_CALL_DELAY_USER_NORMAL)) {
+               p->p_ractive++;
+       }
+
+       proc_spinunlock(p);
+
+       proc_rele(p);
+}
+
+/*
+ * Called once in proc_exit to clean up after an armed or pending realitexpire
+ *
+ * This will only be called after the proc refcount is drained,
+ * so realitexpire cannot be currently holding a proc ref.
+ * i.e. its proc_find() call has returned, or will return, PROC_NULL.
+ */
+void
+proc_free_realitimer(proc_t p)
+{
+       proc_spinlock(p);
+
+       assert(p->p_rcall != NULL);
+       assert(p->p_refcount == 0);
+
+       timerclear(&p->p_realtimer.it_interval);
+
+       if (thread_call_cancel(p->p_rcall)) {
+               assert(p->p_ractive > 0);
+               p->p_ractive--;
+       }
+
+       while (p->p_ractive > 0) {
+               proc_spinunlock(p);
+
+               delay(1);
 
-       (void) thread_funnel_set(kernel_flock, FALSE);
+               proc_spinlock(p);
+       }
+
+       thread_call_t call = p->p_rcall;
+       p->p_rcall = NULL;
+
+       proc_spinunlock(p);
+
+       thread_call_free(call);
 }
 
 /*
  * Check that a proposed value to load into the .it_value or
- * .it_interval part of an interval timer is acceptable, and
- * fix it to have at least minimal value (i.e. if it is less
- * than the resolution of the clock, round it up.)
+ * .it_interval part of an interval timer is acceptable.
  */
 int
-itimerfix(tv)
-       struct timeval *tv;
+itimerfix(
+       struct timeval *tv)
 {
-
        if (tv->tv_sec < 0 || tv->tv_sec > 100000000 ||
-           tv->tv_usec < 0 || tv->tv_usec >= 1000000)
-               return (EINVAL);
-       if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < tick)
-               tv->tv_usec = tick;
-       return (0);
+           tv->tv_usec < 0 || tv->tv_usec >= 1000000) {
+               return EINVAL;
+       }
+       return 0;
+}
+
+int
+timespec_is_valid(const struct timespec *ts)
+{
+       /* Returns 1 when 0 <= tv_sec <= INT32_MAX (safe for clock_*() functions that
+        * accept 32-bit ints) and 0 <= tv_nsec < NSEC_PER_SEC; returns 0 otherwise. */
+       if (ts->tv_sec < 0 || ts->tv_sec > INT32_MAX ||
+           ts->tv_nsec < 0 || (unsigned long long)ts->tv_nsec >= NSEC_PER_SEC) {
+               return 0;
+       }
+       return 1;
+}
 
 /*
@@ -458,16 +642,16 @@ itimerfix(tv)
  * of microseconds, which must be less than a second,
  * i.e. < 1000000.  If the timer expires, then reload
  * it.  In this case, carry over (usec - old value) to
- * reducint the value reloaded into the timer so that
+ * reduce the value reloaded into the timer so that
  * the timer does not drift.  This routine assumes
  * that it is called in a context where the timers
  * on which it is operating cannot change in value.
  */
 int
-itimerdecr(itp, usec)
-       register struct itimerval *itp;
-       int usec;
+itimerdecr(proc_t p,
+    struct itimerval *itp, int usec)
 {
+       proc_spinlock(p);
 
        if (itp->it_value.tv_usec < usec) {
                if (itp->it_value.tv_sec == 0) {
@@ -480,20 +664,26 @@ itimerdecr(itp, usec)
        }
        itp->it_value.tv_usec -= usec;
        usec = 0;
-       if (timerisset(&itp->it_value))
-               return (1);
+       if (timerisset(&itp->it_value)) {
+               proc_spinunlock(p);
+               return 1;
+       }
        /* expired, exactly at end of interval */
 expire:
        if (timerisset(&itp->it_interval)) {
                itp->it_value = itp->it_interval;
-               itp->it_value.tv_usec -= usec;
-               if (itp->it_value.tv_usec < 0) {
-                       itp->it_value.tv_usec += 1000000;
-                       itp->it_value.tv_sec--;
+               if (itp->it_value.tv_sec > 0) {
+                       itp->it_value.tv_usec -= usec;
+                       if (itp->it_value.tv_usec < 0) {
+                               itp->it_value.tv_usec += 1000000;
+                               itp->it_value.tv_sec--;
+                       }
                }
-       } else
-               itp->it_value.tv_usec = 0;              /* sec is already 0 */
-       return (0);
+       } else {
+               itp->it_value.tv_usec = 0;              /* sec is already 0 */
+       }
+       proc_spinunlock(p);
+       return 0;
 }
 
 /*
@@ -508,7 +698,6 @@ timevaladd(
        struct timeval *t1,
        struct timeval *t2)
 {
-
        t1->tv_sec += t2->tv_sec;
        t1->tv_usec += t2->tv_usec;
        timevalfix(t1);
@@ -518,7 +707,6 @@ timevalsub(
        struct timeval *t1,
        struct timeval *t2)
 {
-
        t1->tv_sec -= t2->tv_sec;
        t1->tv_usec -= t2->tv_usec;
        timevalfix(t1);
@@ -527,7 +715,6 @@ void
 timevalfix(
        struct timeval *t1)
 {
-
        if (t1->tv_usec < 0) {
                t1->tv_sec--;
                t1->tv_usec += 1000000;
@@ -538,28 +725,64 @@ timevalfix(
        }
 }
 
+/*
+ * Carry whole seconds out of tv_usec into tv_sec so that tv_usec ends up
+ * below one million.  Both fields are asserted to be non-negative on entry.
+ *
+ * Returns FALSE as soon as os_add_overflow() reports that the carry cannot
+ * be added to tv_sec (tv_usec is left unreduced in that case); returns TRUE
+ * otherwise.
+ */
+static boolean_t
+timeval_fixusec(
+       struct timeval *t1)
+{
+       assert(t1->tv_usec >= 0);
+       assert(t1->tv_sec >= 0);
+
+       /* Already in range: nothing to carry. */
+       if (t1->tv_usec < 1000000) {
+               return TRUE;
+       }
+
+       if (os_add_overflow(t1->tv_sec, t1->tv_usec / 1000000, &t1->tv_sec)) {
+               return FALSE;
+       }
+       t1->tv_usec %= 1000000;
+
+       return TRUE;
+}
+
 /*
  * Return the best possible estimate of the time in the timeval
  * to which tvp points.
  */
 void
 microtime(
-       struct timeval  *tvp)
+       struct timeval  *tvp)
+{
+       /* Locals in the clock layer's own types; copied into *tvp below. */
+       clock_sec_t             tv_sec;
+       clock_usec_t    tv_usec;
+
+       /* Read the calendar clock as a seconds/microseconds pair. */
+       clock_get_calendar_microtime(&tv_sec, &tv_usec);
+
+       tvp->tv_sec = tv_sec;
+       tvp->tv_usec = tv_usec;
+}
+
+/*
+ * Fill *tvp with the seconds/microseconds pair returned by
+ * clock_get_calendar_absolute_and_microtime(), which is also handed the
+ * caller's abstime pointer.
+ * NOTE(review): abstime presumably receives the absolute-time reading that
+ * corresponds to the returned calendar time -- confirm against the clock API.
+ */
+void
+microtime_with_abstime(
+       struct timeval  *tvp, uint64_t *abstime)
 {
-       mach_timespec_t         now = clock_get_calendar_value();
+       clock_sec_t             tv_sec;
+       clock_usec_t    tv_usec;
 
-       tvp->tv_sec = now.tv_sec;
-       tvp->tv_usec = now.tv_nsec / NSEC_PER_USEC;
+       clock_get_calendar_absolute_and_microtime(&tv_sec, &tv_usec, abstime);
+
+       /* Copy from the clock layer's types into the caller's timeval. */
+       tvp->tv_sec = tv_sec;
+       tvp->tv_usec = tv_usec;
 }
 
 void
 microuptime(
-       struct timeval  *tvp)
+       struct timeval  *tvp)
 {
-       mach_timespec_t         now = clock_get_system_value();
+       clock_sec_t             tv_sec;
+       clock_usec_t    tv_usec;
+
+       /*
+        * Read the system clock as a seconds/microseconds pair
+        * (presumably time since boot, going by the function name).
+        */
+       clock_get_system_microtime(&tv_sec, &tv_usec);
 
-       tvp->tv_sec = now.tv_sec;
-       tvp->tv_usec = now.tv_nsec / NSEC_PER_USEC;
+       /* Copy from the clock layer's types into the caller's timeval. */
+       tvp->tv_sec = tv_sec;
+       tvp->tv_usec = tv_usec;
 }
 
 /*
@@ -569,41 +792,135 @@ void
 nanotime(
        struct timespec *tsp)
 {
-       mach_timespec_t         now = clock_get_calendar_value();
+       clock_sec_t             tv_sec;
+       clock_nsec_t    tv_nsec;
 
-       tsp->tv_sec = now.tv_sec;
-       tsp->tv_nsec = now.tv_nsec;
+       clock_get_calendar_nanotime(&tv_sec, &tv_nsec);
+
+       tsp->tv_sec = tv_sec;
+       tsp->tv_nsec = tv_nsec;
 }
 
 void
 nanouptime(
        struct timespec *tsp)
 {
-       mach_timespec_t         now = clock_get_system_value();
+       clock_sec_t             tv_sec;
+       clock_nsec_t    tv_nsec;
+
+       /*
+        * Read the system clock as a seconds/nanoseconds pair
+        * (presumably time since boot, going by the function name).
+        */
+       clock_get_system_nanotime(&tv_sec, &tv_nsec);
 
-       tsp->tv_sec = now.tv_sec;
-       tsp->tv_nsec = now.tv_nsec;
+       /* Copy from the clock layer's types into the caller's timespec. */
+       tsp->tv_sec = tv_sec;
+       tsp->tv_nsec = tv_nsec;
 }
 
 uint64_t
 tvtoabstime(
-       struct timeval  *tvp)
+       struct timeval  *tvp)
 {
-       uint64_t        result, usresult;
+       uint64_t        result, usresult;
 
+       /*
+        * Convert the seconds and microseconds fields separately and
+        * return their sum.
+        * NOTE(review): tv_sec is narrowed to uint32_t before conversion,
+        * so values that do not fit in 32 bits are truncated.
+        */
        clock_interval_to_absolutetime_interval(
-                                               tvp->tv_sec, NSEC_PER_SEC, &result);
+               (uint32_t)tvp->tv_sec, NSEC_PER_SEC, &result);
        clock_interval_to_absolutetime_interval(
-                                               tvp->tv_usec, NSEC_PER_USEC, &usresult);
+               tvp->tv_usec, NSEC_PER_USEC, &usresult);
 
-       return (result + usresult);
+       return result + usresult;
 }
-void
-time_zone_slock_init(void)
+
+/*
+ * Convert a timespec to absolute-time units.  The seconds and nanoseconds
+ * fields are each passed through clock_interval_to_absolutetime_interval()
+ * (with scale factors NSEC_PER_SEC and 1 respectively) and the two results
+ * are summed.  Both fields are narrowed to uint32_t before conversion.
+ */
+uint64_t
+tstoabstime(struct timespec *ts)
+{
+       uint64_t sec_part;
+       uint64_t nsec_part;
+
+       clock_interval_to_absolutetime_interval(
+               (uint32_t)ts->tv_sec, NSEC_PER_SEC, &sec_part);
+       clock_interval_to_absolutetime_interval(
+               (uint32_t)ts->tv_nsec, 1, &nsec_part);
+
+       return sec_part + nsec_part;
+}
+
+#if NETWORKING
+/*
+ * ratecheck(): simple time-based rate-limit checking.
+ *
+ * Returns 1, and stores the current time from net_uptime2timeval() in
+ * *lasttime, when at least mininterval has elapsed since *lasttime or when
+ * *lasttime is still 0,0.  Otherwise returns 0 and leaves *lasttime alone.
+ */
+int
+ratecheck(struct timeval *lasttime, const struct timeval *mininterval)
 {
-       extern simple_lock_data_t tz_slock;
+       struct timeval tv, delta;
+       int rv = 0;
 
-       simple_lock_init(&tz_slock);
+       /* delta = now - *lasttime */
+       net_uptime2timeval(&tv);
+       delta = tv;
+       timevalsub(&delta, lasttime);
 
+       /*
+        * The check for 0,0 ensures that the message is seen at least once,
+        * even if the interval is huge.
+        */
+       if (timevalcmp(&delta, mininterval, >=) ||
+           (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) {
+               *lasttime = tv;
+               rv = 1;
+       }
+
+       return rv;
+}
+
+/*
+ * ppsratecheck(): packets (or events) per second limitation.
+ *
+ * lasttime: start of the current counting window; overwritten with the
+ *           time from net_uptime2timeval() whenever a new window starts.
+ * curpps:   running event count for the current window; reset to 0 when a
+ *           new window starts and then bumped on every call, including
+ *           calls that are refused.  The count saturates instead of
+ *           wrapping.
+ * maxpps:   limit per window; a negative value disables the limit.
+ *
+ * Returns 1 if the event is allowed, 0 if it should be suppressed.
+ */
+int
+ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps)
+{
+       struct timeval tv, delta;
+       int rv;
+
+       net_uptime2timeval(&tv);
+
+       timersub(&tv, lasttime, &delta);
+
+       /*
+        * Check for 0,0 so that the message will be seen at least once.
+        * If one second or more has passed since the last update of
+        * lasttime, reset the counter.
+        *
+        * We increment *curpps even in the *curpps < maxpps case, as some
+        * callers may use *curpps for statistics as well.
+        */
+       if ((lasttime->tv_sec == 0 && lasttime->tv_usec == 0) ||
+           delta.tv_sec >= 1) {
+               *lasttime = tv;
+               *curpps = 0;
+               rv = 1;
+       } else if (maxpps < 0) {
+               rv = 1;
+       } else if (*curpps < maxpps) {
+               rv = 1;
+       } else {
+               rv = 0;
+       }
 
+#if 1 /* DIAGNOSTIC? */
+       /*
+        * Be careful about wrap-around.  Evaluating *curpps + 1 when
+        * *curpps is INT_MAX is signed overflow (undefined behavior), so
+        * the sum cannot be tested after the fact; let os_add_overflow()
+        * detect it.  As before, the counter is only stored when the
+        * incremented value is positive.
+        */
+       int next_pps;
+
+       if (!os_add_overflow(*curpps, 1, &next_pps) && next_pps > 0) {
+               *curpps = next_pps;
+       }
+#else
+       /*
+        * assume that there's not too many calls to this function.
+        * not sure if the assumption holds, as it depends on *caller's*
+        * behavior, not the behavior of this function.
+        * IMHO it is wrong to make assumption on the caller's behavior,
+        * so the above #if is #if 1, not #ifdef DIAGNOSTIC.
+        */
+       *curpps = *curpps + 1;
+#endif
+
+       return rv;
+}
+#endif /* NETWORKING */
+
+/*
+ * Pass mbrt_args->local_timestamp to mach_bridge_remote_time() and hand the
+ * result back through *retval.  The process argument is unused and the
+ * function always returns 0.
+ */
+int
+__mach_bridge_remote_time(__unused struct proc *p, struct __mach_bridge_remote_time_args *mbrt_args, uint64_t *retval)
+{
+       uint64_t remote_ts;
+
+       remote_ts = mach_bridge_remote_time(mbrt_args->local_timestamp);
+       *retval = remote_ts;
+
+       return 0;
 }