#include <sys/quota.h>
#include <sys/dirent.h>
#include <sys/event.h>
+#include <kern/thread_call.h>
#include <kern/locks.h>
/* Resize variables: */
u_int32_t hfs_resize_filesmoved;
u_int32_t hfs_resize_totalfiles;
+
+ /*
+ * About the sync counters:
+ * hfs_sync_scheduled keeps track of whether a timer was scheduled but we
+ * haven't yet started processing the callback (i.e. we
+ * haven't begun the flush). It remains non-zero even
+ * after the callback has been invoked, up until we
+ * start the flush.
+ * hfs_sync_incomplete keeps track of the number of callbacks that have
+ * not completed yet (including callbacks not yet
+ * invoked). We cannot safely unmount until this
+ * drops to zero.
+ *
+ * In both cases, we use counters, not flags, so that we can avoid
+ * taking locks.
+ */
+ int32_t hfs_sync_scheduled;
+ int32_t hfs_sync_incomplete;
+ u_int64_t hfs_last_sync_request_time;
+ u_int64_t hfs_last_sync_time;
+ uint32_t hfs_active_threads;
+ thread_call_t hfs_syncer; // removable devices get sync'ed by this guy
+
} hfsmount_t;
+#define HFS_META_DELAY (100)
+#define HFS_MILLISEC_SCALE (1000*1000)
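+/*
+ * Both constants feed clock_interval_to_deadline() below (see hfs_syncer()
+ * and hfs_sync_ejectable()). Assuming the scale factor is interpreted as
+ * nanoseconds per unit, HFS_MILLISEC_SCALE (1000*1000 ns) makes the interval
+ * a millisecond count, so HFS_META_DELAY defers the syncer by roughly 100 ms.
+ */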
+
typedef hfsmount_t ExtendedVCB;
/* Aliases for legacy (Mac OS 9) field names */
extern int hfs_start_transaction(struct hfsmount *hfsmp);
extern int hfs_end_transaction(struct hfsmount *hfsmp);
+extern void hfs_sync_ejectable(struct hfsmount *hfsmp);
/*****************************************************************************
}
case HFS_GET_MOUNT_TIME:
- return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
- break;
+ if (is64bit) {
+ *(user_time_t *)(ap->a_data) = (user_time_t) hfsmp->hfs_mount_time;
+ } else {
+ *(time_t *)(ap->a_data) = (time_t) hfsmp->hfs_mount_time;
+ }
+ return 0;
case HFS_GET_LAST_MTIME:
- return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
- break;
+ if (is64bit) {
+ *(user_time_t *)(ap->a_data) = (user_time_t) hfsmp->hfs_last_mounted_mtime;
+ } else {
+ *(time_t *)(ap->a_data) = (time_t) hfsmp->hfs_last_mounted_mtime;
+ }
+ return 0;
case HFS_SET_BOOT_INFO:
if (!vnode_isvroot(vp))
return (0);
}
+int hfs_last_io_wait_time = 125000;
+SYSCTL_INT (_kern, OID_AUTO, hfs_last_io_wait_time, CTLFLAG_RW, &hfs_last_io_wait_time, 0, "number of usecs to wait after an i/o before syncing ejectable media");
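+/*
+ * hfs_last_io_wait_time is in microseconds; the 125000 default matches the
+ * 125 millisecond limit discussed in hfs_syncer() below. Because it is
+ * declared CTLFLAG_RW it should be tunable at run time, e.g.:
+ *
+ *   sysctl -w kern.hfs_last_io_wait_time=250000
+ */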
+
+static void
+hfs_syncer(void *arg0, void *unused)
+{
+#pragma unused(unused)
+
+ struct hfsmount *hfsmp = arg0;
+ uint32_t secs, usecs, delay = HFS_META_DELAY;
+ uint64_t now;
+ struct timeval nowtv, last_io;
+
+ clock_get_calendar_microtime(&secs, &usecs);
+ now = ((uint64_t)secs * 1000000LL) + usecs;
+ //
+ // If we have put off the last sync for more than
+ // 5 seconds, force it so that we don't let too
+ // much i/o queue up (since flushing the journal
+ // causes the i/o queue to drain)
+ //
+ if ((now - hfsmp->hfs_last_sync_time) >= 5000000LL) {
+ goto doit;
+ }
+
+ //
+ // Find out when the last i/o was done to this device (read or write).
+ //
+ throttle_info_get_last_io_time(hfsmp->hfs_mp, &last_io);
+ microuptime(&nowtv);
+ timevalsub(&nowtv, &last_io);
+
+ //
+ // If the last i/o was too recent, defer this sync until later.
+ // The limit (125 milliseconds) was picked based on
+ // some experiments copying data to an SD card and seems to
+ // prevent us from issuing too many syncs.
+ //
+ if (nowtv.tv_sec == 0 && nowtv.tv_usec > 0 && nowtv.tv_usec < hfs_last_io_wait_time) {
+ delay /= 2;
+ goto resched;
+ }
+
+ //
+ // If there's pending i/o, also skip the sync.
+ //
+ if (hfsmp->hfs_devvp && hfsmp->hfs_devvp->v_numoutput > 0) {
+ goto resched;
+ }
+
+
+ //
+ // Only flush the journal if we have not synced recently,
+ // the last sync request was more than 100 milliseconds
+ // ago, and no one is in the middle of a transaction right
+ // now. Otherwise we defer the sync and reschedule it
+ // for later.
+ //
+ if ( ((now - hfsmp->hfs_last_sync_time) >= 100000LL)
+ && ((now - hfsmp->hfs_last_sync_request_time) >= 100000LL)
+ && (hfsmp->hfs_active_threads == 0)
+ && (hfsmp->hfs_global_lock_nesting == 0)) {
+
+ doit:
+ OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
+ if (hfsmp->jnl) {
+ journal_flush(hfsmp->jnl);
+ }
+ OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
+
+ clock_get_calendar_microtime(&secs, &usecs);
+ hfsmp->hfs_last_sync_time = ((int64_t)secs * 1000000) + usecs;
+
+ } else if (hfsmp->hfs_active_threads == 0) {
+ uint64_t deadline;
+
+ resched:
+ clock_interval_to_deadline(delay, HFS_MILLISEC_SCALE, &deadline);
+ thread_call_enter_delayed(hfsmp->hfs_syncer, deadline);
+ return;
+ }
+
+ //
+ // NOTE: we decrement these *after* we're done with the journal_flush()
+ // since it can take a significant amount of time and we don't want more
+ // callbacks scheduled until this one is done.
+ //
+ OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
+ OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
+ wakeup((caddr_t)&hfsmp->hfs_sync_incomplete);
+}
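+/*
+ * Summary of the policy above: force a flush when the last sync is more than
+ * five seconds old; otherwise reschedule (with a halved delay) if the device
+ * saw I/O within hfs_last_io_wait_time or still has writes outstanding, and
+ * only flush the journal when both the last sync and the last sync request
+ * are at least 100 ms old and no transaction is in flight.
+ */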
+
+extern int IOBSDIsMediaEjectable( const char *cdev_name );
/*
* Common code for mount and mountroot
u_int32_t iswritable;
daddr64_t mdb_offset;
int isvirtual = 0;
+ int isroot = 0;
ronly = vfs_isrdonly(mp);
dev = vnode_specrdev(devvp);
cred = p ? vfs_context_ucred(context) : NOCRED;
mntwrapper = 0;
+ if (args == NULL) {
+ /* only hfs_mountroot passes us NULL as the 'args' argument */
+ isroot = 1;
+ }
+
bp = NULL;
hfsmp = NULL;
mdbp = NULL;
}
}
+ /* ejectability checks will time out when the device is root_device, so skip them */
+ if (isroot == 0) {
+ if ((hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) == 0 &&
+ IOBSDIsMediaEjectable(mp->mnt_vfsstat.f_mntfromname)) {
+ hfsmp->hfs_syncer = thread_call_allocate(hfs_syncer, hfsmp);
+ if (hfsmp->hfs_syncer == NULL) {
+ printf("hfs: failed to allocate syncer thread callback for %s (%s)\n",
+ mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname);
+ }
+ }
+ }
+
/*
* Start looking for free space to drop below this level and generate a
* warning immediately if needed:
if (hfsmp->hfs_flags & HFS_METADATA_ZONE)
(void) hfs_recording_suspend(hfsmp);
+ /*
+ * Cancel any pending timers for this volume. Then wait for any timers
+ * which have fired, but whose callbacks have not yet completed.
+ */
+ if (hfsmp->hfs_syncer)
+ {
+ struct timespec ts = {0, 100000000}; /* 0.1 seconds */
+
+ /*
+ * Cancel any timers that have been scheduled, but have not
+ * fired yet. NOTE: The kernel considers a timer complete as
+ * soon as it starts your callback, so the kernel does not
+ * keep track of the number of callbacks in progress.
+ */
+ if (thread_call_cancel(hfsmp->hfs_syncer))
+ OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
+ thread_call_free(hfsmp->hfs_syncer);
+ hfsmp->hfs_syncer = NULL;
+
+ /*
+ * This waits for all of the callbacks that were entered before
+ * we did thread_call_cancel above, but have not completed yet.
+ */
+ while(hfsmp->hfs_sync_incomplete > 0)
+ {
+ msleep((caddr_t)&hfsmp->hfs_sync_incomplete, NULL, PWAIT, "hfs_unmount", &ts);
+ }
+
+ if (hfsmp->hfs_sync_incomplete < 0)
+ printf("hfs_unmount: pm_sync_incomplete underflow (%d)!\n", hfsmp->hfs_sync_incomplete);
+ }
+
/*
* Flush out the b-trees, volume bitmap and Volume Header
*/
journal_flush(hfsmp->jnl);
}
+ {
+ uint32_t secs, usecs;
+ uint64_t now;
+
+ clock_get_calendar_microtime(&secs, &usecs);
+ now = ((uint64_t)secs * 1000000LL) + usecs;
+ hfsmp->hfs_last_sync_time = now;
+ }
+
lck_rw_unlock_shared(&hfsmp->hfs_insync);
return (allerror);
}
}
+
+//
+// Fire off a timed callback to sync the disk if the
+// volume is on ejectable media.
+//
+ __private_extern__
+void
+hfs_sync_ejectable(struct hfsmount *hfsmp)
+{
+ if (hfsmp->hfs_syncer) {
+ uint32_t secs, usecs;
+ uint64_t now;
+
+ clock_get_calendar_microtime(&secs, &usecs);
+ now = ((uint64_t)secs * 1000000) + usecs;
+
+ if (hfsmp->hfs_sync_scheduled == 0) {
+ uint64_t deadline;
+
+ hfsmp->hfs_last_sync_request_time = now;
+
+ clock_interval_to_deadline(HFS_META_DELAY, HFS_MILLISEC_SCALE, &deadline);
+
+ /*
+ * Increment hfs_sync_scheduled on the assumption that we're the
+ * first thread to schedule the timer. If some other thread beat
+ * us, then we'll decrement it. If we *were* the first to
+ * schedule the timer, then we need to keep track that the
+ * callback is waiting to complete.
+ */
+ OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
+ if (thread_call_enter_delayed(hfsmp->hfs_syncer, deadline))
+ OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
+ else
+ OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
+ }
+ }
+}
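+/*
+ * Callers added in this change include what appears to be
+ * hfs_end_transaction() (right after dropping the global lock) and the close
+ * path when FWASWRITTEN is set, so a sync gets scheduled whenever metadata
+ * may have been dirtied on ejectable media.
+ */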
+
+
__private_extern__
int
hfs_start_transaction(struct hfsmount *hfsmp)
if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
lck_rw_lock_shared(&hfsmp->hfs_global_lock);
+ OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
unlock_on_err = 1;
}
out:
if (ret != 0 && unlock_on_err) {
lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
+ OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
}
return ret;
}
if (need_unlock) {
+ OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
+ hfs_sync_ejectable(hfsmp);
}
return ret;
}
hfs_unlock(cp);
+
+ if (ap->a_fflag & FWASWRITTEN) {
+ hfs_sync_ejectable(hfsmp);
+ }
+
return (0);
}
skip_rm:
/*
* All done with tvp and fvp
+ *
+ * We also jump to this point if there was no destination observed during lookup and namei.
+ * However, because only iocounts are held at the VFS layer, there is nothing preventing a
+ * competing thread from racing us and creating a file or dir at the destination of this rename
+ * operation. If this occurs, it may cause us to get a spurious EEXIST out of the cat_rename
+ * call below. To preserve rename's atomicity, we need to signal VFS to re-drive the
+ * namei/lookup and restart the rename operation. EEXIST is an allowable errno to be bubbled
+ * out of the rename syscall, but not for this reason, since it is a synonym errno for ENOTEMPTY.
+ * To signal VFS, we return ERECYCLE (which is also used for lookup restarts). This errno
+ * will be swallowed by VFS, which will then restart the rename operation.
*/
lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
hfs_systemfile_unlock(hfsmp, lockflags);
if (error) {
+ if (error == EEXIST) {
+ error = ERECYCLE;
+ }
goto out;
}
#include <sys/ubc_internal.h>
+#include <hfs/hfs.h> /* <rdar://7042269> manifest constants */
+
struct psemnode;
struct pshmnode;
int devBlockSize = 0;
unsigned int fflag;
user_addr_t argp;
+ boolean_t is64bit;
AUDIT_ARG(fd, uap->fd);
AUDIT_ARG(cmd, uap->cmd);
}
context.vc_thread = current_thread();
context.vc_ucred = fp->f_cred;
- if (proc_is64bit(p)) {
+
+ is64bit = proc_is64bit(p);
+ if (is64bit) {
argp = uap->arg;
}
else {
}
default:
- if (uap->cmd < FCNTL_FS_SPECIFIC_BASE) {
- error = EINVAL;
+ /*
+ * This is an fcntl() that we do not recognize at this level;
+ * if this is a vnode, we send it down into the VNOP_IOCTL
+ * for this vnode; this can include special devices, and will
+ * effectively overload fcntl() to send ioctl()'s.
+ */
+ if ((uap->cmd & IOC_VOID) && (uap->cmd & IOC_INOUT)) {
+ error = EINVAL;
goto out;
}
-
- // if it's a fs-specific fcntl() then just pass it through
-
+
if (fp->f_type != DTYPE_VNODE) {
error = EBADF;
goto out;
proc_fdunlock(p);
if ( (error = vnode_getwithref(vp)) == 0 ) {
- error = VNOP_IOCTL(vp, uap->cmd, CAST_DOWN(caddr_t, argp), 0, &context);
+#define STK_PARAMS 128
+ char stkbuf[STK_PARAMS];
+ unsigned int size;
+ caddr_t data, memp;
+ int fix_cmd = uap->cmd;
+
+ /*
+ * For this to work properly, we have to copy in the
+ * ioctl() cmd argument if there is one; we must also
+ * check that a command parameter, if present, does
+ * not exceed the maximum command length dictated by
+ * the number of bits we have available in the command
+ * to represent a structure length. Finally, we have
+ * to copy the results back out, if it is that type of
+ * ioctl().
+ */
+ size = IOCPARM_LEN(uap->cmd);
+ if (size > IOCPARM_MAX) {
+ (void)vnode_put(vp);
+ error = EINVAL;
+ break;
+ }
+
+ /*
+ * <rdar://7042269> fix up the command we received via
+ * fcntl into one that has a valid size and a copy-out
+ * direction.
+ */
+ if (fix_cmd == HFS_GET_MOUNT_TIME ||
+ fix_cmd == HFS_GET_LAST_MTIME) {
+ if (is64bit)
+ size = sizeof(user_time_t);
+ else
+ size = sizeof(time_t);
+ fix_cmd |= IOC_OUT;
+ }
+
+ memp = NULL;
+ if (size > sizeof (stkbuf)) {
+ if ((memp = (caddr_t)kalloc(size)) == 0) {
+ (void)vnode_put(vp);
+ error = ENOMEM;
+ goto outdrop;
+ }
+ data = memp;
+ } else {
+ data = &stkbuf[0];
+ }
+
+ if (fix_cmd & IOC_IN) {
+ if (size) {
+ /* structure */
+ error = copyin(argp, data, size);
+ if (error) {
+ (void)vnode_put(vp);
+ if (memp)
+ kfree(memp, size);
+ goto outdrop;
+ }
+ } else {
+ /* int */
+ if (is64bit) {
+ *(user_addr_t *)data = argp;
+ } else {
+ *(uint32_t *)data = (uint32_t)argp;
+ }
+ }
+ } else if ((fix_cmd & IOC_OUT) && size) {
+ /*
+ * Zero the buffer so the user always
+ * gets back something deterministic.
+ */
+ bzero(data, size);
+ } else if (fix_cmd & IOC_VOID) {
+ if (is64bit) {
+ *(user_addr_t *)data = argp;
+ } else {
+ *(uint32_t *)data = (uint32_t)argp;
+ }
+ }
+
+ /*
+ * <rdar://7042269> We pass the unmodified uap->cmd
+ * to the underlying VNOP so that we don't confuse it;
+ * but we are going to handle its copyout() when it
+ * gets back.
+ */
+ error = VNOP_IOCTL(vp, uap->cmd, CAST_DOWN(caddr_t, data), 0, &context);
(void)vnode_put(vp);
+
+ /* Copy any output data to user */
+ if (error == 0 && (fix_cmd & IOC_OUT) && size)
+ error = copyout(data, argp, size);
+ if (memp)
+ kfree(memp, size);
}
break;
-
}
outdrop:
fg->fg_lflags |= FG_TERM;
lck_mtx_unlock(&fg->fg_lock);
- proc_fdunlock(p);
+ if (p)
+ proc_fdunlock(p);
error = closef_finish(fp, fg, p, &context);
- proc_fdlock(p);
+
+ if (p)
+ proc_fdlock(p);
return(error);
}
static void unp_disconnect(struct unpcb *);
static void unp_shutdown(struct unpcb *);
static void unp_drop(struct unpcb *, int);
-static void unp_gc(void);
+__private_extern__ void unp_gc(void);
static void unp_scan(struct mbuf *, void (*)(struct fileglob *));
static void unp_mark(struct fileglob *);
static void unp_discard(struct fileglob *);
* gets them (resulting in a "panic: closef: count < 0").
*/
sorflush(unp->unp_socket);
+
+ /* Per domain mutex deadlock avoidance */
+ socket_unlock(unp->unp_socket, 0);
unp_gc();
+ socket_lock(unp->unp_socket, 0);
}
if (unp->unp_addr)
FREE(unp->unp_addr, M_SONAME);
}
static int unp_defer, unp_gcing, unp_gcwait;
+static thread_t unp_gcthread = NULL;
/* always called under uipc_lock */
void
unp_gc_wait(void)
{
+ if (unp_gcthread == current_thread())
+ return;
+
while (unp_gcing != 0) {
unp_gcwait = 1;
msleep(&unp_gcing, uipc_lock, 0 , "unp_gc_wait", NULL);
}
-static void
+__private_extern__ void
unp_gc(void)
{
struct fileglob *fg, *nextfg;
struct socket *so;
- struct fileglob **extra_ref, **fpp;
+ static struct fileglob **extra_ref;
+ struct fileglob **fpp;
int nunref, i;
int need_gcwakeup = 0;
}
unp_gcing = 1;
unp_defer = 0;
+ unp_gcthread = current_thread();
lck_mtx_unlock(uipc_lock);
/*
* before going through all this, set all FDs to
* to see if we hold any file descriptors in its
* message buffers. Follow those links and mark them
* as accessible too.
+ *
+ * In case a file is passed onto itself we need to
+ * release the file lock.
*/
- unp_scan(so->so_rcv.sb_mb, unp_mark);
lck_mtx_unlock(&fg->fg_lock);
+
+ unp_scan(so->so_rcv.sb_mb, unp_mark);
}
} while (unp_defer);
/*
tfg = *fpp;
if (tfg->fg_type == DTYPE_SOCKET && tfg->fg_data != NULL) {
- int locked = 0;
-
so = (struct socket *)(tfg->fg_data);
- /* XXXX */
- /* Assume local sockets use a global lock */
- if (so->so_proto->pr_domain->dom_family != PF_LOCAL) {
- socket_lock(so, 0);
- locked = 1;
- }
+ socket_lock(so, 0);
+
sorflush(so);
- if (locked)
- socket_unlock(so, 0);
+ socket_unlock(so, 0);
}
}
for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
lck_mtx_lock(uipc_lock);
unp_gcing = 0;
+ unp_gcthread = NULL;
if (unp_gcwait != 0) {
unp_gcwait = 0;
struct _throttle_io_info_t {
struct timeval last_normal_IO_timestamp;
+ struct timeval last_IO_timestamp;
SInt32 numthreads_throttling;
};
SYSCTL_INT(_debug, OID_AUTO, lowpri_max_window_msecs, CTLFLAG_RW, &lowpri_max_window_msecs, LOWPRI_INITIAL_WINDOW_MSECS, "");
SYSCTL_INT(_debug, OID_AUTO, lowpri_max_waiting_msecs, CTLFLAG_RW, &lowpri_max_waiting_msecs, LOWPRI_INITIAL_WINDOW_MSECS, "");
+void
+throttle_info_get_last_io_time(mount_t mp, struct timeval *tv)
+{
+ size_t devbsdunit;
+
+ devbsdunit = mp->mnt_devbsdunit;
+
+ if (devbsdunit < LOWPRI_MAX_NUM_DEV) {
+ *tv = _throttle_io_info[devbsdunit].last_IO_timestamp;
+ } else {
+ memset(tv, 0, sizeof(*tv));
+ }
+}
+
+void
+update_last_io_time(mount_t mp)
+{
+ size_t devbsdunit;
+
+ devbsdunit = mp->mnt_devbsdunit;
+
+ if (devbsdunit < LOWPRI_MAX_NUM_DEV) {
+ microuptime(&_throttle_io_info[devbsdunit].last_IO_timestamp);
+ }
+}
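+/*
+ * The two helpers above record and expose the per-device time of the most
+ * recent write, keyed by mnt_devbsdunit. hfs_syncer() calls
+ * throttle_info_get_last_io_time() to decide whether to defer a journal
+ * flush; the timestamp itself is refreshed both in the strategy path below
+ * when (bflags & B_READ) == 0 and via update_last_io_time() in what looks
+ * like the buffer completion path.
+ */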
+
int throttle_io_will_be_throttled(int lowpri_window_msecs, size_t devbsdunit)
{
struct timeval elapsed;
}
}
}
+
+ if ((bflags & B_READ) == 0) {
+ size_t devbsdunit;
+
+ if (buf_vnode(bp)->v_mount != NULL)
+ devbsdunit = buf_vnode(bp)->v_mount->mnt_devbsdunit;
+ else
+ devbsdunit = LOWPRI_MAX_NUM_DEV - 1;
+
+ microuptime(&_throttle_io_info[devbsdunit].last_IO_timestamp);
+ }
+
(*bdevsw[major(bdev)].d_strategy)(bp);
return (0);
#include <netat/asp.h>
#include <netat/debug.h>
+__private_extern__ int atp_resp_seqno2big = 0;
+
static void atp_trans_complete(struct atp_trans *);
void atp_x_done_locked(void *);
void atp_treq_event(void *);
case ATP_CMD_TRESP:
{
register struct atp_trans *trp;
- register int seqno;
- register at_ddp_t *ddp;
+ register unsigned int seqno;
+ register at_ddp_t *ddp;
/*
* we just got a response, find the trans record
* If we can't find one then ignore the message
*/
seqno = athp->bitmap;
+ if (seqno > 7) {
+ atp_resp_seqno2big++;
+ ddp = AT_DDP_HDR(m);
+ dPrintf(D_M_ATP_LOW, (D_L_INPUT|D_L_ERROR),
+ ("atp_rput: dropping TRESP seqno too big, tid=%d,loc=%d,rem=%d.%d,seqno=%u\n",
+ UAS_VALUE_NTOH(athp->tid),
+ ddp->dst_socket, ddp->src_node, ddp->src_socket, seqno));
+ gbuf_freem(m);
+ return;
+ }
if (trp == NULL) {
ddp = AT_DDP_HDR(m);
dPrintf(D_M_ATP_LOW, (D_L_INPUT|D_L_ERROR),
- ("atp_rput: dropping TRESP, no trp,tid=%d,loc=%d,rem=%d.%d,seqno=%d\n",
+ ("atp_rput: dropping TRESP, no trp,tid=%d,loc=%d,rem=%d.%d,seqno=%u\n",
UAS_VALUE_NTOH(athp->tid),
ddp->dst_socket, ddp->src_node, ddp->src_socket, seqno));
gbuf_freem(m);
if (!(trp->tr_bitmap&atp_mask[seqno]) || trp->tr_rcv[seqno]) {
ddp = AT_DDP_HDR(m);
dPrintf(D_M_ATP_LOW, (D_L_INPUT|D_L_ERROR),
- ("atp_rput: dropping TRESP, duplicate,tid=%d,loc=%d,rem=%d.%d,seqno=%d\n",
+ ("atp_rput: dropping TRESP, duplicate,tid=%d,loc=%d,rem=%d.%d,seqno=%u\n",
UAS_VALUE_NTOH(athp->tid),
ddp->dst_socket, ddp->src_node, ddp->src_socket, seqno));
gbuf_freem(m);
at_ddp_stats_t at_ddp_stats; /* DDP statistics */
SYSCTL_STRUCT(_net_appletalk, OID_AUTO, ddpstats, CTLFLAG_RD,
&at_ddp_stats, at_ddp_stats, "AppleTalk DDP Stats");
+extern int atp_resp_seqno2big;
+SYSCTL_INT(_net_appletalk, OID_AUTO, atp_resp_seqno2big, CTLFLAG_RD,
+ &atp_resp_seqno2big, 0, "Appletalk ATP seqno too big count");
static void ioccmd_t_32_to_64( ioccmd_t *from_p, user_ioccmd_t *to_p );
static void ioccmd_t_64_to_32( user_ioccmd_t *from_p, ioccmd_t *to_p );
extern int icmp6_nodeinfo;
extern struct inpcbinfo ripcbinfo;
extern lck_mtx_t *ip6_mutex;
-extern lck_mtx_t *nd6_mutex;
+extern lck_mtx_t *nd6_mutex;
+extern lck_mtx_t *inet6_domain_mutex;
static void icmp6_errcount(struct icmp6errstat *, int, int);
static int icmp6_rip6_input(struct mbuf **, int);
icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echo);
if (code != 0)
goto badcode;
+
+ if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+ icmp6stat.icp6s_toofreq++;
+ goto freeit;
+ }
+
if ((n = m_copy(m, 0, M_COPYALL)) == NULL) {
/* Give up remote */
+ goto rate_limit_checked;
break;
}
if ((n->m_flags & M_EXT) != 0
if (maxlen >= MCLBYTES) {
/* Give up remote */
m_freem(n0);
+ goto rate_limit_checked;
break;
}
MGETHDR(n, M_DONTWAIT, n0->m_type); /* MAC-OK */
if (n == NULL) {
/* Give up remote */
m_freem(n0);
+ goto rate_limit_checked;
break;
}
M_COPY_PKTHDR(n, n0);
icmp6stat.icp6s_outhist[ICMP6_ECHO_REPLY]++;
icmp6_reflect(n, noff);
}
+ goto rate_limit_checked;
break;
case ICMP6_ECHO_REPLY:
icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldquery);
else
icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldreport);
+
+ if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+ icmp6stat.icp6s_toofreq++;
+ goto freeit;
+ }
+
if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
/* give up local */
mld6_input(m, off);
}
mld6_input(n, off);
/* m stays. */
+ goto rate_limit_checked;
break;
case MLD6_LISTENER_DONE:
IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo),
return IPPROTO_DONE);
#endif
+ if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+ icmp6stat.icp6s_toofreq++;
+ goto freeit;
+ }
+
n = m_copy(m, 0, M_COPYALL);
if (n)
n = ni6_input(n, off);
icmp6stat.icp6s_outhist[ICMP6_WRUREPLY]++;
icmp6_reflect(n, noff);
}
+ goto rate_limit_checked;
break;
case ICMP6_WRUREPLY:
goto badcode;
if (icmp6len < sizeof(struct nd_router_solicit))
goto badlen;
+
+ if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+ icmp6stat.icp6s_toofreq++;
+ goto freeit;
+ }
+
if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
/* give up local */
nd6_rs_input(m, off, icmp6len);
}
nd6_rs_input(n, off, icmp6len);
/* m stays. */
+ goto rate_limit_checked;
break;
case ND_ROUTER_ADVERT:
goto badcode;
if (icmp6len < sizeof(struct nd_router_advert))
goto badlen;
+
+ if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+ icmp6stat.icp6s_toofreq++;
+ goto freeit;
+ }
+
if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
/* give up local */
nd6_ra_input(m, off, icmp6len);
}
nd6_ra_input(n, off, icmp6len);
/* m stays. */
+ goto rate_limit_checked;
break;
case ND_NEIGHBOR_SOLICIT:
goto badcode;
if (icmp6len < sizeof(struct nd_neighbor_solicit))
goto badlen;
+
+ if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+ icmp6stat.icp6s_toofreq++;
+ goto freeit;
+ }
+
if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
/* give up local */
nd6_ns_input(m, off, icmp6len);
}
nd6_ns_input(n, off, icmp6len);
/* m stays. */
+ goto rate_limit_checked;
break;
case ND_NEIGHBOR_ADVERT:
goto badcode;
if (icmp6len < sizeof(struct nd_neighbor_advert))
goto badlen;
+
+ if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+ icmp6stat.icp6s_toofreq++;
+ goto freeit;
+ }
+
if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
/* give up local */
nd6_na_input(m, off, icmp6len);
}
nd6_na_input(n, off, icmp6len);
/* m stays. */
+ goto rate_limit_checked;
break;
case ND_REDIRECT:
goto badcode;
if (icmp6len < sizeof(struct nd_redirect))
goto badlen;
+
+ if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+ icmp6stat.icp6s_toofreq++;
+ goto freeit;
+ }
+
if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
/* give up local */
icmp6_redirect_input(m, off);
}
icmp6_redirect_input(n, off);
/* m stays. */
+ goto rate_limit_checked;
break;
case ICMP6_ROUTER_RENUMBERING:
break;
default:
+ if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+ icmp6stat.icp6s_toofreq++;
+ goto freeit;
+ }
+
nd6log((LOG_DEBUG,
"icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n",
icmp6->icmp6_type, ip6_sprintf(&ip6->ip6_src),
/* deliver */
} else {
/* ICMPv6 informational: MUST not deliver */
+ goto rate_limit_checked;
break;
}
deliver:
+ if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+ icmp6stat.icp6s_toofreq++;
+ goto freeit;
+ }
+
if (icmp6_notify_error(m, off, icmp6len, code)) {
/* In this case, m should've been freed. */
return(IPPROTO_DONE);
break;
}
+ if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+ icmp6stat.icp6s_toofreq++;
+ goto freeit;
+ }
+rate_limit_checked:
/* deliver the packet to appropriate sockets */
icmp6_rip6_input(&m, *offp);
sdst.sin6_family = AF_INET6;
sdst.sin6_len = sizeof(struct sockaddr_in6);
bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
+
+ /*
+ * Radar 6843900
+ * Release the IPv6 domain lock because we are going to take domain_proto_mtx
+ * and could otherwise cause a deadlock with other threads taking these locks
+ * in the reverse order -- e.g. frag6_slowtimo() from pfslowtimo()
+ */
+ lck_mtx_unlock(inet6_domain_mutex);
pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst);
+ lck_mtx_lock(inet6_domain_mutex);
#if IPSEC
key_sa_routechange((struct sockaddr *)&sdst);
#endif
#define IPV6CTL_MAXFRAGS 41 /* max fragments */
+#define IPV6CTL_NEIGHBORGCTHRESH 46
+#define IPV6CTL_MAXIFPREFIXES 47
+#define IPV6CTL_MAXIFDEFROUTERS 48
+#define IPV6CTL_MAXDYNROUTES 49
+
/* New entries should be added here from current IPV6CTL_MAXID value. */
/* to define items, should talk with KAME guys first, for *BSD compatibility */
-#define IPV6CTL_MAXID 42
+#define IPV6CTL_MAXID 50
#ifdef KERNEL_PRIVATE
#define CTL_IPV6PROTO_NAMES { \
* walk list every 5 sec. */
int ip6_v6only = 0; /* Mapped addresses on by default - Radar 3347718 */
+int ip6_neighborgcthresh = 2048; /* Threshold # of NDP entries for GC */
+int ip6_maxifprefixes = 16; /* Max acceptable prefixes via RA per IF */
+int ip6_maxifdefrouters = 16; /* Max acceptable def routers via RA */
+int ip6_maxdynroutes = 4096; /* Max # of routes created via redirect */
+
u_int32_t ip6_id = 0UL;
int ip6_keepfaith = 0;
time_t ip6_log_time = (time_t)0L;
&rip6stat, rip6stat, "");
SYSCTL_STRUCT(_net_inet6_ip6, OID_AUTO, mrt6stat, CTLFLAG_RD,
&mrt6stat, mrt6stat, "");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_NEIGHBORGCTHRESH,
+ neighborgcthresh, CTLFLAG_RW, &ip6_neighborgcthresh, 0, "");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXIFPREFIXES,
+ maxifprefixes, CTLFLAG_RW, &ip6_maxifprefixes, 0, "");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXIFDEFROUTERS,
+ maxifdefrouters, CTLFLAG_RW, &ip6_maxifdefrouters, 0, "");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXDYNROUTES,
+ maxdynroutes, CTLFLAG_RW, &ip6_maxdynroutes, 0, "");
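+/*
+ * The four knobs above should appear under net.inet6.ip6 (e.g.
+ * net.inet6.ip6.neighborgcthresh), with defaults of 2048, 16, 16 and 4096
+ * respectively; the first three are enforced in the neighbor-discovery
+ * changes below.
+ */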
/* net.inet6.icmp6 */
#define ip6_mapped_addr_on (!ip6_v6only)
extern int ip6_v6only;
+extern int ip6_neighborgcthresh; /* Threshold # of NDP entries for GC */
+extern int ip6_maxifprefixes; /* Max acceptable prefixes via RA per IF */
+extern int ip6_maxifdefrouters; /* Max acceptable def routers via RA */
+extern int ip6_maxdynroutes; /* Max # of routes created via redirect */
+
extern struct socket *ip6_mrouter; /* multicast routing daemon */
extern int ip6_sendredirects; /* send IP redirects when forwarding? */
extern int ip6_maxfragpackets; /* Maximum packets in reassembly queue */
static void nd6_slowtimo(void *ignored_arg);
+
void
nd6_init()
{
struct in6_ifaddr *ia6, *nia6;
struct in6_addrlifetime *lt6;
struct timeval timenow;
+ int count = 0;
getmicrotime(&timenow);
-
ln = llinfo_nd6.ln_next;
while (ln && ln != &llinfo_nd6) {
struct rtentry *rt;
ndi = &nd_ifinfo[ifp->if_index];
dst = (struct sockaddr_in6 *)rt_key(rt);
+ count++;
+
if (ln->ln_expire > timenow.tv_sec) {
- ln = next;
- continue;
+
+ /* Radar 6871508: Check if we have too many cache entries.
+ * In that case purge 20% of the table to make space
+ * for the new entries.
+ * This is a bit crude but keeps the deletion in the
+ * timer thread only.
+ */
+
+ if ((ip6_neighborgcthresh >= 0 &&
+ nd6_inuse >= ip6_neighborgcthresh) &&
+ ((count % 5) == 0)) {
+
+ if (ln->ln_state > ND6_LLINFO_INCOMPLETE)
+ ln->ln_state = ND6_LLINFO_STALE;
+ else
+ ln->ln_state = ND6_LLINFO_PURGE;
+ ln->ln_expire = timenow.tv_sec;
+
+ /* fallthrough and call nd6_free() */
+ } else {
+ ln = next;
+ continue;
+ }
}
/* sanity check */
break;
case ND6_LLINFO_STALE:
+ case ND6_LLINFO_PURGE:
/* Garbage Collection(RFC 2461 5.3) */
if (ln->ln_expire)
next = nd6_free(rt);
};
#endif /* KERNEL_PRIVATE */
+#define ND6_LLINFO_PURGE -3
#define ND6_LLINFO_NOSTATE -2
/*
* We don't need the WAITDELETE state any more, but we keep the definition
u_int8_t randomseed0[8]; /* upper 64 bits of MD5 digest */
u_int8_t randomseed1[8]; /* lower 64 bits (usually the EUI64 IFID) */
u_int8_t randomid[8]; /* current random ID */
+ /* keep track of routers and prefixes on this link */
+ int32_t nprefixes;
+ int32_t ndefrouters;
};
#define ND6_IFF_PERFORMNUD 0x1
struct nd_defrouter *dr, int nd6locked)
{
struct nd_defrouter *deldr = NULL;
+ struct nd_ifinfo *ndi = &nd_ifinfo[dr->ifp->if_index];
struct nd_prefix *pr;
/*
if (deldr)
defrouter_select();
+ ndi->ndefrouters--;
+ if (ndi->ndefrouters < 0) {
+ log(LOG_WARNING, "defrtrlist_del: negative count on %s\n",
+ if_name(dr->ifp));
+ }
+
if (nd6locked == 0)
lck_mtx_unlock(nd6_mutex);
struct nd_defrouter *new)
{
struct nd_defrouter *dr, *n;
+ struct nd_ifinfo *ndi = &nd_ifinfo[new->ifp->if_index];
lck_mtx_lock(nd6_mutex);
if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) {
return(NULL);
}
+ if (ip6_maxifdefrouters >= 0 &&
+ ndi->ndefrouters >= ip6_maxifdefrouters) {
+ lck_mtx_unlock(nd6_mutex);
+ return (NULL);
+ }
+
n = (struct nd_defrouter *)_MALLOC(sizeof(*n), M_IP6NDP, M_NOWAIT);
if (n == NULL) {
lck_mtx_unlock(nd6_mutex);
TAILQ_INSERT_TAIL(&nd_defrouter, n, dr_entry);
if (TAILQ_FIRST(&nd_defrouter) == n)
defrouter_select();
+
+ ndi->ndefrouters++;
lck_mtx_unlock(nd6_mutex);
return(n);
lck_mtx_unlock(nd6_mutex);
}
+static void
+purge_detached(struct ifnet *ifp)
+{
+ struct nd_prefix *pr, *pr_next;
+ struct in6_ifaddr *ia;
+ struct ifaddr *ifa, *ifa_next;
+
+ lck_mtx_lock(nd6_mutex);
+
+ for (pr = nd_prefix.lh_first; pr; pr = pr_next) {
+ pr_next = pr->ndpr_next;
+ if (pr->ndpr_ifp != ifp ||
+ IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr) ||
+ ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
+ !LIST_EMPTY(&pr->ndpr_advrtrs)))
+ continue;
+
+ for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = ifa_next) {
+ ifa_next = ifa->ifa_list.tqe_next;
+ if (ifa->ifa_addr->sa_family != AF_INET6)
+ continue;
+ ia = (struct in6_ifaddr *)ifa;
+ if ((ia->ia6_flags & IN6_IFF_AUTOCONF) ==
+ IN6_IFF_AUTOCONF && ia->ia6_ndpr == pr) {
+ in6_purgeaddr(ifa, 1);
+ }
+ }
+ if (pr->ndpr_refcnt == 0)
+ prelist_remove(pr, 1);
+ }
+
+ lck_mtx_unlock(nd6_mutex);
+}
+
int
nd6_prelist_add(
struct nd_prefix *pr,
{
struct nd_prefix *new = NULL;
int i;
+ struct nd_ifinfo *ndi = &nd_ifinfo[pr->ndpr_ifp->if_index];
+
+ if (ip6_maxifprefixes >= 0) {
+ if (ndi->nprefixes >= ip6_maxifprefixes / 2)
+ purge_detached(pr->ndpr_ifp);
+ if (ndi->nprefixes >= ip6_maxifprefixes)
+ return(ENOMEM);
+ }
new = (struct nd_prefix *)_MALLOC(sizeof(*new), M_IP6NDP, M_NOWAIT);
if (new == NULL)
if (dr) {
pfxrtr_add(new, dr);
}
+
+ ndi->nprefixes++;
+
lck_mtx_unlock(nd6_mutex);
return 0;
{
struct nd_pfxrouter *pfr, *next;
int e;
+ struct nd_ifinfo *ndi = &nd_ifinfo[pr->ndpr_ifp->if_index];
/* make sure to invalidate the prefix until it is really freed. */
pr->ndpr_vltime = 0;
FREE(pfr, M_IP6NDP);
}
+ ndi->nprefixes--;
+ if (ndi->nprefixes < 0) {
+ log(LOG_WARNING, "prelist_remove: negative count on %s\n",
+ if_name(pr->ndpr_ifp));
+ }
+
FREE(pr, M_IP6NDP);
pfxlist_onlink_check(1);
* OUT: NULL: not found.
* others: found and return the pointer.
*/
+u_int32_t sah_search_calls = 0;
+u_int32_t sah_search_count = 0;
struct secasvar *
key_allocsa_policy(saidx)
struct secasindex *saidx;
u_int16_t dstport;
lck_mtx_lock(sadb_mutex);
+ sah_search_calls++;
LIST_FOREACH(sah, &sahtree, chain) {
+ sah_search_count++;
if (sah->state == SADB_SASTATE_DEAD)
continue;
if (key_cmpsaidx(&sah->saidx, saidx, CMP_MODE | CMP_REQID))
* and do to remove or to expire.
* XXX: year 2038 problem may remain.
*/
-
+int key_timehandler_debug = 0;
+u_int32_t spd_count = 0, sah_count = 0, dead_sah_count = 0, empty_sah_count = 0, larval_sav_count = 0, mature_sav_count = 0, dying_sav_count = 0, dead_sav_count = 0;
+u_int64_t total_sav_count = 0;
void
key_timehandler(void)
{
sp != NULL;
sp = nextsp) {
+ spd_count++;
nextsp = LIST_NEXT(sp, chain);
if (sp->state == IPSEC_SPSTATE_DEAD) {
sah != NULL;
sah = nextsah) {
+ sah_count++;
nextsah = LIST_NEXT(sah, chain);
/* if sah has been dead, then delete it and process next sah. */
if (sah->state == SADB_SASTATE_DEAD) {
key_delsah(sah);
+ dead_sah_count++;
+ continue;
+ }
+
+ if (LIST_FIRST(&sah->savtree[SADB_SASTATE_LARVAL]) == NULL &&
+ LIST_FIRST(&sah->savtree[SADB_SASTATE_MATURE]) == NULL &&
+ LIST_FIRST(&sah->savtree[SADB_SASTATE_DYING]) == NULL &&
+ LIST_FIRST(&sah->savtree[SADB_SASTATE_DEAD]) == NULL) {
+ key_delsah(sah);
+ empty_sah_count++;
continue;
}
sav != NULL;
sav = nextsav) {
+ larval_sav_count++;
+ total_sav_count++;
nextsav = LIST_NEXT(sav, chain);
if (tv.tv_sec - sav->created > key_larval_lifetime) {
sav != NULL;
sav = nextsav) {
+ mature_sav_count++;
+ total_sav_count++;
nextsav = LIST_NEXT(sav, chain);
/* we don't need to check. */
sav != NULL;
sav = nextsav) {
+ dying_sav_count++;
+ total_sav_count++;
nextsav = LIST_NEXT(sav, chain);
/* we don't need to check. */
sav != NULL;
sav = nextsav) {
+ dead_sav_count++;
+ total_sav_count++;
nextsav = LIST_NEXT(sav, chain);
/* sanity check */
}
}
+ if (++key_timehandler_debug >= 300) {
+ if (key_debug_level) {
+ printf("%s: total stats for %u calls\n", __FUNCTION__, key_timehandler_debug);
+ printf("%s: walked %u SPDs\n", __FUNCTION__, spd_count);
+ printf("%s: walked %llu SAs: LARVAL SAs %u, MATURE SAs %u, DYING SAs %u, DEAD SAs %u\n", __FUNCTION__,
+ total_sav_count, larval_sav_count, mature_sav_count, dying_sav_count, dead_sav_count);
+ printf("%s: walked %u SAHs: DEAD SAHs %u, EMPTY SAHs %u\n", __FUNCTION__,
+ sah_count, dead_sah_count, empty_sah_count);
+ if (sah_search_calls) {
+ printf("%s: SAH search cost %d iters per call\n", __FUNCTION__,
+ (sah_search_count/sah_search_calls));
+ }
+ }
+ spd_count = 0;
+ sah_count = 0;
+ dead_sah_count = 0;
+ empty_sah_count = 0;
+ larval_sav_count = 0;
+ mature_sav_count = 0;
+ dying_sav_count = 0;
+ dead_sav_count = 0;
+ total_sav_count = 0;
+ sah_search_count = 0;
+ sah_search_calls = 0;
+ key_timehandler_debug = 0;
+ }
#ifndef IPSEC_NONBLOCK_ACQUIRE
/* ACQ tree */
{
void load_init_program(struct proc *p);
void __pthread_testcancel(int presyscall);
void syscall_exit_funnelcheck(void);
+void throttle_info_get_last_io_time(mount_t mp, struct timeval *tv);
+void update_last_io_time(mount_t mp);
#endif /* BSD_KERNEL_PRIVATE */
fslog_io_error(bp);
}
+ if (bp->b_vp && bp->b_vp->v_mount && (bp->b_flags & B_READ) == 0) {
+ update_last_io_time(bp->b_vp->v_mount);
+ }
+
if (kdebug_enable) {
int code = DKIO_DONE;
* - Neither the node nor the directory are immutable.
* - The user is not the superuser.
*
- * Deletion is not permitted if the directory is sticky and the caller is not owner of the
- * node or directory.
+ * Deletion is not permitted if the directory is sticky and the caller is
+ * not owner of the node or directory.
*
- * If either the node grants DELETE, or the directory grants DELETE_CHILD, the node may be
- * deleted. If neither denies the permission, and the caller has Posix write access to the
- * directory, then the node may be deleted.
+ * If either the node grants DELETE, or the directory grants DELETE_CHILD,
+ * the node may be deleted. If neither denies the permission, and the
+ * caller has Posix write access to the directory, then the node may be
+ * deleted.
+ *
+ * As an optimization, we cache whether or not delete child is permitted
+ * on directories without the sticky bit set.
*/
-static int
-vnode_authorize_delete(vauth_ctx vcp)
+int
+vnode_authorize_delete(vauth_ctx vcp, boolean_t cached_delete_child);
+/*static*/ int
+vnode_authorize_delete(vauth_ctx vcp, boolean_t cached_delete_child)
{
struct vnode_attr *vap = vcp->vap;
struct vnode_attr *dvap = vcp->dvap;
/* check the ACL on the directory */
delete_child_denied = 0;
- if (VATTR_IS_NOT(dvap, va_acl, NULL)) {
+ if (!cached_delete_child && VATTR_IS_NOT(dvap, va_acl, NULL)) {
eval.ae_requested = KAUTH_VNODE_DELETE_CHILD;
eval.ae_acl = &dvap->va_acl->acl_ace[0];
eval.ae_count = dvap->va_acl->acl_entrycount;
return(EACCES);
}
- /* enforce sticky bit behaviour */
- if ((dvap->va_mode & S_ISTXT) && !vauth_file_owner(vcp) && !vauth_dir_owner(vcp)) {
+ /*
+ * enforce sticky bit behaviour; the cached_delete_child property will
+ * be false and the dvap contents valid for sticky bit directories;
+ * this makes us check the directory each time, but it's unavoidable,
+ * as sticky bit is an exception to caching.
+ */
+ if (!cached_delete_child && (dvap->va_mode & S_ISTXT) && !vauth_file_owner(vcp) && !vauth_dir_owner(vcp)) {
KAUTH_DEBUG("%p DENIED - sticky bit rules (user %d file %d dir %d)",
vcp->vp, cred->cr_uid, vap->va_uid, dvap->va_uid);
return(EACCES);
}
/* check the directory */
- if ((error = vnode_authorize_posix(vcp, VWRITE, 1 /* on_dir */)) != 0) {
+ if (!cached_delete_child && (error = vnode_authorize_posix(vcp, VWRITE, 1 /* on_dir */)) != 0) {
KAUTH_DEBUG("%p ALLOWED - granted by posix permisssions", vcp->vp);
return(error);
}
int result;
int *errorp;
int noimmutable;
- boolean_t parent_authorized_for_delete = FALSE;
+ boolean_t parent_authorized_for_delete_child = FALSE;
boolean_t found_deny = FALSE;
boolean_t parent_ref= FALSE;
* can skip a whole bunch of work... we will still have to
* authorize that this specific child can be removed
*/
- if (vnode_cache_is_authorized(dvp, ctx, KAUTH_VNODE_DELETE) == TRUE)
- parent_authorized_for_delete = TRUE;
+ if (vnode_cache_is_authorized(dvp, ctx, KAUTH_VNODE_DELETE_CHILD) == TRUE)
+ parent_authorized_for_delete_child = TRUE;
} else {
dvp = NULL;
}
KAUTH_DEBUG("%p ERROR - failed to get vnode attributes - %d", vp, result);
goto out;
}
- if (dvp && parent_authorized_for_delete == FALSE) {
+ if (dvp && parent_authorized_for_delete_child == FALSE) {
VATTR_WANTED(&dva, va_mode);
VATTR_WANTED(&dva, va_uid);
VATTR_WANTED(&dva, va_gid);
if ((result = vnode_authorize_checkimmutable(vp, &va, rights, noimmutable)) != 0)
goto out;
if ((rights & KAUTH_VNODE_DELETE) &&
- parent_authorized_for_delete == FALSE &&
+ parent_authorized_for_delete_child == FALSE &&
((result = vnode_authorize_checkimmutable(dvp, &dva, KAUTH_VNODE_DELETE_CHILD, 0)) != 0))
goto out;
goto out;
/*
- * If we're not the superuser, authorize based on file properties.
+ * If we're not the superuser, authorize based on file properties;
+ * note that even if parent_authorized_for_delete_child is TRUE, we
+ * need to check on the node itself.
*/
if (!vfs_context_issuser(ctx)) {
/* process delete rights */
if ((rights & KAUTH_VNODE_DELETE) &&
- parent_authorized_for_delete == FALSE &&
- ((result = vnode_authorize_delete(vcp)) != 0))
+ ((result = vnode_authorize_delete(vcp, parent_authorized_for_delete_child)) != 0))
goto out;
/* process remaining rights */
vnode_cache_authorized_action(vp, ctx, KAUTH_VNODE_SEARCHBYANYONE);
}
}
- if ((rights & KAUTH_VNODE_DELETE) && parent_authorized_for_delete == FALSE) {
+ if ((rights & KAUTH_VNODE_DELETE) && parent_authorized_for_delete_child == FALSE) {
/*
- * parent was successfully and newly authorized for deletions
- * add it to the cache
+ * parent was successfully and newly authorized for content deletions;
+ * add it to the cache, but only if it doesn't have the sticky
+ * bit set on it. This same check is done earlier guarding
+ * fetching of dva, and if we jumped to out without having done
+ * this, we will have returned already because of a non-zero
+ * 'result' value.
*/
- vnode_cache_authorized_action(dvp, ctx, KAUTH_VNODE_DELETE);
+ if (VATTR_IS_SUPPORTED(&dva, va_mode) &&
+ !(dva.va_mode & (S_ISVTX))) {
+ /* OK to cache delete rights */
+ vnode_cache_authorized_action(dvp, ctx, KAUTH_VNODE_DELETE_CHILD);
+ }
}
if (parent_ref)
vnode_put(vp);
/*
* We may encounter a race in the VNOP where the destination didn't
* exist when we did the namei, but it does by the time we go and
- * try to create the entry. In this case, we should re-drive this rename
- * call from the top again.
- */
- if (error == EEXIST) {
+ * try to create the entry. In this case, we should re-drive this rename
+ * call from the top again. Currently, only HFS bubbles out ERECYCLE,
+ * but other filesystems susceptible to this race could return it, too.
+ */
+ if (error == ERECYCLE) {
do_retry = 1;
}
-9.7.0
+9.8.0
# The first line of this file contains the master version number for the kernel.
# All other instances of the kernel version in xnu are derived from this file.
return KERN_SUCCESS;
}
+
+int IOBSDIsMediaEjectable( const char *cdev_name )
+{
+ int ret = 0;
+ OSDictionary *dictionary;
+ OSString *dev_name;
+
+ if (strncmp(cdev_name, "/dev/", 5) == 0) {
+ cdev_name += 5;
+ }
+
+ dictionary = IOService::serviceMatching( "IOMedia" );
+ if( dictionary ) {
+ dev_name = OSString::withCString( cdev_name );
+ if( dev_name ) {
+ IOService *service;
+ mach_timespec_t tv = { 5, 0 }; // wait up to 5 seconds for the device to show up
+
+ dictionary->setObject( kIOBSDNameKey, dev_name );
+ dictionary->retain();
+ service = IOService::waitForService( dictionary, &tv );
+ if( service ) {
+ OSBoolean *ejectable = (OSBoolean *) service->getProperty( "Ejectable" );
+
+ if( ejectable ) {
+ ret = (int)ejectable->getValue();
+ }
+
+ }
+ dev_name->release();
+ }
+ dictionary->release();
+ }
+
+ return ret;
+}
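+/*
+ * hfs_mount (above) passes mp->mnt_vfsstat.f_mntfromname here, which is a
+ * "/dev/..." path; the strncmp() at the top strips that prefix so the name
+ * can be matched against the IOMedia kIOBSDNameKey property.
+ */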
+
} /* extern "C" */
/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
acpi_hibernate_callback_data_t data;
boolean_t did_hibernate;
#endif
+ unsigned int cpu;
+ kern_return_t rc;
+ unsigned int my_cpu;
kprintf("acpi_sleep_kernel hib=%d\n",
current_cpu_datap()->cpu_hibernate);
+ /* Get all CPUs to be in the "off" state */
+ my_cpu = cpu_number();
+ for (cpu = 0; cpu < real_ncpus; cpu += 1) {
+ if (cpu == my_cpu)
+ continue;
+ rc = pmCPUExitHaltToOff(cpu);
+ if (rc != KERN_SUCCESS)
+ panic("Error %d trying to transition CPU %d to OFF",
+ rc, cpu);
+ }
+
/* shutdown local APIC before passing control to BIOS */
lapic_shutdown();
/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
{
cpu_data_t *cdp = cpu_datap(cpu);
+ /*
+ * Wait until the CPU indicates that it has stopped.
+ */
simple_lock(&x86_topo_lock);
while ((cdp->lcpu.state != LCPU_HALT)
- && (cdp->lcpu.state != LCPU_OFF)) {
+ && (cdp->lcpu.state != LCPU_OFF)
+ && !cdp->lcpu.stopped) {
simple_unlock(&x86_topo_lock);
cpu_pause();
simple_lock(&x86_topo_lock);
/*
- * Copyright (c) 2004-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2009 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
return(rc);
}
+kern_return_t
+pmCPUExitHaltToOff(int cpu)
+{
+ kern_return_t rc = KERN_INVALID_ARGUMENT;
+
+ if (pmInitDone
+ && pmDispatch != NULL
+ && pmDispatch->exitHaltToOff != NULL)
+ rc = pmDispatch->exitHaltToOff(cpu_to_lcpu(cpu));
+
+ return(rc);
+}
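+/*
+ * Forwards to the per-CPU power-management dispatch table (the new
+ * exitHaltToOff entry, hence the PM_DISPATCH_VERSION bump elsewhere in this
+ * change); acpi_sleep_kernel() uses it to push every CPU except the caller
+ * into the off state before sleeping.
+ */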
+
/*
* Called to initialize the power management structures for the CPUs.
*/
/*
- * Copyright (c) 2006-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2006-2009 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This value should be changed each time that pmDispatch_t or pmCallBacks_t
* changes.
*/
-#define PM_DISPATCH_VERSION 15
+#define PM_DISPATCH_VERSION 16
/*
* Dispatch table for functions that get installed when the power
void (*pmTimerStateSave)(void);
void (*pmTimerStateRestore)(void);
kern_return_t (*exitHalt)(x86_lcpu_t *lcpu);
+ kern_return_t (*exitHaltToOff)(x86_lcpu_t *lcpu);
void (*markAllCPUsOff)(void);
void (*pmSetRunCount)(uint32_t count);
boolean_t (*pmIsCPUUnAvailable)(x86_lcpu_t *lcpu);
void pmTimerSave(void);
void pmTimerRestore(void);
kern_return_t pmCPUExitHalt(int cpu);
+kern_return_t pmCPUExitHaltToOff(int cpu);
#define PM_HALT_NORMAL 0 /* normal halt path */
#define PM_HALT_DEBUG 1 /* debug code wants to halt */