#define AUE_PIDFORTASK 43049 /* Darwin-specific. */
#define AUE_SYSCTL_NONADMIN 43050
#define AUE_COPYFILE 43051 /* Darwin-specific. */
-#define AUE_DBGPORTFORPID 43052 /* Darwin-specific. */
/*
* Events added to OpenBSM for FreeBSD and Linux; may also be used by Darwin
* in the future.
#define AUE_SETATTRLISTAT 43212 /* Darwin. */
#define AUE_FMOUNT 43213 /* Darwin. */
#define AUE_FSGETPATH_EXTENDED 43214 /* Darwin. */
+#define AUE_DBGPORTFORPID 43215 /* Darwin-specific. */
#define AUE_SESSION_START 44901 /* Darwin. */
#define AUE_SESSION_UPDATE 44902 /* Darwin. */
* Range check the count. How much data can we pass around?
* FIX ME!
*/
- if (dtmodsyms_count == 0 || (dtmodsyms_count > 100 * 1024)) {
+ if (dtmodsyms_count == 0) {
cmn_err(CE_WARN, "dtmodsyms_count is not valid");
return (EINVAL);
}
* Allocate a correctly sized structure and copyin the data.
*/
module_symbols_size = DTRACE_MODULE_SYMBOLS_SIZE(dtmodsyms_count);
+ if (module_symbols_size > (size_t)dtrace_copy_maxsize()) {
+ size_t dtmodsyms_max = DTRACE_MODULE_SYMBOLS_COUNT(dtrace_copy_maxsize());
+ cmn_err(CE_WARN, "dtmodsyms_count %ld is too high, maximum is %ld", dtmodsyms_count, dtmodsyms_max);
+ return (ENOBUFS);
+ }
+
if ((module_symbols = kmem_alloc(module_symbols_size, KM_SLEEP)) == NULL)
return (ENOMEM);
return ENOBUFS;
}
memset(bt, 0, bt_size);
- error = backtrace_user(bt, bt_len, &bt_filled, NULL, NULL);
- if (error) {
+ bt_filled = backtrace_user(bt, bt_len, &error, NULL, NULL);
+ if (error != 0) {
goto out;
}
bt_filled = min(bt_filled, bt_len);
struct sockaddr_ctl sac;
u_int32_t usecount;
u_int32_t kcb_usecount;
+ u_int32_t require_clearing_count;
#if DEVELOPMENT || DEBUG
enum ctl_status status;
#endif /* DEVELOPMENT || DEBUG */
}
/*
- * Use this function to serialize calls into the kctl subsystem
+ * Use this function and ctl_kcb_require_clearing to serialize
+ * critical calls into the kctl subsystem
*/
static void
ctl_kcb_increment_use_count(struct ctl_cb *kcb, lck_mtx_t *mutex_held)
{
LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
- while (kcb->kcb_usecount > 0) {
+ while (kcb->require_clearing_count > 0) {
+ msleep(&kcb->require_clearing_count, mutex_held, PSOCK | PCATCH, "kcb_require_clearing", NULL);
+ }
+ kcb->kcb_usecount++;
+}
+
+static void
+ctl_kcb_require_clearing(struct ctl_cb *kcb, lck_mtx_t *mutex_held)
+{
+ assert(kcb->kcb_usecount != 0);
+ kcb->require_clearing_count++;
+ kcb->kcb_usecount--;
+ while (kcb->kcb_usecount > 0) { // we need to wait until no one else is running
msleep(&kcb->kcb_usecount, mutex_held, PSOCK | PCATCH, "kcb_usecount", NULL);
}
kcb->kcb_usecount++;
}
static void
-clt_kcb_decrement_use_count(struct ctl_cb *kcb)
+ctl_kcb_done_clearing(struct ctl_cb *kcb)
+{
+ assert(kcb->require_clearing_count != 0);
+ kcb->require_clearing_count--;
+ wakeup((caddr_t)&kcb->require_clearing_count);
+}
+
+static void
+ctl_kcb_decrement_use_count(struct ctl_cb *kcb)
{
assert(kcb->kcb_usecount != 0);
kcb->kcb_usecount--;
- wakeup_one((caddr_t)&kcb->kcb_usecount);
+ wakeup((caddr_t)&kcb->kcb_usecount);
}
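/*
 * A sketch of the call pattern used by the kctl entry points below: take the
 * socket lock, then call ctl_kcb_increment_use_count(); paths that must run
 * exclusively (the bind/connect/disconnect/detach-style hunks that follow)
 * additionally call ctl_kcb_require_clearing() and pair it with
 * ctl_kcb_done_clearing() before the final ctl_kcb_decrement_use_count().
 */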
static int
lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
ctl_kcb_increment_use_count(kcb, mtx_held);
+ ctl_kcb_require_clearing(kcb, mtx_held);
if (kcb->kctl != NULL && kcb->kctl->bind != NULL &&
kcb->userdata != NULL && !(so->so_state & SS_ISCONNECTED)) {
kcb->status = KCTL_DISCONNECTED;
#endif /* DEVELOPMENT || DEBUG */
so->so_flags |= SOF_PCBCLEARING;
- clt_kcb_decrement_use_count(kcb);
+ ctl_kcb_done_clearing(kcb);
+ ctl_kcb_decrement_use_count(kcb);
return 0;
}
lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
ctl_kcb_increment_use_count(kcb, mtx_held);
+ ctl_kcb_require_clearing(kcb, mtx_held);
error = ctl_setup_kctl(so, nam, p);
if (error) {
socket_lock(so, 0);
out:
- clt_kcb_decrement_use_count(kcb);
+ ctl_kcb_done_clearing(kcb);
+ ctl_kcb_decrement_use_count(kcb);
return error;
}
lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
ctl_kcb_increment_use_count(kcb, mtx_held);
+ ctl_kcb_require_clearing(kcb, mtx_held);
#if DEVELOPMENT || DEBUG
if (kcb->status != KCTL_DISCONNECTED && ctl_panic_debug) {
lck_mtx_unlock(ctl_mtx);
}
out:
- clt_kcb_decrement_use_count(kcb);
+ ctl_kcb_done_clearing(kcb);
+ ctl_kcb_decrement_use_count(kcb);
return error;
}
if ((kcb = (struct ctl_cb *)so->so_pcb)) {
lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
ctl_kcb_increment_use_count(kcb, mtx_held);
+ ctl_kcb_require_clearing(kcb, mtx_held);
struct kctl *kctl = kcb->kctl;
if (kctl && kctl->disconnect) {
kctlstat.kcs_gencnt++;
lck_mtx_unlock(ctl_mtx);
socket_lock(so, 0);
- clt_kcb_decrement_use_count(kcb);
+ ctl_kcb_done_clearing(kcb);
+ ctl_kcb_decrement_use_count(kcb);
}
return 0;
}
ctl_sbrcv_trim(so);
out:
- clt_kcb_decrement_use_count(kcb);
+ ctl_kcb_decrement_use_count(kcb);
return error;
}
if (error != 0) {
OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_send_fail);
}
- clt_kcb_decrement_use_count(kcb);
+ ctl_kcb_decrement_use_count(kcb);
return error;
}
if (error != 0) {
OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_send_list_fail);
}
- clt_kcb_decrement_use_count(kcb);
+ ctl_kcb_decrement_use_count(kcb);
return error;
}
}
out:
- clt_kcb_decrement_use_count(kcb);
+ ctl_kcb_decrement_use_count(kcb);
return error;
}
#include <security/mac_framework.h>
#endif /* CONFIG_MACF */
-#if CONFIG_CSR
-#include <sys/codesign.h>
-#include <sys/csr.h>
-#endif
-
typedef struct {
int flavor; /* the number for this flavor */
mach_msg_type_number_t count; /* count of ints in this flavor */
}
#endif
-#if CONFIG_CSR
- /* If the process is restricted, CSR isn't configured to allow
- * restricted processes to be debugged, and CSR isn't configured in
- * AppleInternal mode, then don't dump core. */
- if (cs_restricted(core_proc) &&
- csr_check(CSR_ALLOW_TASK_FOR_PID) &&
- csr_check(CSR_ALLOW_APPLE_INTERNAL)) {
- error = EPERM;
- goto out2;
- }
-#endif
-
if (IS_64BIT_PROCESS(core_proc)) {
is_64 = 1;
mach_header_sz = sizeof(struct mach_header_64);
kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
- if (kr != KERN_SUCCESS) {
+ if (kr != KERN_SUCCESS || kernel_blob_size < fs.fs_blob_size) {
error = ENOMEM;
vnode_put(vp);
goto outdrop;
if (uap->cmd == F_ADDSIGS) {
error = copyin(fs.fs_blob_start,
(void *) kernel_blob_addr,
- kernel_blob_size);
+ fs.fs_blob_size);
} else { /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM */
int resid;
kern_return_t kr;
boolean_t task_has_watchport_boost = task_has_watchports(current_task());
boolean_t in_exec = (imgp->ip_flags & IMGPF_EXEC);
+ boolean_t suid_cred_specified = FALSE;
for (i = 0; i < pacts->pspa_count; i++) {
act = &pacts->pspa_actions[i];
goto done;
}
break;
+
+ case PSPA_SUID_CRED:
+ /* Only a single suid credential can be specified. */
+ if (suid_cred_specified) {
+ ret = EINVAL;
+ goto done;
+ }
+ suid_cred_specified = TRUE;
+ break;
+
default:
ret = EINVAL;
goto done;
/* hold on to this till end of spawn */
actions->registered_array[registered_i++] = port;
break;
+
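			/*
			 * Stash the suid credential port on the image params. It is
			 * verified against the exec'ed vnode later during exec, and the
			 * send right is released when the image params are cleaned up.
			 */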
+ case PSPA_SUID_CRED:
+ imgp->ip_sc_port = port;
+ break;
+
default:
ret = EINVAL;
break;
imgp->ip_cs_error = OS_REASON_NULL;
}
#endif
+ if (imgp->ip_sc_port != NULL) {
+ ipc_port_release_send(imgp->ip_sc_port);
+ imgp->ip_sc_port = NULL;
+ }
}
#if CONFIG_DTRACE
kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) ||
((imgp->ip_origvattr->va_mode & VSGID) != 0 &&
((kauth_cred_ismember_gid(cred, imgp->ip_origvattr->va_gid, &leave_sugid_clear) || !leave_sugid_clear) ||
- (kauth_cred_getgid(cred) != imgp->ip_origvattr->va_gid)))) {
+ (kauth_cred_getgid(cred) != imgp->ip_origvattr->va_gid))) ||
+ (imgp->ip_sc_port != NULL)) {
#if CONFIG_MACF
/* label for MAC transition and neither VSUID nor VSGID */
handle_mac_transition:
* proc's ucred lock. This prevents others from accessing
* a garbage credential.
*/
+
+ if (imgp->ip_sc_port != NULL) {
+ extern int suid_cred_verify(ipc_port_t, vnode_t, uint32_t *);
+ int ret = -1;
+ uid_t uid = UINT32_MAX;
+
+ /*
+ * Check that the vnodes match. If a script is being
+ * executed check the script's vnode rather than the
+ * interpreter's.
+ */
+ struct vnode *vp = imgp->ip_scriptvp != NULL ? imgp->ip_scriptvp : imgp->ip_vp;
+
+ ret = suid_cred_verify(imgp->ip_sc_port, vp, &uid);
+ if (ret == 0) {
+ apply_kauth_cred_update(p, ^kauth_cred_t (kauth_cred_t my_cred) {
+ return kauth_cred_setresuid(my_cred,
+ KAUTH_UID_NONE,
+ uid,
+ uid,
+ KAUTH_UID_NONE);
+ });
+ } else {
+ error = EPERM;
+ }
+ }
+
if (imgp->ip_origvattr->va_mode & VSUID) {
apply_kauth_cred_update(p, ^kauth_cred_t (kauth_cred_t my_cred) {
return kauth_cred_setresuid(my_cred,
#include <miscfs/specfs/specdev.h>
+#include <nfs/nfs_conf.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfsnode.h>
"iov32", /* 19 M_IOV32 */
"mount", /* 20 M_MOUNT */
"fhandle", /* 21 M_FHANDLE */
-#if (NFSCLIENT || NFSSERVER)
+#if CONFIG_NFS
"NFS req", /* 22 M_NFSREQ */
"NFS mount", /* 23 M_NFSMNT */
"NFS node", /* 24 M_NFSNODE */
"NQNFS Lease", /* 47 M_NQLEASE */
"NQNFS Host", /* 48 M_NQMHOST */
"Export Host", /* 49 M_NETADDR */
-#if (NFSCLIENT || NFSSERVER)
+#if CONFIG_NFS
"NFS srvsock", /* 50 M_NFSSVC */
"NFS uid", /* 51 M_NFSUID */
"NFS daemon", /* 52 M_NFSD */
"mrt", /* 56 M_MRTABLE */
"", /* 57 unused entry */
"", /* 58 unused entry */
-#if (NFSCLIENT || NFSSERVER)
+#if CONFIG_NFS
"NFSV3 srvdesc",/* 59 M_NFSRVDESC */
"NFSV3 diroff", /* 60 M_NFSDIROFF */
"NFSV3 bigfh", /* 61 M_NFSBIGFH */
{ SOS(user32_iovec), KMZ_LOOKUPZONE, FALSE }, /* 19 M_IOV32 */
{ SOS(mount), KMZ_CREATEZONE, FALSE }, /* 20 M_MOUNT */
{ 0, KMZ_MALLOC, FALSE }, /* 21 M_FHANDLE */
-#if (NFSCLIENT || NFSSERVER)
+#if CONFIG_NFS
{ SOS(nfsreq), KMZ_CREATEZONE, FALSE }, /* 22 M_NFSREQ */
{ SOS(nfsmount), KMZ_CREATEZONE, FALSE }, /* 23 M_NFSMNT */
{ SOS(nfsnode), KMZ_CREATEZONE, FALSE }, /* 24 M_NFSNODE */
{ 0, KMZ_MALLOC, FALSE }, /* 47 M_NQLEASE */
{ 0, KMZ_MALLOC, FALSE }, /* 48 M_NQMHOST */
{ 0, KMZ_MALLOC, FALSE }, /* 49 M_NETADDR */
-#if (NFSCLIENT || NFSSERVER)
+#if CONFIG_NFS
{ SOX(nfsrv_sock),
KMZ_CREATEZONE_ACCT, FALSE }, /* 50 M_NFSSVC */
{ 0, KMZ_MALLOC, FALSE }, /* 51 M_NFSUID */
{ SOX(mrt), KMZ_CREATEZONE, TRUE }, /* 56 M_MRTABLE */
{ 0, KMZ_MALLOC, FALSE }, /* 57 unused entry */
{ 0, KMZ_MALLOC, FALSE }, /* 58 unused entry */
-#if (NFSCLIENT || NFSSERVER)
+#if CONFIG_NFS
{ SOS(nfsrv_descript),
KMZ_CREATEZONE_ACCT, FALSE }, /* 59 M_NFSRVDESC */
{ SOS(nfsdmap), KMZ_CREATEZONE, FALSE }, /* 60 M_NFSDIROFF */
unsigned int memorystatus_thaw_count = 0;
unsigned int memorystatus_refreeze_eligible_count = 0; /* # of processes currently thawed i.e. have state on disk & in-memory */
+/* Freezer counters collected for telemetry */
+static struct memorystatus_freezer_stats_t {
+ /*
+ * # of processes that we've considered freezing.
+ * Used to normalize the error reasons below.
+ */
+ uint64_t mfs_process_considered_count;
+
+ /*
+ * The following counters track how many times we've failed to freeze
+ * a process because of a specific FREEZER_ERROR.
+ */
+ /* EXCESS_SHARED_MEMORY */
+ uint64_t mfs_error_excess_shared_memory_count;
+ /* LOW_PRIVATE_SHARED_RATIO */
+ uint64_t mfs_error_low_private_shared_ratio_count;
+ /* NO_COMPRESSOR_SPACE */
+ uint64_t mfs_error_no_compressor_space_count;
+ /* NO_SWAP_SPACE */
+ uint64_t mfs_error_no_swap_space_count;
+ /* pages < memorystatus_freeze_pages_min */
+ uint64_t mfs_error_below_min_pages_count;
+ /* dasd determined it was unlikely to be relaunched. */
+ uint64_t mfs_error_low_probability_of_use_count;
+ /* transient reasons (like inability to acquire a lock). */
+ uint64_t mfs_error_other_count;
+
+ /*
+ * # of times that we saw memorystatus_available_pages <= memorystatus_freeze_threshold.
+ * Used to normalize skipped_full_count and shared_mb_high_count.
+ */
+ uint64_t mfs_below_threshold_count;
+
+ /* Skipped running the freezer because we were out of slots */
+ uint64_t mfs_skipped_full_count;
+
+	/* Skipped running the freezer because we were over the shared mb limit */
+ uint64_t mfs_skipped_shared_mb_high_count;
+
+ /*
+ * How many pages have not been sent to swap because they were in a shared object?
+	 * This is used to gather telemetry so we can understand the impact we'd have
+ * on our NAND budget if we did swap out these pages.
+ */
+ uint64_t mfs_shared_pages_skipped;
+} memorystatus_freezer_stats = {0};
+
#endif /* XNU_KERNEL_PRIVATE */
static inline boolean_t memorystatus_can_freeze_processes(void);
#define DEGRADED_WINDOW_MINS (30)
#define NORMAL_WINDOW_MINS (24 * 60)
+/* Protected by the freezer_mutex */
static throttle_interval_t throttle_intervals[] = {
{ DEGRADED_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
{ NORMAL_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count, 0, "");
SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_budget_pages_remaining, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_excess_shared_memory_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_excess_shared_memory_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_private_shared_ratio_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_compressor_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_compressor_space_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_swap_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_swap_space_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_below_min_pages_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_below_min_pages_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_probability_of_use_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_probability_of_use_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_other_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_other_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_process_considered_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_process_considered_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_below_threshold_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_below_threshold_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_full_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_full_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_shared_mb_high_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count, "");
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_shared_pages_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_shared_pages_skipped, "");
+
+/*
+ * Calculates the hit rate for the freezer.
+ * The hit rate is defined as the percentage of currently frozen procs that
+ * have been thawed at least once.
+ * A low hit rate means we're freezing poor candidates, since they are not re-used.
+ */
+static int sysctl_memorystatus_freezer_thaw_percentage SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+ size_t thaw_count = 0, frozen_count = 0;
+ int thaw_percentage = 100;
+ unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
+ proc_t p = PROC_NULL;
+ proc_list_lock();
+
+ p = memorystatus_get_first_proc_locked(&band, FALSE);
+
+ while (p) {
+ if (p->p_memstat_state & P_MEMSTAT_FROZEN) {
+ if (p->p_memstat_thaw_count > 0) {
+ thaw_count++;
+ }
+ frozen_count++;
+ }
+ p = memorystatus_get_next_proc_locked(&band, p, FALSE);
+ }
+ proc_list_unlock();
+ if (frozen_count > 0) {
+ thaw_percentage = 100 * thaw_count / frozen_count;
+ }
+ return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
+}
+SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage, "I", "");
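A minimal userspace reader for this OID might look like the following sketch; the name follows from the SYSCTL_PROC declaration above, and error handling is omitted:

#include <stdio.h>
#include <sys/sysctl.h>

int
main(void)
{
	int thaw_percentage = 0;
	size_t len = sizeof(thaw_percentage);

	/* Read-only OID; the handler reports 100 when nothing is currently frozen. */
	if (sysctlbyname("kern.memorystatus_freezer_thaw_percentage",
	    &thaw_percentage, &len, NULL, 0) == 0) {
		printf("freezer thaw percentage: %d%%\n", thaw_percentage);
	}
	return 0;
}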
#if DEVELOPMENT || DEBUG
again:
p = proc_find(pid);
if (p != NULL) {
+ memorystatus_freezer_stats.mfs_process_considered_count++;
uint32_t purgeable, wired, clean, dirty, shared;
uint32_t max_pages = 0, state = 0;
}
error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
+ if (!error || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
+ memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
+ }
if (error) {
char reason[128];
if (freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) {
+ memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++;
strlcpy(reason, "too much shared memory", 128);
}
if (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
+ memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++;
strlcpy(reason, "low private-shared pages ratio", 128);
}
if (freezer_error_code == FREEZER_ERROR_NO_COMPRESSOR_SPACE) {
+ memorystatus_freezer_stats.mfs_error_no_compressor_space_count++;
strlcpy(reason, "no compressor space", 128);
}
sysctl_memorystatus_demote_frozen_process SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp, req)
+ int error, val;
+ /*
+ * Only demote on write to prevent demoting during `sysctl -a`.
+ * The actual value written doesn't matter.
+ */
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error || !req->newptr) {
+ return error;
+ }
memorystatus_demote_frozen_processes(false);
return 0;
}
-SYSCTL_PROC(_kern, OID_AUTO, memorystatus_demote_frozen_processes, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_memorystatus_demote_frozen_process, "I", "");
+SYSCTL_PROC(_kern, OID_AUTO, memorystatus_demote_frozen_processes, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_memorystatus_demote_frozen_process, "I", "");
static int
sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
}
}
+ /*
+ * This proc is a suspended application.
+ * We're interested in tracking what percentage of these
+ * actually get frozen.
+ */
+ memorystatus_freezer_stats.mfs_process_considered_count++;
/* Only freeze applications meeting our minimum resident page criteria */
memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL);
if (pages < memorystatus_freeze_pages_min) {
+ memorystatus_freezer_stats.mfs_error_below_min_pages_count++;
goto out;
}
* memorystatus_freeze_top_process holds the proc_list_lock while it traverses the bands.
*/
if ((p->p_listflag & P_LIST_EXITED) != 0) {
+ memorystatus_freezer_stats.mfs_error_other_count++;
goto out;
}
}
if (probability_of_use == 0) {
+ memorystatus_freezer_stats.mfs_error_low_probability_of_use_count++;
goto out;
}
}
memorystatus_available_pages, 0, 0, 0, 0);
ret = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
+ if (ret == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
+ memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
+ }
KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
memorystatus_available_pages, aPid, 0, 0, 0);
ret = 0;
}
- proc_list_lock();
/* Update stats */
for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
throttle_intervals[i].pageouts += dirty;
}
- } else {
- proc_list_lock();
}
+ memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
+ os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: freezing (specific) pid %d [%s] done memorystatus_freeze_budget_pages_remaining %llu froze %u pages",
+ aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, dirty);
+
+ proc_list_lock();
memorystatus_freeze_pageouts += dirty;
* can freeze a more eligible process at this moment in time?
*/
}
-
- memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
- os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: freezing (specific) pid %d [%s] done memorystatus_freeze_budget_pages_remaining %llu froze %u pages",
- aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, dirty);
} else {
char reason[128];
if (freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) {
+ memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++;
strlcpy(reason, "too much shared memory", 128);
}
if (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
+ memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++;
strlcpy(reason, "low private-shared pages ratio", 128);
}
if (freezer_error_code == FREEZER_ERROR_NO_COMPRESSOR_SPACE) {
+ memorystatus_freezer_stats.mfs_error_no_compressor_space_count++;
strlcpy(reason, "no compressor space", 128);
}
if (freezer_error_code == FREEZER_ERROR_NO_SWAP_SPACE) {
+ memorystatus_freezer_stats.mfs_error_no_swap_space_count++;
strlcpy(reason, "no swap space", 128);
}
return ret;
}
+/*
+ * Caller must hold the freezer_mutex and it will be locked on return.
+ */
static int
memorystatus_freeze_top_process(void)
{
coalition_t coal = COALITION_NULL;
pid_t pid_list[MAX_XPC_SERVICE_PIDS];
unsigned int ntasks = 0;
+ LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_START, memorystatus_available_pages, 0, 0, 0, 0);
p = proc_ref_locked(p);
if (!p) {
+ memorystatus_freezer_stats.mfs_error_other_count++;
break;
}
memorystatus_available_pages, 0, 0, 0, 0);
kr = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
+ if (kr == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
+ memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
+ }
KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
memorystatus_available_pages, aPid, 0, 0, 0);
ret = 0;
}
- proc_list_lock();
-
/* Update stats */
for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
throttle_intervals[i].pageouts += dirty;
}
- } else {
- proc_list_lock();
}
+ memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
+ os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: %sfreezing (%s) pid %d [%s] done, memorystatus_freeze_budget_pages_remaining %llu %sfroze %u pages\n",
+ refreeze_processes? "re" : "", (coal == NULL ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, refreeze_processes? "Re" : "", dirty);
+
+ proc_list_lock();
memorystatus_freeze_pageouts += dirty;
*/
}
- memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
- os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: %sfreezing (%s) pid %d [%s] done, memorystatus_freeze_budget_pages_remaining %llu %sfroze %u pages\n",
- refreeze_processes? "re" : "", (coal == NULL ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, refreeze_processes? "Re" : "", dirty);
-
/* Return KERN_SUCCESS */
ret = kr;
char reason[128];
if (freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) {
+ memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++;
strlcpy(reason, "too much shared memory", 128);
}
if (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
+ memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++;
strlcpy(reason, "low private-shared pages ratio", 128);
}
if (freezer_error_code == FREEZER_ERROR_NO_COMPRESSOR_SPACE) {
+ memorystatus_freezer_stats.mfs_error_no_compressor_space_count++;
strlcpy(reason, "no compressor space", 128);
}
if (freezer_error_code == FREEZER_ERROR_NO_SWAP_SPACE) {
+ memorystatus_freezer_stats.mfs_error_no_swap_space_count++;
strlcpy(reason, "no swap space", 128);
}
if (force_one == FALSE) {
/*
- * We use this counter to track daily thaws.
- * So we only reset it to 0 under the normal
+ * We use these counters to track daily hit rates.
+ * So we only reset them to 0 under the normal
* mode.
*/
memorystatus_thaw_count = 0;
proc_list_unlock();
}
+/*
+ * Calculate a new freezer budget.
+ * @param time_since_last_interval_expired_sec Seconds elapsed since the previous interval expired.
+ * @param burst_multiple The burst multiple for the new interval.
+ * @param interval_duration_min The duration of the new interval, in minutes.
+ * @param rollover The unused budget rolled over from the previous interval.
+ *
+ * @return A budget for the new interval.
+ */
+static uint32_t
+memorystatus_freeze_calculate_new_budget(
+ unsigned int time_since_last_interval_expired_sec,
+ unsigned int burst_multiple,
+ unsigned int interval_duration_min,
+ uint32_t rollover)
+{
+ uint64_t freeze_daily_budget = 0;
+ unsigned int daily_budget_pageouts = 0;
+ unsigned int freeze_daily_pageouts_max = 0;
+ const static unsigned int kNumSecondsInDay = 60 * 60 * 24;
+ /* Precision factor for days_missed. 2 decimal points. */
+ const static unsigned int kFixedPointFactor = 100;
+ unsigned int days_missed, budget_missed;
+
+ /* Get the daily budget from the storage layer */
+ if (vm_swap_max_budget(&freeze_daily_budget)) {
+ memorystatus_freeze_daily_mb_max = (freeze_daily_budget / (1024 * 1024));
+ os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: memorystatus_freeze_daily_mb_max set to %dMB\n", memorystatus_freeze_daily_mb_max);
+ }
+ /* Calculate the daily pageout budget */
+ freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE);
+
+ daily_budget_pageouts = (burst_multiple * (((uint64_t) interval_duration_min * freeze_daily_pageouts_max) / (kNumSecondsInDay / 60)));
+
+ /*
+ * Add additional budget for time since the interval expired.
+ * For example, if the interval expired n days ago, we should get an additional n days
+ * of budget since we didn't use any budget during those n days.
+ */
+ days_missed = time_since_last_interval_expired_sec * kFixedPointFactor / kNumSecondsInDay;
+ budget_missed = days_missed * freeze_daily_pageouts_max / kFixedPointFactor;
+ return rollover + daily_budget_pageouts + budget_missed;
+}
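/*
 * Worked example with illustrative numbers (assuming a 16 KB page size and a
 * memorystatus_freeze_daily_mb_max of 1024 MB): freeze_daily_pageouts_max is
 * 1024 * (1024 * 1024 / 16384) = 65536 pages. With burst_multiple = 1,
 * interval_duration_min = NORMAL_WINDOW_MINS (1440) and rollover = 0, the base
 * budget is 1 * (1440 * 65536 / 1440) = 65536 pages. If the previous interval
 * expired two days ago (172800 sec), days_missed = 172800 * 100 / 86400 = 200
 * (2.00 days in fixed point) and budget_missed = 200 * 65536 / 100 = 131072
 * pages, giving a new budget of 196608 pages.
 */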
+
+#if DEVELOPMENT || DEBUG
+
+static int
+sysctl_memorystatus_freeze_calculate_new_budget SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+ int error = 0;
+ unsigned int time_since_last_interval_expired_sec = 0;
+ unsigned int new_budget;
+
+ error = sysctl_handle_int(oidp, &time_since_last_interval_expired_sec, 0, req);
+ if (error || !req->newptr) {
+ return error;
+ }
+ new_budget = memorystatus_freeze_calculate_new_budget(time_since_last_interval_expired_sec, 1, NORMAL_WINDOW_MINS, 0);
+ return copyout(&new_budget, req->oldptr, MIN(sizeof(req->oldlen), sizeof(new_budget)));
+}
+
+SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_calculate_new_budget, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
+ 0, 0, &sysctl_memorystatus_freeze_calculate_new_budget, "I", "");
+
+#endif /* DEVELOPMENT || DEBUG */
/*
* This function will do 4 things:
* 4) calculate the current rate of pageouts for DEGRADED_WINDOW_MINS duration. If that rate is below
* what we would normally expect, then we are running low on our daily budget and need to enter
* degraded perf. mode.
+ *
+ * Caller must hold the freezer mutex
+ * Caller must not hold the proc_list lock
*/
static void
{
clock_sec_t sec;
clock_nsec_t nsec;
- mach_timespec_t ts;
+ mach_timespec_t now_ts;
+ LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
+ LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
unsigned int freeze_daily_pageouts_max = 0;
#endif
clock_get_system_nanotime(&sec, &nsec);
- ts.tv_sec = sec;
- ts.tv_nsec = nsec;
+ now_ts.tv_sec = sec;
+ now_ts.tv_nsec = nsec;
struct throttle_interval_t *interval = NULL;
if (memorystatus_freeze_degradation == TRUE) {
interval = degraded_throttle_window;
- if (CMP_MACH_TIMESPEC(&ts, &interval->ts) >= 0) {
+ if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) {
memorystatus_freeze_degradation = FALSE;
interval->pageouts = 0;
interval->max_pageouts = 0;
interval = normal_throttle_window;
- if (CMP_MACH_TIMESPEC(&ts, &interval->ts) >= 0) {
- /*
- * New throttle window.
- * Rollover any unused budget.
- * Also ask the storage layer what the new budget needs to be.
- */
- uint64_t freeze_daily_budget = 0;
- unsigned int daily_budget_pageouts = 0;
-
- if (vm_swap_max_budget(&freeze_daily_budget)) {
- memorystatus_freeze_daily_mb_max = (freeze_daily_budget / (1024 * 1024));
- os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: memorystatus_freeze_daily_mb_max set to %dMB\n", memorystatus_freeze_daily_mb_max);
- }
-
- freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE);
-
- daily_budget_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * freeze_daily_pageouts_max) / NORMAL_WINDOW_MINS));
- interval->max_pageouts = (interval->max_pageouts - interval->pageouts) + daily_budget_pageouts;
+ if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) {
+ /* How long has it been since the previous interval expired? */
+ mach_timespec_t expiration_period_ts = now_ts;
+ SUB_MACH_TIMESPEC(&expiration_period_ts, &interval->ts);
+ interval->max_pageouts = memorystatus_freeze_calculate_new_budget(
+ expiration_period_ts.tv_sec, interval->burst_multiple,
+ interval->mins, interval->max_pageouts - interval->pageouts);
interval->ts.tv_sec = interval->mins * 60;
interval->ts.tv_nsec = 0;
- ADD_MACH_TIMESPEC(&interval->ts, &ts);
+ ADD_MACH_TIMESPEC(&interval->ts, &now_ts);
/* Since we update the throttle stats pre-freeze, adjust for overshoot here */
if (interval->pageouts > interval->max_pageouts) {
interval->pageouts -= interval->max_pageouts;
interval->pageouts = 0;
}
*budget_pages_allowed = interval->max_pageouts;
+ memorystatus_freezer_stats.mfs_shared_pages_skipped = 0;
memorystatus_demote_frozen_processes(FALSE); /* normal mode...don't force a demotion */
} else {
time_left.tv_sec = interval->ts.tv_sec;
time_left.tv_nsec = 0;
- SUB_MACH_TIMESPEC(&time_left, &ts);
+ SUB_MACH_TIMESPEC(&time_left, &now_ts);
if (budget_left <= budget_threshold) {
/*
}
MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n",
- interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - ts->tv_sec) / 60,
+	    interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - now_ts.tv_sec) / 60,
interval->throttle ? "on" : "off");
}
goto out;
}
- if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max) &&
- (memorystatus_refreeze_eligible_count < MIN_THAW_REFREEZE_THRESHOLD)) {
- goto out;
+ memorystatus_freezer_stats.mfs_below_threshold_count++;
+
+ if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) {
+ /*
+ * Consider this as a skip even if we wake up to refreeze because
+ * we won't freeze any new procs.
+ */
+ memorystatus_freezer_stats.mfs_skipped_full_count++;
+ if (memorystatus_refreeze_eligible_count < MIN_THAW_REFREEZE_THRESHOLD) {
+ goto out;
+ }
}
if (memorystatus_frozen_shared_mb_max && (memorystatus_frozen_shared_mb >= memorystatus_frozen_shared_mb_max)) {
+ memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count++;
goto out;
}
SYSCTL_PROC(_hw_optional, OID_AUTO, adx, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasADX, 0, sysctl_cpu_capability, "I", "");
SYSCTL_PROC(_hw_optional, OID_AUTO, mpx, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasMPX, 0, sysctl_cpu_capability, "I", "");
SYSCTL_PROC(_hw_optional, OID_AUTO, sgx, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasSGX, 0, sysctl_cpu_capability, "I", "");
-#if !defined(RC_HIDE_XNU_J137)
SYSCTL_PROC(_hw_optional, OID_AUTO, avx512f, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAVX512F, 0, sysctl_cpu_capability, "I", "");
SYSCTL_PROC(_hw_optional, OID_AUTO, avx512cd, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAVX512CD, 0, sysctl_cpu_capability, "I", "");
SYSCTL_PROC(_hw_optional, OID_AUTO, avx512dq, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAVX512DQ, 0, sysctl_cpu_capability, "I", "");
SYSCTL_PROC(_hw_optional, OID_AUTO, avx512vl, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAVX512VL, 0, sysctl_cpu_capability, "I", "");
SYSCTL_PROC(_hw_optional, OID_AUTO, avx512ifma, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAVX512IFMA, 0, sysctl_cpu_capability, "I", "");
SYSCTL_PROC(_hw_optional, OID_AUTO, avx512vbmi, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (void *) kHasAVX512VBMI, 0, sysctl_cpu_capability, "I", "");
-#endif /* not RC_HIDE_XNU_J137 */
#elif defined (__arm__) || defined (__arm64__)
int watchpoint_flag = -1;
int breakpoint_flag = -1;
#endif
#include <os/overflow.h>
-#ifndef CONFIG_EMBEDDED
-#include <IOKit/IOBSD.h> /* for IOTaskHasEntitlement */
-#include <sys/csr.h> /* for csr_check */
-#define MAP_32BIT_ENTITLEMENT "com.apple.security.mmap-map-32bit"
-#endif
-
/*
* XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
* XXX usage is PROT_* from an interface perspective. Thus the values of
#ifndef CONFIG_EMBEDDED
if (flags & MAP_32BIT) {
- if (csr_check(CSR_ALLOW_UNTRUSTED_KEXTS) == 0 ||
- IOTaskHasEntitlement(current_task(), MAP_32BIT_ENTITLEMENT)) {
- vmk_flags.vmkf_32bit_map_va = TRUE;
- } else {
- error = EPERM;
- goto bad;
- }
+ vmk_flags.vmkf_32bit_map_va = TRUE;
}
#endif
static void system_override_end(uint64_t flags);
static void system_override_abort(uint64_t flags);
static void system_override_callouts(uint64_t flags, boolean_t enable_override);
-static __attribute__((noinline)) void PROCESS_OVERRIDING_SYSTEM_DEFAULTS(uint64_t timeout);
+static __attribute__((noinline)) int PROCESS_OVERRIDING_SYSTEM_DEFAULTS(uint64_t timeout);
void
init_system_override()
system_override_abort(flags);
} else {
system_override_begin(flags);
- PROCESS_OVERRIDING_SYSTEM_DEFAULTS(timeout);
+ error = PROCESS_OVERRIDING_SYSTEM_DEFAULTS(timeout);
system_override_end(flags);
}
}
}
-static __attribute__((noinline)) void
+static __attribute__((noinline)) int
PROCESS_OVERRIDING_SYSTEM_DEFAULTS(uint64_t timeout)
{
struct timespec ts;
ts.tv_sec = timeout / NSEC_PER_SEC;
ts.tv_nsec = timeout - ((long)ts.tv_sec * NSEC_PER_SEC);
- msleep((caddr_t)&sys_override_wait, &sys_override_lock, PRIBIO | PCATCH, "system_override", &ts);
+ int error = msleep((caddr_t)&sys_override_wait, &sys_override_lock, PRIBIO | PCATCH, "system_override", &ts);
+ /* msleep returns EWOULDBLOCK if timeout expires, treat that as success */
+ return (error == EWOULDBLOCK) ? 0 : error;
}
return current_proc()->p_ppid;
}
-int
+uint64_t
proc_selfcsflags(void)
{
- return current_proc()->p_csflags;
+ return (uint64_t)current_proc()->p_csflags;
+}
+
+int
+proc_csflags(proc_t p, uint64_t *flags)
+{
+ if (p && flags) {
+ *flags = (uint64_t)p->p_csflags;
+ return 0;
+ }
+ return EINVAL;
}
uint32_t
{
proc_t p;
+ if (size <= 0) {
+ return;
+ }
+
+ bzero(buf, size);
+
if ((p = proc_find(pid)) != PROC_NULL) {
strlcpy(buf, &p->p_comm[0], size);
proc_rele(p);
return NULLVP;
}
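/*
 * Returns the session's controlling tty vnode in *vp. On success the vnode is
 * returned with an iocount taken via vnode_getwithvid(), which the caller is
 * expected to drop with vnode_put().
 */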
+int
+proc_gettty(proc_t p, vnode_t *vp)
+{
+ if (!p || !vp) {
+ return EINVAL;
+ }
+
+ struct session *procsp = proc_session(p);
+ int err = EINVAL;
+
+ if (procsp != SESSION_NULL) {
+ session_lock(procsp);
+ vnode_t ttyvp = procsp->s_ttyvp;
+ int ttyvid = procsp->s_ttyvid;
+ session_unlock(procsp);
+
+ if (ttyvp) {
+ if (vnode_getwithvid(ttyvp, ttyvid) == 0) {
+ *vp = procsp->s_ttyvp;
+ err = 0;
+ }
+ } else {
+ err = ENOENT;
+ }
+
+ session_rele(procsp);
+ }
+
+ return err;
+}
+
+int
+proc_gettty_dev(proc_t p, dev_t *dev)
+{
+ struct session *procsp = proc_session(p);
+ boolean_t has_tty = FALSE;
+
+ if (procsp != SESSION_NULL) {
+ session_lock(procsp);
+
+ struct tty * tp = SESSION_TP(procsp);
+ if (tp != TTY_NULL) {
+ *dev = tp->t_dev;
+ has_tty = TRUE;
+ }
+
+ session_unlock(procsp);
+ session_rele(procsp);
+ }
+
+ if (has_tty) {
+ return 0;
+ } else {
+ return EINVAL;
+ }
+}
+
int
proc_selfexecutableargs(uint8_t *buf, size_t *buflen)
{
*/
if (forself == 1 && IOTaskHasEntitlement(pt->task, CLEAR_LV_ENTITLEMENT)) {
proc_lock(pt);
- pt->p_csflags &= (~(CS_REQUIRE_LV & CS_FORCED_LV));
+ pt->p_csflags &= (~(CS_REQUIRE_LV | CS_FORCED_LV));
proc_unlock(pt);
error = 0;
} else {
#include <machine/machine_routines.h>
#include <machine/exec.h>
+#include <nfs/nfs_conf.h>
+
#include <vm/vm_protos.h>
#include <vm/vm_pageout.h>
#include <vm/vm_compressor_algorithms.h>
extern int
kdbg_control(int *name, u_int namelen, user_addr_t where, size_t * sizep);
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
extern int
netboot_root(void);
#endif
STATIC int sysctl_procname(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
STATIC int sysctl_boottime(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
STATIC int sysctl_symfile(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
STATIC int sysctl_netboot(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
#endif
#ifdef CONFIG_IMGSRC_ACCESS
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED,
0, 0, sysctl_symfile, "A", "");
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
STATIC int
sysctl_netboot
(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
#if DEVELOPMENT || DEBUG
+extern void do_cseg_wedge_thread(void);
+extern void do_cseg_unwedge_thread(void);
+
+static int
+cseg_wedge_thread SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+
+ int error, val = 0;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error || val == 0) {
+ return error;
+ }
+
+ do_cseg_wedge_thread();
+ return 0;
+}
+SYSCTL_PROC(_kern, OID_AUTO, cseg_wedge_thread, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, cseg_wedge_thread, "I", "wedge c_seg thread");
+
+static int
+cseg_unwedge_thread SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+
+ int error, val = 0;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error || val == 0) {
+ return error;
+ }
+
+ do_cseg_unwedge_thread();
+ return 0;
+}
+SYSCTL_PROC(_kern, OID_AUTO, cseg_unwedge_thread, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, cseg_unwedge_thread, "I", "unstuck c_seg thread");
+
static atomic_int wedge_thread_should_wake = 0;
static int
extern uint64_t MutexSpin;
-SYSCTL_QUAD(_kern, OID_AUTO, mutex_spin_us, CTLFLAG_RW, &MutexSpin,
- "Spin time for acquiring a kernel mutex");
+SYSCTL_QUAD(_kern, OID_AUTO, mutex_spin_abs, CTLFLAG_RW, &MutexSpin,
+ "Spin time in abs for acquiring a kernel mutex");
+
+extern uint64_t low_MutexSpin;
+extern int64_t high_MutexSpin;
+extern unsigned int real_ncpus;
+
+SYSCTL_QUAD(_kern, OID_AUTO, low_mutex_spin_abs, CTLFLAG_RW, &low_MutexSpin,
+ "Low spin threshold in abs for acquiring a kernel mutex");
+
+static int
+sysctl_high_mutex_spin_ns SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+ int error;
+ int64_t val = 0;
+ int64_t res;
+
+ /* Check if the user is writing to high_MutexSpin, or just reading it */
+ if (req->newptr) {
+ error = SYSCTL_IN(req, &val, sizeof(val));
+ if (error || (val < 0 && val != -1)) {
+ return error;
+ }
+ high_MutexSpin = val;
+ }
+
+ if (high_MutexSpin >= 0) {
+ res = high_MutexSpin;
+ } else {
+ res = low_MutexSpin * real_ncpus;
+ }
+ return SYSCTL_OUT(req, &res, sizeof(res));
+}
+SYSCTL_PROC(_kern, OID_AUTO, high_mutex_spin_abs, CTLFLAG_RW | CTLTYPE_QUAD, 0, 0, sysctl_high_mutex_spin_ns, "I",
+ "High spin threshold in abs for acquiring a kernel mutex");
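/*
 * Writing -1 to kern.high_mutex_spin_abs selects the default upper bound of
 * low_MutexSpin * real_ncpus; any non-negative value is used as written.
 * Reads always report the effective threshold.
 */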
#if defined (__x86_64__)
return rv;
}
-#if (MAC_POLICY_OPS_VERSION != 59)
+#if (MAC_POLICY_OPS_VERSION != 62)
# error "struct mac_policy_ops doesn't match definition in mac_policy.h"
#endif
/*
CHECK_SET_HOOK(vnode_check_trigger_resolve)
CHECK_SET_HOOK(mount_check_mount_late)
- .mpo_reserved1 = (mpo_reserved_hook_t *)common_hook,
+ CHECK_SET_HOOK(mount_check_snapshot_mount)
.mpo_reserved2 = (mpo_reserved_hook_t *)common_hook,
CHECK_SET_HOOK(skywalk_flow_check_connect)
CHECK_SET_HOOK(skywalk_flow_check_listen)
if (!oslog_stream_msg_bufc) {
return ENOMEM;
}
+ /* Zeroing to avoid copying uninitialized struct padding to userspace. */
+ bzero(oslog_stream_msg_bufc, oslog_stream_buf_size);
/* entries to support kernel logging in stream mode */
- entries = kalloc(oslog_stream_num_entries * sizeof(struct oslog_stream_buf_entry_s));
+ size_t entries_size = oslog_stream_num_entries * sizeof(struct oslog_stream_buf_entry_s);
+ entries = kalloc(entries_size);
if (!entries) {
kfree(oslog_stream_msg_bufc, oslog_stream_buf_size);
return ENOMEM;
}
+ /* Zeroing to avoid copying uninitialized struct padding to userspace. */
+ bzero(entries, entries_size);
stream_lock();
if (oslog_stream_open) {
stream_unlock();
kfree(oslog_stream_msg_bufc, oslog_stream_buf_size);
- kfree(entries, oslog_stream_num_entries * sizeof(struct oslog_stream_buf_entry_s));
+ kfree(entries, entries_size);
return EBUSY;
}
for (int i = 0; i < oslog_stream_num_entries; i++) {
oslog_stream_buf_entries[i].type = oslog_stream_link_type_log;
- oslog_stream_buf_entries[i].offset = 0;
- oslog_stream_buf_entries[i].size = 0;
- oslog_stream_buf_entries[i].timestamp = 0;
STAILQ_INSERT_TAIL(&oslog_stream_free_head, &oslog_stream_buf_entries[i], buf_entries);
}
extern const char *debugger_panic_str;
extern void cnputc(char); /* standard console putc */
-void (*v_putc)(char) = cnputc; /* routine to putc on virtual console */
extern struct tty cons; /* standard console tty */
extern struct tty *constty; /* pointer to console "window" tty */
log_putc_locked(msgbufp, c);
}
if ((pca->flags & TOCONS) && constty == 0 && c != '\0') {
- (*v_putc)(c);
+ cnputc(c);
}
if (pca->flags & TOSTR) {
**sp = c;
#include <sys/types.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
+#include <nfs/nfs_conf.h>
0 AUE_NULL ALL { int nosys(void); } { indirect syscall }
1 AUE_EXIT ALL { void exit(int rval) NO_SYSCALL_STUB; }
153 AUE_PREAD ALL { user_ssize_t pread(int fd, user_addr_t buf, user_size_t nbyte, off_t offset); }
154 AUE_PWRITE ALL { user_ssize_t pwrite(int fd, user_addr_t buf, user_size_t nbyte, off_t offset); }
-#if NFSSERVER
+#if NFSSERVER /* XXX */
155 AUE_NFS_SVC ALL { int nfssvc(int flag, caddr_t argp); }
#else
155 AUE_NULL ALL { int nosys(void); }
159 AUE_UNMOUNT ALL { int unmount(user_addr_t path, int flags); }
160 AUE_NULL ALL { int nosys(void); } { old async_daemon }
-#if NFSSERVER
+#if NFSSERVER /* XXX */
161 AUE_NFS_GETFH ALL { int getfh(char *fname, fhandle_t *fhp); }
#else
161 AUE_NULL ALL { int nosys(void); }
245 AUE_FFSCTL ALL { int ffsctl(int fd, u_long cmd, caddr_t data, u_int options); }
246 AUE_NULL ALL { int nosys(void); }
-#if NFSCLIENT
+#if NFSCLIENT /* XXX */
247 AUE_NULL ALL { int nfsclnt(int flag, caddr_t argp); }
#else
247 AUE_NULL ALL { int nosys(void); }
#endif
-#if NFSSERVER
+#if NFSSERVER /* XXX */
248 AUE_FHOPEN ALL { int fhopen(const struct fhandle *u_fhp, int flags); }
#else
248 AUE_NULL ALL { int nosys(void); }
0x3130164 VFS_devfs_label_associate_device
0x3130168 VFS_devfs_label_associate_directory
0x313016C VFS_label_associate_fdesc
+0x3130170 VFS_mount_check_snapshot_mount
0x3CF0000 CP_OFFSET_IO
0x4010004 proc_exit
0x4010008 force_exit
struct protosw *prp;
struct socket *so;
int error = 0;
+#if defined(XNU_TARGET_OS_OSX)
+ pid_t rpid = -1;
+#endif
#if TCPDEBUG
extern int tcpconsdebug;
so->e_pid = proc_pid(ep);
proc_getexecutableuuid(ep, so->e_uuid, sizeof(so->e_uuid));
so->so_flags |= SOF_DELEGATED;
+#if defined(XNU_TARGET_OS_OSX)
+ if (ep->p_responsible_pid != so->e_pid) {
+ rpid = ep->p_responsible_pid;
+ }
+#endif
+ }
+
+#if defined(XNU_TARGET_OS_OSX)
+ if (rpid < 0 && p->p_responsible_pid != so->last_pid) {
+ rpid = p->p_responsible_pid;
+ }
+
+ so->so_rpid = -1;
+ uuid_clear(so->so_ruuid);
+ if (rpid >= 0) {
+ proc_t rp = proc_find(rpid);
+ if (rp != PROC_NULL) {
+ proc_getexecutableuuid(rp, so->so_ruuid, sizeof(so->so_ruuid));
+ so->so_rpid = rpid;
+ proc_rele(rp);
+ }
}
+#endif
so->so_cred = kauth_cred_proc_ref(p);
if (!suser(kauth_cred_get(), NULL)) {
}
}
- retval = (data >= lowwat);
+ /*
+ * While the `data` field is the amount of data to read,
+ * 0-sized packets need to wake up the kqueue, see 58140856,
+ * so we need to take control bytes into account too.
+ */
+ retval = (so->so_rcv.sb_cc >= lowwat);
out:
if (retval && kev) {
so->e_upid = proc_uniqueid(ep);
so->e_pid = proc_pid(ep);
proc_getexecutableuuid(ep, so->e_uuid, sizeof(so->e_uuid));
+
+#if defined(XNU_TARGET_OS_OSX)
+ if (ep->p_responsible_pid != so->e_pid) {
+ proc_t rp = proc_find(ep->p_responsible_pid);
+ if (rp != PROC_NULL) {
+ proc_getexecutableuuid(rp, so->so_ruuid, sizeof(so->so_ruuid));
+ so->so_rpid = ep->p_responsible_pid;
+ proc_rele(rp);
+ } else {
+ uuid_clear(so->so_ruuid);
+ so->so_rpid = -1;
+ }
+ }
+#endif
}
if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) {
(*so->so_proto->pr_update_last_owner)(so, NULL, ep);
.Vt statfs
structure returned by
.Xr statfs 2 .
+.
+.It ATTR_CMNEXT_CLONEID
+A
+.Vt u_int64_t
+that uniquely identifies the data stream associated with the file
+system object. Useful for finding which files are pure clones of each
+other (as they will have the same clone-id).
+.
+.It ATTR_CMNEXT_EXT_FLAGS
+A
+.Vt u_int64_t
+that contains additional flags with information about the file. The
+flags are:
+.
+.Bl -tag -width EF_MAY_SHARE_BLOCKS
+.
+.It EF_MAY_SHARE_BLOCKS
+If this bit is set then the file may share blocks with another file
+(i.e. it is a clone of another file).
+.
+.It EF_NO_XATTRS
+If this bit is set then the file has no extended attributes. Useful
+for avoiding a call to listxattr().
+.
+.It EF_IS_SYNC_ROOT
+If this bit is set the directory is a "sync root". This bit will
+never be set for regular files.
+.
+.It EF_IS_PURGEABLE
+If this bit is set the item is a "purgeable" item that can be deleted
+by the file system when asked to free space.
+.
+.It EF_IS_SPARSE
+If this bit is set the item has sparse regions.
+.
+.El
.El
+.Pp
.
.Sh VOLUME CAPABILITIES
.
will produce lower quality output if the entropy pool drains, while
.Nm /dev/random
will prefer to block and wait for additional entropy to be collected.
-With Yarrow, this choice and distinction is not necessary, and
+With Fortuna, this choice and distinction is not necessary, and
the two devices behave identically. You may use either.
.Pp
The
.Nm
device implements the
-.Nm Yarrow
+.Nm Fortuna
pseudo random number generator algorithm and maintains its entropy pool.
The kernel automatically seeds the algorithm with additional entropy during normal execution.
.Sh FILES
* Fill out some fields
*/
__IGNORE_WCASTALIGN(mp->mnt_data = (qaddr_t)devfs_mp_p);
- mp->mnt_vfsstat.f_fsid.val[0] = (int32_t)(uintptr_t)devfs_mp_p;
+ mp->mnt_vfsstat.f_fsid.val[0] = (int32_t)VM_KERNEL_ADDRHASH(devfs_mp_p);
mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
mp->mnt_flag |= MNT_LOCAL;
sbp->f_bavail = 0;
sbp->f_files = devfs_stats.nodes;
sbp->f_ffree = 0;
- sbp->f_fsid.val[0] = (int32_t)(uintptr_t)devfs_mp_p;
+ sbp->f_fsid.val[0] = (int32_t)VM_KERNEL_ADDRHASH(devfs_mp_p);
sbp->f_fsid.val[1] = vfs_typenum(mp);
return 0;
VATTR_RETURN(vap, va_nlink, file_node->dn_links);
VATTR_RETURN(vap, va_uid, file_node->dn_uid);
VATTR_RETURN(vap, va_gid, file_node->dn_gid);
- VATTR_RETURN(vap, va_fsid, (uintptr_t)file_node->dn_dvm);
+ VATTR_RETURN(vap, va_fsid, (uint32_t)VM_KERNEL_ADDRHASH(file_node->dn_dvm));
VATTR_RETURN(vap, va_fileid, (uintptr_t)file_node->dn_ino);
VATTR_RETURN(vap, va_data_size, file_node->dn_len);
* Fill out some fields
*/
__IGNORE_WCASTALIGN(mp->mnt_data = (qaddr_t)routefs_mp_p);
- mp->mnt_vfsstat.f_fsid.val[0] = (int32_t)(uintptr_t)routefs_mp_p;
+ mp->mnt_vfsstat.f_fsid.val[0] = (int32_t)VM_KERNEL_ADDRHASH(routefs_mp_p);
mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
mp->mnt_flag |= MNT_LOCAL;
sbp->f_bavail = 0;
sbp->f_files = 0;
sbp->f_ffree = 0;
- sbp->f_fsid.val[0] = (int32_t)(uintptr_t)routefs_mp_p;
+ sbp->f_fsid.val[0] = (int32_t)VM_KERNEL_ADDRHASH(routefs_mp_p);
sbp->f_fsid.val[1] = vfs_typenum(mp);
return 0;
uint32_t cfil_active_count = 0; /* Number of active content filters */
uint32_t cfil_sock_attached_count = 0;	/* Number of socket attachments */
uint32_t cfil_sock_udp_attached_count = 0;	/* Number of UDP socket attachments */
+uint32_t cfil_sock_attached_stats_count = 0;	/* Number of sockets that requested periodic stats reports */
uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
static kern_ctl_ref cfil_kctlref = NULL;
uint32_t cfe_necp_control_unit;
struct timeval cfe_last_event; /* To user space */
struct timeval cfe_last_action; /* From user space */
+	uint64_t cfe_byte_inbound_count_reported; /* Inbound bytes already reported */
+	uint64_t cfe_byte_outbound_count_reported; /* Outbound bytes already reported */
+ struct timeval cfe_stats_report_ts; /* Timestamp for last stats report */
+ uint32_t cfe_stats_report_frequency; /* Interval for stats report in msecs */
+ boolean_t cfe_laddr_sent;
struct cfe_buf {
/*
*/
struct cfil_info {
TAILQ_ENTRY(cfil_info) cfi_link;
+ TAILQ_ENTRY(cfil_info) cfi_link_stats;
struct socket *cfi_so;
uint64_t cfi_flags;
uint64_t cfi_sock_id;
#define CFI_ENTRY_KCUNIT(i, e) (((e) - &((i)->cfi_entries[0])) + 1)
TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
+TAILQ_HEAD(cfil_sock_head_stats, cfil_info) cfil_sock_head_stats;
#define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
#define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
(addr.sa.sa_family == AF_INET && addr.sin.sin_addr.s_addr == 0) || \
(addr.sa.sa_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&addr.sin6.sin6_addr)))
+/*
+ * Periodic Statistics Report:
+ */
+static struct thread *cfil_stats_report_thread;
+#define CFIL_STATS_REPORT_INTERVAL_MIN_MSEC 500 // Highest report frequency
+#define CFIL_STATS_REPORT_RUN_INTERVAL_NSEC (CFIL_STATS_REPORT_INTERVAL_MIN_MSEC * NSEC_PER_MSEC)
+#define CFIL_STATS_REPORT_MAX_COUNT 50 // Max stats to be reported per run
+
+/* This buffer must have same layout as struct cfil_msg_stats_report */
+struct cfil_stats_report_buffer {
+ struct cfil_msg_hdr msghdr;
+ uint32_t count;
+ struct cfil_msg_sock_stats stats[CFIL_STATS_REPORT_MAX_COUNT];
+};
+static struct cfil_stats_report_buffer *global_cfil_stats_report_buffers[MAX_CONTENT_FILTER];
+static uint32_t global_cfil_stats_counts[MAX_CONTENT_FILTER];
+
/*
* UDP Garbage Collection:
*/
u_short cfentry_lport;
sa_family_t cfentry_family;
u_int32_t cfentry_flowhash;
- u_int32_t cfentry_lastused;
+ u_int64_t cfentry_lastused;
union {
/* foreign host table entry */
struct in_addr_4in6 addr46;
#define DATA_DEBUG 0
#define SHOW_DEBUG 0
#define GC_DEBUG 0
+#define STATS_DEBUG 0
/*
* Sysctls for logs and statistics
static void cfil_fill_event_msg_addresses(struct cfil_hash_entry *, struct inpcb *,
union sockaddr_in_4_6 *, union sockaddr_in_4_6 *,
boolean_t, boolean_t);
+static void cfil_stats_report_thread_func(void *, wait_result_t);
+static void cfil_stats_report(void *v, wait_result_t w);
bool check_port(struct sockaddr *, u_short);
*unitinfo = cfc;
cfil_active_count++;
+
+ // Allocate periodic stats buffer for this filter
+ if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] == NULL) {
+ cfil_rw_unlock_exclusive(&cfil_lck_rw);
+
+ struct cfil_stats_report_buffer *buf;
+
+ MALLOC(buf,
+ struct cfil_stats_report_buffer *,
+ sizeof(struct cfil_stats_report_buffer),
+ M_TEMP,
+ M_WAITOK | M_ZERO);
+
+ cfil_rw_lock_exclusive(&cfil_lck_rw);
+
+ if (buf == NULL) {
+ error = ENOMEM;
+ cfil_rw_unlock_exclusive(&cfil_lck_rw);
+ goto done;
+ }
+
+ /* Another thread may have won the race */
+ if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
+ FREE(buf, M_TEMP);
+ } else {
+ global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = buf;
+ }
+ }
}
cfil_rw_unlock_exclusive(&cfil_lck_rw);
done:
}
verify_content_filter(cfc);
+ /* Free the stats buffer for this filter */
+ if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
+ FREE(global_cfil_stats_report_buffers[cfc->cf_kcunit - 1], M_TEMP);
+ global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = NULL;
+ }
VERIFY(cfc->cf_sock_count == 0);
/*
return so;
}
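/*
 * Enable or disable periodic stats reporting for a flow. A non-zero
 * report_frequency (clamped to at least CFIL_STATS_REPORT_INTERVAL_MIN_MSEC)
 * links the cfil_info onto cfil_sock_head_stats and wakes the stats thread if
 * this is the first flow registered; a zero frequency unlinks it once no
 * attached filter is still asking for stats.
 */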
+static void
+cfil_info_stats_toggle(struct cfil_info *cfil_info, struct cfil_entry *entry, uint32_t report_frequency)
+{
+ struct cfil_info *cfil = NULL;
+ Boolean found = FALSE;
+ int kcunit;
+
+ if (cfil_info == NULL) {
+ return;
+ }
+
+ if (report_frequency) {
+ if (entry == NULL) {
+ return;
+ }
+
+ // Update stats reporting frequency.
+ if (entry->cfe_stats_report_frequency != report_frequency) {
+ entry->cfe_stats_report_frequency = report_frequency;
+ if (entry->cfe_stats_report_frequency < CFIL_STATS_REPORT_INTERVAL_MIN_MSEC) {
+ entry->cfe_stats_report_frequency = CFIL_STATS_REPORT_INTERVAL_MIN_MSEC;
+ }
+ microuptime(&entry->cfe_stats_report_ts);
+
+			// Insert cfil_info into the list only if it is not already there.
+ TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
+ if (cfil == cfil_info) {
+ return;
+ }
+ }
+
+ TAILQ_INSERT_TAIL(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
+
+			// Wake up the stats thread if this is the first flow added.
+ if (cfil_sock_attached_stats_count == 0) {
+ thread_wakeup((caddr_t)&cfil_sock_attached_stats_count);
+ }
+ cfil_sock_attached_stats_count++;
+#if STATS_DEBUG
+ CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW INSERTED: <so %llx sockID %llu> stats frequency %d msecs",
+ cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
+ cfil_info->cfi_sock_id,
+ entry->cfe_stats_report_frequency);
+#endif
+ }
+ } else {
+ // Turn off stats reporting for this filter.
+ if (entry != NULL) {
+ // Already off, no change.
+ if (entry->cfe_stats_report_frequency == 0) {
+ return;
+ }
+
+ entry->cfe_stats_report_frequency = 0;
+ // If cfil_info still has filter(s) asking for stats, no need to remove from list.
+ for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
+ if (cfil_info->cfi_entries[kcunit - 1].cfe_stats_report_frequency > 0) {
+ return;
+ }
+ }
+ }
+
+		// No filter is asking for stats for this cfil_info anymore; remove it from the list.
+ if (!TAILQ_EMPTY(&cfil_sock_head_stats)) {
+ found = FALSE;
+ TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
+ if (cfil == cfil_info) {
+ found = TRUE;
+ break;
+ }
+ }
+ if (found) {
+ cfil_sock_attached_stats_count--;
+ TAILQ_REMOVE(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
+#if STATS_DEBUG
+ CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW DELETED: <so %llx sockID %llu> stats frequency reset",
+ cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
+ cfil_info->cfi_sock_id);
+#endif
+ }
+ }
+ }
+}
+
static errno_t
cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
int flags)
if (error == EJUSTRETURN) {
error = 0;
}
+
+ // Toggle stats reporting according to received verdict.
+ cfil_rw_lock_exclusive(&cfil_lck_rw);
+ cfil_info_stats_toggle(cfil_info, entry, action_msg->cfa_stats_frequency);
+ cfil_rw_unlock_exclusive(&cfil_lck_rw);
+
break;
case CFM_OP_DROP:
lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);
TAILQ_INIT(&cfil_sock_head);
+ TAILQ_INIT(&cfil_sock_head_stats);
/*
* Register kernel control
/* this must not fail */
VERIFY(cfil_udp_gc_thread != NULL);
+ // Spawn thread for statistics reporting
+ if (kernel_thread_start(cfil_stats_report_thread_func, NULL,
+ &cfil_stats_report_thread) != KERN_SUCCESS) {
+ panic_plain("%s: Can't create statistics report thread", __func__);
+ /* NOTREACHED */
+ }
+ /* this must not fail */
+ VERIFY(cfil_stats_report_thread != NULL);
+
// Set UDP per-flow mbuf thresholds to 1/32 of platform max
mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
+
+ memset(&global_cfil_stats_report_buffers, 0, sizeof(global_cfil_stats_report_buffers));
}
struct cfil_info *
cfil_sock_attached_count--;
TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);
+ // Turn off stats reporting for cfil_info.
+ cfil_info_stats_toggle(cfil_info, NULL, 0);
+
out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
struct in6_addr *ip6, u_int16_t port)
{
+ if (sin46 == NULL) {
+ return;
+ }
+
struct sockaddr_in6 *sin6 = &sin46->sin6;
sin6->sin6_family = AF_INET6;
fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
struct in_addr ip, u_int16_t port)
{
+ if (sin46 == NULL) {
+ return;
+ }
+
struct sockaddr_in *sin = &sin46->sin;
sin->sin_family = AF_INET;
struct cfil_hash_entry *hash_entry;
struct cfil_db *db;
struct socket *so;
- u_int32_t current_time = 0;
+ u_int64_t current_time = 0;
current_time = net_uptime();
}
return NULL;
}
+
+static int
+cfil_dispatch_stats_event_locked(int kcunit, struct cfil_stats_report_buffer *buffer, uint32_t stats_count)
+{
+ struct content_filter *cfc = NULL;
+ errno_t error = 0;
+ size_t msgsize = 0;
+
+ if (buffer == NULL || stats_count == 0) {
+ return error;
+ }
+
+ if (content_filters == NULL || kcunit > MAX_CONTENT_FILTER) {
+ return error;
+ }
+
+ cfc = content_filters[kcunit - 1];
+ if (cfc == NULL) {
+ return error;
+ }
+
+ /* Would be wasteful to try */
+ if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
+ error = ENOBUFS;
+ goto done;
+ }
+
+ msgsize = sizeof(struct cfil_msg_stats_report) + (sizeof(struct cfil_msg_sock_stats) * stats_count);
+ buffer->msghdr.cfm_len = msgsize;
+ buffer->msghdr.cfm_version = 1;
+ buffer->msghdr.cfm_type = CFM_TYPE_EVENT;
+ buffer->msghdr.cfm_op = CFM_OP_STATS;
+ buffer->msghdr.cfm_sock_id = 0;
+ buffer->count = stats_count;
+
+#if STATS_DEBUG
+ CFIL_LOG(LOG_ERR, "STATS (kcunit %d): msg size %lu - %lu %lu %lu",
+ kcunit,
+ (unsigned long)msgsize,
+ (unsigned long)sizeof(struct cfil_msg_stats_report),
+ (unsigned long)sizeof(struct cfil_msg_sock_stats),
+ (unsigned long)stats_count);
+#endif
+
+ error = ctl_enqueuedata(cfc->cf_kcref, cfc->cf_kcunit,
+ buffer,
+ msgsize,
+ CTL_DATA_EOR);
+ if (error != 0) {
+ CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
+ goto done;
+ }
+ OSIncrementAtomic(&cfil_stats.cfs_stats_event_ok);
+
+#if STATS_DEBUG
+ CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT: send msg to %d", kcunit);
+#endif
+
+done:
+
+ if (error == ENOBUFS) {
+ OSIncrementAtomic(
+ &cfil_stats.cfs_stats_event_flow_control);
+
+ if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
+ cfil_rw_lock_exclusive(&cfil_lck_rw);
+ }
+
+ cfc->cf_flags |= CFF_FLOW_CONTROLLED;
+
+ cfil_rw_unlock_exclusive(&cfil_lck_rw);
+ } else if (error != 0) {
+ OSIncrementAtomic(&cfil_stats.cfs_stats_event_fail);
+ }
+
+ return error;
+}
+
+static void
+cfil_stats_report_thread_sleep(bool forever)
+{
+#if STATS_DEBUG
+ CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION SLEEP");
+#endif
+
+ if (forever) {
+ (void) assert_wait((event_t) &cfil_sock_attached_stats_count,
+ THREAD_INTERRUPTIBLE);
+ } else {
+ uint64_t deadline = 0;
+ nanoseconds_to_absolutetime(CFIL_STATS_REPORT_RUN_INTERVAL_NSEC, &deadline);
+ clock_absolutetime_interval_to_deadline(deadline, &deadline);
+
+ (void) assert_wait_deadline(&cfil_sock_attached_stats_count,
+ THREAD_INTERRUPTIBLE, deadline);
+ }
+}
+
+static void
+cfil_stats_report_thread_func(void *v, wait_result_t w)
+{
+#pragma unused(v, w)
+
+ ASSERT(cfil_stats_report_thread == current_thread());
+ thread_set_thread_name(current_thread(), "CFIL_STATS_REPORT");
+
+ // Kick off the first stats report pass shortly
+ cfil_stats_report_thread_sleep(false);
+ thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
+ /* NOTREACHED */
+}
+
+static bool
+cfil_stats_collect_flow_stats_for_filter(int kcunit,
+ struct cfil_info *cfil_info,
+ struct cfil_entry *entry,
+ struct timeval current_tv)
+{
+ struct cfil_stats_report_buffer *buffer = NULL;
+ struct cfil_msg_sock_stats *flow_array = NULL;
+ struct cfil_msg_sock_stats *stats = NULL;
+ struct inpcb *inp = NULL;
+ struct timeval diff_time;
+ uint64_t diff_time_usecs;
+ int index = 0;
+
+ if (entry->cfe_stats_report_frequency == 0) {
+ return false;
+ }
+
+ buffer = global_cfil_stats_report_buffers[kcunit - 1];
+ if (buffer == NULL) {
+#if STATS_DEBUG
+ CFIL_LOG(LOG_ERR, "CFIL: STATS: no buffer");
+#endif
+ return false;
+ }
+
+ timersub(&current_tv, &entry->cfe_stats_report_ts, &diff_time);
+ diff_time_usecs = (diff_time.tv_sec * USEC_PER_SEC) + diff_time.tv_usec;
+
+#if STATS_DEBUG
+ CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - elapsed time - ts %llu %llu cur ts %llu %llu diff %llu %llu (usecs %llu) @freq %llu usecs sockID %llu",
+ (unsigned long long)entry->cfe_stats_report_ts.tv_sec,
+ (unsigned long long)entry->cfe_stats_report_ts.tv_usec,
+ (unsigned long long)current_tv.tv_sec,
+ (unsigned long long)current_tv.tv_usec,
+ (unsigned long long)diff_time.tv_sec,
+ (unsigned long long)diff_time.tv_usec,
+ (unsigned long long)diff_time_usecs,
+ (unsigned long long)((entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC),
+ cfil_info->cfi_sock_id);
+#endif
+
+ // Compare elapsed time in usecs
+ if (diff_time_usecs >= (entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC) {
+#if STATS_DEBUG
+ CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - in %llu reported %llu",
+ cfil_info->cfi_byte_inbound_count,
+ entry->cfe_byte_inbound_count_reported);
+ CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - out %llu reported %llu",
+ cfil_info->cfi_byte_outbound_count,
+ entry->cfe_byte_outbound_count_reported);
+#endif
+ // Check if flow has new bytes that have not been reported
+ if (entry->cfe_byte_inbound_count_reported < cfil_info->cfi_byte_inbound_count ||
+ entry->cfe_byte_outbound_count_reported < cfil_info->cfi_byte_outbound_count) {
+ flow_array = (struct cfil_msg_sock_stats *)&buffer->stats;
+ index = global_cfil_stats_counts[kcunit - 1];
+
+ stats = &flow_array[index];
+ stats->cfs_sock_id = cfil_info->cfi_sock_id;
+ stats->cfs_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
+ stats->cfs_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
+
+ if (entry->cfe_laddr_sent == false) {
+ /* cache it if necessary */
+ if (cfil_info->cfi_so_attach_laddr.sa.sa_len == 0) {
+ inp = cfil_info->cfi_so ? sotoinpcb(cfil_info->cfi_so) : NULL;
+ if (inp != NULL) {
+ boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
+ union sockaddr_in_4_6 *src = outgoing ? &cfil_info->cfi_so_attach_laddr : NULL;
+ union sockaddr_in_4_6 *dst = outgoing ? NULL : &cfil_info->cfi_so_attach_laddr;
+ cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
+ src, dst, inp->inp_vflag & INP_IPV4, outgoing);
+ }
+ }
+
+ if (cfil_info->cfi_so_attach_laddr.sa.sa_len != 0) {
+ stats->cfs_laddr.sin6 = cfil_info->cfi_so_attach_laddr.sin6;
+ entry->cfe_laddr_sent = true;
+ }
+ }
+
+ global_cfil_stats_counts[kcunit - 1]++;
+
+ entry->cfe_stats_report_ts = current_tv;
+ entry->cfe_byte_inbound_count_reported = cfil_info->cfi_byte_inbound_count;
+ entry->cfe_byte_outbound_count_reported = cfil_info->cfi_byte_outbound_count;
+#if STATS_DEBUG
+ cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: STATS COLLECTED");
+#endif
+ CFI_ADD_TIME_LOG(cfil_info, &current_tv, &cfil_info->cfi_first_event, CFM_OP_STATS);
+ return true;
+ }
+ }
+ return false;
+}
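As a worked example of the unit conversion in the comparison above: with cfe_stats_report_frequency = 500 (milliseconds), the threshold is 500 * NSEC_PER_MSEC / NSEC_PER_USEC = 500 * 1,000,000 / 1,000 = 500,000 microseconds, so the flow is collected once at least half a second has elapsed since entry->cfe_stats_report_ts.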
+
+static void
+cfil_stats_report(void *v, wait_result_t w)
+{
+#pragma unused(v, w)
+
+ struct cfil_info *cfil_info = NULL;
+ struct cfil_entry *entry = NULL;
+ struct timeval current_tv;
+ uint32_t flow_count = 0;
+ uint64_t saved_next_sock_id = 0; // Next sock id to be reported for next loop
+ bool flow_reported = false;
+
+#if STATS_DEBUG
+ CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION RUNNING");
+#endif
+
+ do {
+ // Collect sock ids of all flows that have new stats
+ cfil_rw_lock_shared(&cfil_lck_rw);
+
+ if (cfil_sock_attached_stats_count == 0) {
+#if STATS_DEBUG
+ CFIL_LOG(LOG_ERR, "CFIL: STATS: no flow");
+#endif
+ cfil_rw_unlock_shared(&cfil_lck_rw);
+ goto go_sleep;
+ }
+
+ for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
+ if (global_cfil_stats_report_buffers[kcunit - 1] != NULL) {
+ memset(global_cfil_stats_report_buffers[kcunit - 1], 0, sizeof(struct cfil_stats_report_buffer));
+ }
+ global_cfil_stats_counts[kcunit - 1] = 0;
+ }
+
+ microuptime(&current_tv);
+ flow_count = 0;
+
+ TAILQ_FOREACH(cfil_info, &cfil_sock_head_stats, cfi_link_stats) {
+ if (saved_next_sock_id != 0 &&
+ saved_next_sock_id == cfil_info->cfi_sock_id) {
+ // Here is where we left off previously, start accumulating
+ saved_next_sock_id = 0;
+ }
+
+ if (saved_next_sock_id == 0) {
+ if (flow_count >= CFIL_STATS_REPORT_MAX_COUNT) {
+ // Examine a fixed number of flows each round. Remember the current flow
+ // so we can pick up from here on the next pass.
+ saved_next_sock_id = cfil_info->cfi_sock_id;
+ break;
+ }
+
+ flow_reported = false;
+ for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
+ entry = &cfil_info->cfi_entries[kcunit - 1];
+ if (entry->cfe_filter == NULL) {
+#if STATS_DEBUG
+ CFIL_LOG(LOG_NOTICE, "CFIL: STATS REPORT - so %llx no filter",
+ cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0);
+#endif
+ continue;
+ }
+
+ if ((entry->cfe_stats_report_frequency > 0) &&
+ cfil_stats_collect_flow_stats_for_filter(kcunit, cfil_info, entry, current_tv) == true) {
+ flow_reported = true;
+ }
+ }
+ if (flow_reported == true) {
+ flow_count++;
+ }
+ }
+ }
+
+ if (flow_count > 0) {
+#if STATS_DEBUG
+ CFIL_LOG(LOG_ERR, "CFIL: STATS reporting for %d flows", flow_count);
+#endif
+ for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
+ if (global_cfil_stats_report_buffers[kcunit - 1] != NULL &&
+ global_cfil_stats_counts[kcunit - 1] > 0) {
+ cfil_dispatch_stats_event_locked(kcunit,
+ global_cfil_stats_report_buffers[kcunit - 1],
+ global_cfil_stats_counts[kcunit - 1]);
+ }
+ }
+ } else {
+ cfil_rw_unlock_shared(&cfil_lck_rw);
+ goto go_sleep;
+ }
+
+ cfil_rw_unlock_shared(&cfil_lck_rw);
+
+ // Loop again if we haven't finished the whole cfil_info list
+ } while (saved_next_sock_id != 0);
+
+go_sleep:
+
+ // Sleep forever (until woken up) if there are no more flows to report
+ cfil_rw_lock_shared(&cfil_lck_rw);
+ cfil_stats_report_thread_sleep(cfil_sock_attached_stats_count == 0 ? true : false);
+ cfil_rw_unlock_shared(&cfil_lck_rw);
+ thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
+ /* NOTREACHED */
+}
#define CFM_OP_DATA_IN 4 /* data being received */
#define CFM_OP_DISCONNECT_OUT 5 /* no more outgoing data */
#define CFM_OP_DISCONNECT_IN 6 /* no more incoming data */
+#define CFM_OP_STATS 7 /* periodic stats report(s) */
/*
* Operations associated with action from filter to kernel
uint32_t cfc_signature_length;
} __attribute__((aligned(8)));
+/*
+ * struct cfil_msg_stats_report
+ *
+ * Statistics report for flow(s).
+ *
+ * Action: No reply is expected.
+ *
+ * Valid Types: CFM_TYPE_EVENT
+ *
+ * Valid Op: CFM_OP_STATS
+ */
+struct cfil_msg_sock_stats {
+ cfil_sock_id_t cfs_sock_id;
+ uint64_t cfs_byte_inbound_count;
+ uint64_t cfs_byte_outbound_count;
+ union sockaddr_in_4_6 cfs_laddr;
+} __attribute__((aligned(8)));
+
+struct cfil_msg_stats_report {
+ struct cfil_msg_hdr cfr_msghdr;
+ uint32_t cfr_count;
+ struct cfil_msg_sock_stats cfr_stats[];
+} __attribute__((aligned(8)));
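
A CFM_OP_STATS event is therefore variable-length: cfr_count fixed-size cfil_msg_sock_stats records follow the header. A minimal sketch of how a user-space filter agent might walk one such event pulled off its kernel-control socket is shown below; the buffer/length handling and the stdio output are assumptions for illustration, only the structures and the CFM_OP_STATS constant come from this header.

/* Sketch: `buf'/`len' are assumed to hold one complete stats event. */
static void
handle_stats_event(const void *buf, size_t len)
{
	const struct cfil_msg_stats_report *rep = buf;

	if (len < sizeof(*rep) ||
	    rep->cfr_msghdr.cfm_op != CFM_OP_STATS ||
	    len < sizeof(*rep) +
	    rep->cfr_count * sizeof(struct cfil_msg_sock_stats)) {
		return;		/* truncated or not a stats report */
	}
	for (uint32_t i = 0; i < rep->cfr_count; i++) {
		const struct cfil_msg_sock_stats *s = &rep->cfr_stats[i];
		printf("sock %llu: in %llu bytes, out %llu bytes\n",
		    (unsigned long long)s->cfs_sock_id,
		    (unsigned long long)s->cfs_byte_inbound_count,
		    (unsigned long long)s->cfs_byte_outbound_count);
	}
}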
+
/*
* struct cfil_msg_action
*
uint64_t cfa_in_peek_offset;
uint64_t cfa_out_pass_offset;
uint64_t cfa_out_peek_offset;
+ uint32_t cfa_stats_frequency; // Statistics frequency in milliseconds
};
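
cfa_stats_frequency rides along with the ordinary verdict message, so an agent opts in per flow when it replies to the kernel. Below is a hedged sketch of a pass-everything verdict that also asks for a report roughly once a second; CFM_OP_DATA_UPDATE, CFM_TYPE_ACTION and the connected control-socket descriptor kcfd come from the wider content-filter interface and are assumptions as far as this hunk is concerned.

/* Sketch: sock_id came from an earlier attach event received on kcfd. */
struct cfil_msg_action action = {
	.cfa_msghdr = {
		.cfm_len     = sizeof(action),
		.cfm_version = 1,
		.cfm_type    = CFM_TYPE_ACTION,
		.cfm_op      = CFM_OP_DATA_UPDATE,
		.cfm_sock_id = sock_id,
	},
	.cfa_in_pass_offset  = UINT64_MAX,	/* pass everything inbound */
	.cfa_out_pass_offset = UINT64_MAX,	/* pass everything outbound */
	.cfa_stats_frequency = 1000,		/* report every ~1000 ms */
};
(void)send(kcfd, &action, sizeof(action), 0);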
/*
int32_t cfs_data_event_flow_control;
int32_t cfs_data_event_fail;
+ int32_t cfs_stats_event_ok;
+ int32_t cfs_stats_event_flow_control;
+ int32_t cfs_stats_event_fail;
+
int32_t cfs_disconnect_in_event_ok;
int32_t cfs_disconnect_out_event_ok;
int32_t cfs_disconnect_event_flow_control;
lck_mtx_lock_spin(&inp->input_lck);
VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
- if (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
+ if (!(inp->input_waiting & ~(DLIL_INPUT_RUNNING |
+ DLIL_INPUT_TERMINATE))) {
break;
}
}
lck_mtx_lock_spin(&inp->input_lck);
VERIFY(inp->input_waiting & DLIL_INPUT_RUNNING);
- if (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
+ if (!(inp->input_waiting & ~(DLIL_INPUT_RUNNING |
+ DLIL_INPUT_TERMINATE))) {
break;
}
}
#define BR_DBGF_MCAST 0x0080
#define BR_DBGF_HOSTFILTER 0x0100
#define BR_DBGF_CHECKSUM 0x0200
+#define BR_DBGF_MAC_NAT 0x0400
#endif /* BRIDGE_DEBUG */
#define _BRIDGE_LOCK(_sc) lck_mtx_lock(&(_sc)->sc_mtx)
#define BRIDGE_UNREF(_sc) bridge_unref(_sc)
#define BRIDGE_XLOCK(_sc) bridge_xlock(_sc)
#define BRIDGE_XDROP(_sc) bridge_xdrop(_sc)
+#define IF_BRIDGE_DEBUG(f) bridge_debug_flag_is_set(f)
#else /* !BRIDGE_DEBUG */
(_sc)->sc_iflist_xcnt--; \
} while (0)
+#define IF_BRIDGE_DEBUG(f) FALSE
+
#endif /* BRIDGE_DEBUG */
#if NBPFILTER > 0
#define BRIDGE_BPF_MTAP_INPUT(sc, m) \
- if (sc->sc_bpf_input) \
- bridge_bpf_input(sc->sc_ifp, m)
+ if (sc->sc_bpf_input != NULL) \
+ bridge_bpf_input(sc->sc_ifp, m, __func__, __LINE__)
#else /* NBPFILTER */
#define BRIDGE_BPF_MTAP_INPUT(ifp, m)
#endif /* NBPFILTER */
#define BRIDGE_RTABLE_PRUNE_PERIOD (5 * 60)
#endif
+/*
+ * Number of MAC NAT entries
+ * - sized based on 16 clients (including MAC NAT interface)
+ * each with 4 addresses
+ */
+#ifndef BRIDGE_MAC_NAT_ENTRY_MAX
+#define BRIDGE_MAC_NAT_ENTRY_MAX 64
+#endif /* BRIDGE_MAC_NAT_ENTRY_MAX */
+
/*
* List of capabilities to possibly mask on the member interface.
*/
#define BIFF_HF_IPSRC 0x40 /* host filter source IP is set */
#define BIFF_INPUT_BROADCAST 0x80 /* send broadcast packets in */
+/*
+ * mac_nat_entry
+ * - translates between an IP address and MAC address on a specific
+ * bridge interface member
+ */
+struct mac_nat_entry {
+ LIST_ENTRY(mac_nat_entry) mne_list; /* list linkage */
+ struct bridge_iflist *mne_bif; /* originating interface */
+ unsigned long mne_expire; /* expiration time */
+ union {
+ struct in_addr mneu_ip; /* originating IPv4 address */
+ struct in6_addr mneu_ip6; /* originating IPv6 address */
+ } mne_u;
+ uint8_t mne_mac[ETHER_ADDR_LEN];
+ uint8_t mne_flags;
+ uint8_t mne_reserved;
+};
+#define mne_ip mne_u.mneu_ip
+#define mne_ip6 mne_u.mneu_ip6
+
+#define MNE_FLAGS_IPV6 0x01 /* IPv6 address */
+
+LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
+
+/*
+ * mac_nat_record
+ * - used by bridge_mac_nat_output() to convey the translation that needs
+ * to take place in bridge_mac_nat_translate
+ * - holds enough information so that the translation can be done later without
+ * holding the bridge lock
+ */
+struct mac_nat_record {
+ uint16_t mnr_ether_type;
+ union {
+ uint16_t mnru_arp_offset;
+ struct {
+ uint16_t mnruip_dhcp_flags;
+ uint16_t mnruip_udp_csum;
+ uint8_t mnruip_header_len;
+ } mnru_ip;
+ struct {
+ uint16_t mnruip6_icmp6_len;
+ uint16_t mnruip6_lladdr_offset;
+ uint8_t mnruip6_icmp6_type;
+ uint8_t mnruip6_header_len;
+ } mnru_ip6;
+ } mnr_u;
+};
+
+#define mnr_arp_offset mnr_u.mnru_arp_offset
+
+#define mnr_ip_header_len mnr_u.mnru_ip.mnruip_header_len
+#define mnr_ip_dhcp_flags mnr_u.mnru_ip.mnruip_dhcp_flags
+#define mnr_ip_udp_csum mnr_u.mnru_ip.mnruip_udp_csum
+
+#define mnr_ip6_icmp6_len mnr_u.mnru_ip6.mnruip6_icmp6_len
+#define mnr_ip6_icmp6_type mnr_u.mnru_ip6.mnruip6_icmp6_type
+#define mnr_ip6_header_len mnr_u.mnru_ip6.mnruip6_header_len
+#define mnr_ip6_lladdr_offset mnr_u.mnru_ip6.mnruip6_lladdr_offset
+
/*
* Bridge route node.
*/
u_char sc_defaddr[6]; /* Default MAC address */
char sc_if_xname[IFNAMSIZ];
+ struct bridge_iflist *sc_mac_nat_bif; /* single MAC NAT interface */
+ struct mac_nat_entry_list sc_mne_list; /* MAC NAT IPv4 */
+ struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
+ uint32_t sc_mne_max; /* max # of entries */
+ uint32_t sc_mne_count; /* cur. # of entries */
+ uint32_t sc_mne_allocation_failures;
#if BRIDGE_DEBUG
/*
* Locking and unlocking calling history
#endif /* BRIDGE_DEBUG */
};
-#define SCF_DETACHING 0x01
-#define SCF_RESIZING 0x02
-#define SCF_MEDIA_ACTIVE 0x04
+#define SCF_DETACHING 0x01
+#define SCF_RESIZING 0x02
+#define SCF_MEDIA_ACTIVE 0x04
typedef enum {
kChecksumOperationNone = 0,
static int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
static zone_t bridge_rtnode_pool = NULL;
+static zone_t bridge_mne_pool = NULL;
static int bridge_clone_create(struct if_clone *, uint32_t, void *);
static int bridge_clone_destroy(struct ifnet *);
static int bridge_output(struct ifnet *, struct mbuf *);
static void bridge_finalize_cksum(struct ifnet *, struct mbuf *);
static void bridge_start(struct ifnet *);
-__private_extern__ errno_t bridge_input(struct ifnet *, struct mbuf *, void *);
+static errno_t bridge_input(struct ifnet *, mbuf_t *);
+static errno_t bridge_iff_input(void *, ifnet_t, protocol_family_t,
+ mbuf_t *, char **);
static errno_t bridge_iff_output(void *, ifnet_t, protocol_family_t,
mbuf_t *);
static errno_t bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
- mbuf_t m);
+ mbuf_t *m);
-static int bridge_enqueue(struct bridge_softc *, struct ifnet *,
+static int bridge_enqueue(ifnet_t, struct ifnet *,
struct ifnet *, struct mbuf *, ChecksumOperation);
static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
static int bridge_ioctl_sfilt(struct bridge_softc *, void *);
static int bridge_ioctl_ghostfilter(struct bridge_softc *, void *);
static int bridge_ioctl_shostfilter(struct bridge_softc *, void *);
+static int bridge_ioctl_gmnelist32(struct bridge_softc *, void *);
+static int bridge_ioctl_gmnelist64(struct bridge_softc *, void *);
#ifdef PFIL_HOOKS
static int bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *,
int);
-static int bridge_ip_checkbasic(struct mbuf **);
-#ifdef INET6
-static int bridge_ip6_checkbasic(struct mbuf **);
-#endif /* INET6 */
static int bridge_fragment(struct ifnet *, struct mbuf *,
struct ether_header *, int, struct llc *);
#endif /* PFIL_HOOKS */
+static int bridge_ip_checkbasic(struct mbuf **);
+#ifdef INET6
+static int bridge_ip6_checkbasic(struct mbuf **);
+#endif /* INET6 */
+
+static int bridge_pf(struct mbuf **, struct ifnet *, uint32_t sc_filter_flags, int input);
static errno_t bridge_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);
-__private_extern__ errno_t bridge_bpf_input(ifnet_t, struct mbuf *);
-__private_extern__ errno_t bridge_bpf_output(ifnet_t, struct mbuf *);
+static errno_t bridge_bpf_input(ifnet_t, struct mbuf *, const char *, int);
+static errno_t bridge_bpf_output(ifnet_t, struct mbuf *);
static void bridge_detach(ifnet_t);
static void bridge_link_event(struct ifnet *, u_int32_t);
static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);
-static int bridge_host_filter(struct bridge_iflist *, struct mbuf *);
+static int bridge_host_filter(struct bridge_iflist *, mbuf_t *);
+
+static errno_t bridge_mac_nat_enable(struct bridge_softc *,
+ struct bridge_iflist *);
+static void bridge_mac_nat_disable(struct bridge_softc *sc);
+static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
+static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
+static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
+ struct bridge_iflist *);
+static ifnet_t bridge_mac_nat_input(struct bridge_softc *, mbuf_t *,
+ boolean_t *);
+static boolean_t bridge_mac_nat_output(struct bridge_softc *,
+ struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
+static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
+ const caddr_t);
#define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)
.bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
.bc_flags = BC_F_COPYIN | BC_F_SUSER },
+
+ { .bc_func = bridge_ioctl_gmnelist32, .bc_argsize = sizeof(struct ifbrmnelist32),
+ .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
};
static const struct bridge_control bridge_control_table64[] = {
.bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
.bc_flags = BC_F_COPYIN | BC_F_SUSER },
+
+ { .bc_func = bridge_ioctl_gmnelist64, .bc_argsize = sizeof(struct ifbrmnelist64),
+ .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
};
static const unsigned int bridge_control_table_size =
printf("\n");
}
+static boolean_t
+bridge_debug_flag_is_set(uint32_t flag)
+{
+ return (if_bridge_debug & flag) != 0;
+}
+
#endif /* BRIDGE_DEBUG */
/*
1024 * sizeof(struct bridge_rtnode), 0, "bridge_rtnode");
zone_change(bridge_rtnode_pool, Z_CALLERACCT, FALSE);
+ bridge_mne_pool = zinit(sizeof(struct mac_nat_entry),
+ 256 * sizeof(struct mac_nat_entry), 0, "bridge_mac_nat_entry");
+ zone_change(bridge_mne_pool, Z_CALLERACCT, FALSE);
+
lck_grp_attr = lck_grp_attr_alloc_init();
bridge_lock_grp = lck_grp_alloc_init("if_bridge", lck_grp_attr);
lck_mtx_init(&sc->sc_mtx, bridge_lock_grp, bridge_lock_attr);
sc->sc_brtmax = BRIDGE_RTABLE_MAX;
+ sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
- sc->sc_filter_flags = IFBF_FILT_DEFAULT;
-#ifndef BRIDGE_IPF
- /*
- * For backwards compatibility with previous behaviour...
- * Switch off filtering on the bridge itself if BRIDGE_IPF is
- * not defined.
- */
- sc->sc_filter_flags &= ~IFBF_FILT_USEIPF;
-#endif
+ sc->sc_filter_flags = 0;
TAILQ_INIT(&sc->sc_iflist);
__func__, error);
goto done;
}
+ LIST_INIT(&sc->sc_mne_list);
+ LIST_INIT(&sc->sc_mne_list_v6);
sc->sc_ifp = ifp;
error = bridge_ifnet_set_attrs(ifp);
if (error != 0) {
sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
link_print(sc);
}
#endif
BRIDGE_LOCK_ASSERT_NOTHELD(sc);
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_IOCTL) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_IOCTL)) {
printf("%s: ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)\n",
__func__, ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
(cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
error = ifnet_set_offload(sc->sc_ifp, offload);
if (error != 0) {
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
printf("%s: ifnet_set_offload(%s, 0x%x) "
"failed %d\n", __func__,
sc->sc_ifp->if_xname, offload, error);
tso_v4_mtu);
if (error != 0) {
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
printf("%s: ifnet_set_tso_mtu(%s, "
"AF_INET, %u) failed %d\n",
__func__, sc->sc_ifp->if_xname,
tso_v6_mtu);
if (error != 0) {
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
printf("%s: ifnet_set_tso_mtu(%s, "
"AF_INET6, %u) failed %d\n",
__func__, sc->sc_ifp->if_xname,
frmlen = (char *)mbuf_data(m) - *frame_ptr;
}
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_INPUT) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_INPUT)) {
printf("%s: %s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
"frmlen %lu\n", __func__, sc->sc_ifp->if_xname,
ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
(uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
included ? "inside" : "outside", frmlen);
- if (if_bridge_debug & BR_DBGF_MBUF) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MBUF)) {
printf_mbuf(m, "bridge_iff_input[", "\n");
printf_ether_header((struct ether_header *)
(void *)*frame_ptr);
}
}
#endif /* BRIDGE_DEBUG */
+ if (included == 0) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_INPUT)) {
+ printf("%s: frame_ptr outside mbuf\n", __func__);
+ }
+ goto out;
+ }
/* Move data pointer to start of frame to the link layer header */
- if (included) {
- (void) mbuf_setdata(m, (char *)mbuf_data(m) - frmlen,
- mbuf_len(m) + frmlen);
- (void) mbuf_pkthdr_adjustlen(m, frmlen);
- } else {
- printf("%s: frame_ptr outside mbuf\n", __func__);
+ (void) mbuf_setdata(m, (char *)mbuf_data(m) - frmlen,
+ mbuf_len(m) + frmlen);
+ (void) mbuf_pkthdr_adjustlen(m, frmlen);
+
+ /* make sure we can access the ethernet header */
+ if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_INPUT)) {
+ printf("%s: short frame %lu < %lu\n", __func__,
+ mbuf_pkthdr_len(m), sizeof(struct ether_header));
+ }
goto out;
}
+ if (mbuf_len(m) < sizeof(struct ether_header)) {
+ error = mbuf_pullup(data, sizeof(struct ether_header));
+ if (error != 0) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_INPUT)) {
+ printf("%s: mbuf_pullup(%lu) failed %d\n",
+ __func__, sizeof(struct ether_header),
+ error);
+ }
+ error = EJUSTRETURN;
+ goto out;
+ }
+ if (m != *data) {
+ m = *data;
+ *frame_ptr = mbuf_data(m);
+ }
+ }
- error = bridge_input(ifp, m, *frame_ptr);
+ error = bridge_input(ifp, data);
/* Adjust packet back to original */
if (error == 0) {
+ /* bridge_input might have modified *data */
+ if (*data != m) {
+ m = *data;
+ *frame_ptr = mbuf_data(m);
+ }
(void) mbuf_setdata(m, (char *)mbuf_data(m) + frmlen,
mbuf_len(m) - frmlen);
(void) mbuf_pkthdr_adjustlen(m, -frmlen);
}
#if BRIDGE_DEBUG
- if ((if_bridge_debug & BR_DBGF_INPUT) &&
- (if_bridge_debug & BR_DBGF_MBUF)) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_INPUT) &&
+ IF_BRIDGE_DEBUG(BR_DBGF_MBUF)) {
printf("\n");
printf_mbuf(m, "bridge_iff_input]", "\n");
}
}
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_OUTPUT) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_OUTPUT)) {
printf("%s: %s from %s m 0x%llx data 0x%llx\n", __func__,
sc->sc_ifp->if_xname, ifp->if_xname,
(uint64_t)VM_KERNEL_ADDRPERM(m),
}
#endif /* BRIDGE_DEBUG */
- error = bridge_member_output(sc, ifp, m);
+ error = bridge_member_output(sc, ifp, data);
if (error != 0 && error != EJUSTRETURN) {
printf("%s: bridge_member_output failed error %d\n", __func__,
error);
event_msg->kev_class == KEV_NETWORK_CLASS &&
event_msg->kev_subclass == KEV_DL_SUBCLASS) {
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
printf("%s: %s event_code %u - %s\n", __func__,
ifp->if_xname, event_msg->event_code,
dlil_kev_dl_code_str(event_msg->event_code));
struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
printf("%s: %s\n", __func__, ifp->if_xname);
}
#endif /* BRIDGE_DEBUG */
struct ifnet_attach_proto_param reg;
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
printf("%s: %s\n", __func__, ifp->if_xname);
}
#endif /* BRIDGE_DEBUG */
int error;
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
printf("%s: %s\n", __func__, ifp->if_xname);
}
#endif /* BRIDGE_DEBUG */
VERIFY(ifs != NULL);
/*
- * First, remove the member from the list first so it cannot be found anymore
+ * Remove the member from the list first so it cannot be found anymore
* when we release the bridge lock below
*/
BRIDGE_XLOCK(sc);
TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
BRIDGE_XDROP(sc);
+ if (sc->sc_mac_nat_bif != NULL) {
+ if (bif == sc->sc_mac_nat_bif) {
+ bridge_mac_nat_disable(sc);
+ } else {
+ bridge_mac_nat_flush_entries(sc, bif);
+ }
+ }
+
if (!gone) {
switch (ifs->if_type) {
case IFT_ETHER:
uint8_t eaddr[ETHER_ADDR_LEN];
struct iff_filter iff;
u_int32_t event_code = 0;
+ boolean_t mac_nat = FALSE;
ifs = ifunit(req->ifbr_ifsname);
if (ifs == NULL) {
}
/* If it's in the span list, it can't be a member. */
- TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
- if (ifs == bif->bif_ifp) {
- return EBUSY;
+ TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
+ if (ifs == bif->bif_ifp) {
+ return EBUSY;
+ }
}
if (ifs->if_bridge == sc) {
switch (ifs->if_type) {
case IFT_ETHER:
+ if (strcmp(ifs->if_name, "en") == 0 &&
+ ifs->if_subfamily == IFNET_SUBFAMILY_WIFI) {
+ /* XXX is there a better way to identify Wi-Fi STA? */
+ mac_nat = TRUE;
+ }
case IFT_L2VLAN:
/* permitted interface types */
break;
return EINVAL;
}
+ /* fail to add the interface if the MTU doesn't match */
+ if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
+ printf("%s: %s: invalid MTU for %s", __func__,
+ sc->sc_ifp->if_xname,
+ ifs->if_xname);
+ return EINVAL;
+ }
+
+ /* there's already an interface that's doing MAC NAT */
+ if (mac_nat && sc->sc_mac_nat_bif != NULL) {
+ return EBUSY;
+ }
bif = _MALLOC(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
if (bif == NULL) {
return ENOMEM;
}
-
bif->bif_ifp = ifs;
ifnet_reference(ifs);
- bif->bif_ifflags = IFBIF_LEARNING | IFBIF_DISCOVER;
+ bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
#if HAS_IF_CAP
bif->bif_savedcaps = ifs->if_capenable;
#endif /* HAS_IF_CAP */
bif->bif_sc = sc;
+ if (mac_nat) {
+ (void)bridge_mac_nat_enable(sc, bif);
+ }
/* Allow the first Ethernet member to define the MTU */
if (TAILQ_EMPTY(&sc->sc_iflist)) {
sc->sc_ifp->if_mtu = ifs->if_mtu;
- } else if (sc->sc_ifp->if_mtu != ifs->if_mtu) {
- printf("%s: %s: invalid MTU for %s", __func__,
- sc->sc_ifp->if_xname,
- ifs->if_xname);
- return EINVAL;
}
/*
req->ifbr_proto = bp->bp_protover;
req->ifbr_role = bp->bp_role;
req->ifbr_stpflags = bp->bp_flags;
+ req->ifbr_ifsflags = bif->bif_ifflags;
+
/* Copy STP state options as flags */
if (bp->bp_operedge) {
req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
}
- req->ifbr_ifsflags = bif->bif_ifflags;
req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
req->ifbr_addrcnt = bif->bif_addrcnt;
req->ifbr_addrmax = bif->bif_addrmax;
/* SPAN is readonly */
return EINVAL;
}
+ if ((req->ifbr_ifsflags & IFBIF_MAC_NAT) != 0) {
+ errno_t error;
+ error = bridge_mac_nat_enable(sc, bif);
+ if (error != 0) {
+ return error;
+ }
+ } else if (sc->sc_mac_nat_bif != NULL) {
+ bridge_mac_nat_disable(sc);
+ }
#if BRIDGESTP
return EINVAL;
}
-#ifndef BRIDGE_IPF
if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
return EINVAL;
}
-#endif
sc->sc_filter_flags = param->ifbrp_filter;
return 0;
}
+static char *
+bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
+ unsigned int * count_p, char *buf, unsigned int *len_p)
+{
+ unsigned int count = *count_p;
+ struct ifbrmne ifbmne;
+ unsigned int len = *len_p;
+ struct mac_nat_entry *mne;
+ unsigned long now;
+
+ bzero(&ifbmne, sizeof(ifbmne));
+ LIST_FOREACH(mne, list, mne_list) {
+ if (len < sizeof(ifbmne)) {
+ break;
+ }
+ snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
+ "%s", mne->mne_bif->bif_ifp->if_xname);
+ memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
+ sizeof(ifbmne.ifbmne_mac));
+ now = (unsigned long) net_uptime();
+ if (now < mne->mne_expire) {
+ ifbmne.ifbmne_expire = mne->mne_expire - now;
+ } else {
+ ifbmne.ifbmne_expire = 0;
+ }
+ if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
+ ifbmne.ifbmne_af = AF_INET6;
+ ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
+ } else {
+ ifbmne.ifbmne_af = AF_INET;
+ ifbmne.ifbmne_ip_addr = mne->mne_ip;
+ }
+ memcpy(buf, &ifbmne, sizeof(ifbmne));
+ count++;
+ buf += sizeof(ifbmne);
+ len -= sizeof(ifbmne);
+ }
+ *count_p = count;
+ *len_p = len;
+ return buf;
+}
+
+/*
+ * bridge_ioctl_gmnelist()
+ * Perform the get mac_nat_entry list ioctl.
+ *
+ * Note:
+ * The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
+ * field size/layout except for the last field ifbml_buf, the user-supplied
+ * buffer pointer. That is passed in separately via the 'user_addr'
+ * parameter from the respective 32-bit or 64-bit ioctl routine.
+ */
+static int
+bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
+ user_addr_t user_addr)
+{
+ unsigned int count;
+ char *buf;
+ int error = 0;
+ char *outbuf = NULL;
+ struct mac_nat_entry *mne;
+ unsigned int buflen;
+ unsigned int len;
+
+ mnl->ifbml_elsize = sizeof(struct ifbrmne);
+ count = 0;
+ LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
+ count++;
+ }
+ LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
+ count++;
+ }
+ buflen = sizeof(struct ifbrmne) * count;
+ if (buflen == 0 || mnl->ifbml_len == 0) {
+ mnl->ifbml_len = buflen;
+ return error;
+ }
+ BRIDGE_UNLOCK(sc);
+ outbuf = _MALLOC(buflen, M_TEMP, M_WAITOK | M_ZERO);
+ BRIDGE_LOCK(sc);
+ count = 0;
+ buf = outbuf;
+ len = min(mnl->ifbml_len, buflen);
+ buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
+ buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
+ mnl->ifbml_len = count * sizeof(struct ifbrmne);
+ BRIDGE_UNLOCK(sc);
+ error = copyout(outbuf, user_addr, mnl->ifbml_len);
+ _FREE(outbuf, M_TEMP);
+ BRIDGE_LOCK(sc);
+ return error;
+}
+
+static int
+bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *arg)
+{
+ struct ifbrmnelist64 *mnl = arg;
+
+ return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
+}
+
+static int
+bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *arg)
+{
+ struct ifbrmnelist32 *mnl = arg;
+
+ return bridge_ioctl_gmnelist(sc, arg,
+ CAST_USER_ADDR_T(mnl->ifbml_buf));
+}
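
bridge_ioctl_gmnelist implements the usual two-pass size-then-copy pattern: a call with ifbml_len == 0 only reports the space needed, and a second call with a buffer gets the entries copied out. A rough user-space sketch using the standard SIOCGDRVSPEC driver ioctl follows; the BRDGGMNELIST command constant and the direct use of the 32-bit list structure are assumptions for illustration (the user-visible definitions live in if_bridgevar.h, outside this excerpt), and error handling is elided.

struct ifbrmnelist32 mnl;
struct ifdrv ifd;
char *entries;
int s = socket(AF_INET, SOCK_DGRAM, 0);

memset(&ifd, 0, sizeof(ifd));
memset(&mnl, 0, sizeof(mnl));
strlcpy(ifd.ifd_name, "bridge0", sizeof(ifd.ifd_name));
ifd.ifd_cmd = BRDGGMNELIST;		/* assumed command name */
ifd.ifd_len = sizeof(mnl);
ifd.ifd_data = &mnl;

ioctl(s, SIOCGDRVSPEC, &ifd);		/* pass 1: ifbml_len <- bytes needed */
entries = malloc(mnl.ifbml_len);
mnl.ifbml_buf = (uintptr_t)entries;	/* pass 2: entries copied out here */
ioctl(s, SIOCGDRVSPEC, &ifd);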
/*
* bridge_ifdetach:
struct bridge_softc *sc = ifp->if_bridge;
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
printf("%s: %s\n", __func__, ifp->if_xname);
}
#endif /* BRIDGE_DEBUG */
struct bridge_softc *sc = ifp->if_bridge;
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
printf("%s: %s\n", __func__, ifp->if_xname);
}
#endif /* BRIDGE_DEBUG */
}
BRIDGE_UNLOCK(sc);
#if BRIDGE_DEBUG
- if ((if_bridge_debug & BR_DBGF_LIFECYCLE) != 0 && changed) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
printf("%s: input broadcast %s", ifp->if_xname,
input_broadcast ? "ENABLED" : "DISABLED");
}
u_int32_t event_code = 0;
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
printf("%s: %s\n", __func__, ifp->if_xname);
}
#endif /* BRIDGE_DEBUG */
} else {
bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
}
+ if (sc->sc_mac_nat_bif != NULL) {
+ bridge_mac_nat_flush_entries(sc, bif);
+ }
event_code = bridge_updatelinkstatus(sc);
}
BRIDGE_LOCK(sc);
#if BRIDGE_DEBUG_DELAYED_CALLBACK
- if (if_bridge_debug & BR_DBGF_DELAYED_CALL) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_DELAYED_CALL)) {
printf("%s: %s call 0x%llx flags 0x%x\n", __func__,
sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
call->bdc_flags);
call->bdc_flags = BDCF_OUTSTANDING;
#if BRIDGE_DEBUG_DELAYED_CALLBACK
- if (if_bridge_debug & BR_DBGF_DELAYED_CALL) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_DELAYED_CALL)) {
printf("%s: %s call 0x%llx flags 0x%x\n", __func__,
sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
call->bdc_flags);
while (call->bdc_flags & BDCF_OUTSTANDING) {
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_DELAYED_CALL) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_DELAYED_CALL)) {
printf("%s: %s call 0x%llx flags 0x%x\n", __func__,
sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
call->bdc_flags);
#endif /* INET6 */
}
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_CHECKSUM) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_CHECKSUM)) {
printf("%s: [%s -> %s] before 0x%x did 0x%x after 0x%x\n",
__func__,
src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
*
*/
static int
-bridge_enqueue(struct bridge_softc *sc, struct ifnet *src_ifp,
+bridge_enqueue(ifnet_t bridge_ifp, struct ifnet *src_ifp,
struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
{
int len, error = 0;
}
if (_error == 0) {
- (void) ifnet_stat_increment_out(sc->sc_ifp, 1, len, 0);
+ (void) ifnet_stat_increment_out(bridge_ifp, 1, len, 0);
} else {
- (void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
+ (void) ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
}
}
return;
}
}
-
- (void) bridge_enqueue(sc, NULL, ifp, m, kChecksumOperationNone);
+ (void) bridge_enqueue(sc->sc_ifp, NULL, ifp, m, kChecksumOperationNone);
}
#endif /* HAS_BRIDGE_DUMMYNET */
* The mbuf has the Ethernet header already attached.
*/
static errno_t
-bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t m)
+bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
{
+ ifnet_t bridge_ifp;
struct ether_header *eh;
struct ifnet *dst_if;
uint16_t vlan;
+ struct bridge_iflist *mac_nat_bif;
+ ifnet_t mac_nat_ifp;
+ mbuf_t m = *data;
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_OUTPUT) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_OUTPUT)) {
printf("%s: ifp %s\n", __func__, ifp->if_xname);
}
#endif /* BRIDGE_DEBUG */
if (m->m_len < ETHER_HDR_LEN) {
m = m_pullup(m, ETHER_HDR_LEN);
if (m == NULL) {
- return ENOBUFS;
+ *data = NULL;
+ return EJUSTRETURN;
}
}
vlan = VLANTAGOF(m);
BRIDGE_LOCK(sc);
+ mac_nat_bif = sc->sc_mac_nat_bif;
+ mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
+ if (mac_nat_ifp == ifp) {
+ /* record the IP address used by the MAC NAT interface */
+ (void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
+ m = *data;
+ if (m == NULL) {
+ /* packet was deallocated */
+ BRIDGE_UNLOCK(sc);
+ return EJUSTRETURN;
+ }
+ }
+ bridge_ifp = sc->sc_ifp;
/*
* APPLE MODIFICATION
* go ahead and send out that interface. Otherwise, the packet
* is dropped below.
*/
- if ((sc->sc_ifp->if_flags & IFF_RUNNING) == 0) {
+ if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
dst_if = ifp;
goto sendunicast;
}
BRIDGE_LOCK2REF(sc, error);
if (error != 0) {
m_freem(m);
- return error;
+ return EJUSTRETURN;
}
TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
if ((dst_if->if_flags & IFF_RUNNING) == 0) {
continue;
}
-
- /*
- * If this is not the original output interface,
- * and the interface is participating in spanning
- * tree, make sure the port is in a state that
- * allows forwarding.
- */
- if (dst_if != ifp && (bif->bif_ifflags & IFBIF_STP) &&
- bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
- continue;
+ if (dst_if != ifp) {
+ /*
+ * If this is not the original output interface,
+ * and the interface is participating in spanning
+ * tree, make sure the port is in a state that
+ * allows forwarding.
+ */
+ if ((bif->bif_ifflags & IFBIF_STP) &&
+ bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
+ continue;
+ }
+ /*
+ * If this is not the original output interface,
+ * and the destination is the MAC NAT interface,
+ * drop the packet. The packet can't be sent
+ * if the source MAC is incorrect.
+ */
+ if (dst_if == mac_nat_ifp) {
+ continue;
+ }
}
if (TAILQ_NEXT(bif, bif_next) == NULL) {
used = 1;
mc = m_dup(m, M_DONTWAIT);
if (mc == NULL) {
(void) ifnet_stat_increment_out(
- sc->sc_ifp, 0, 0, 1);
+ bridge_ifp, 0, 0, 1);
continue;
}
}
- (void) bridge_enqueue(sc, ifp, dst_if, mc,
- kChecksumOperationCompute);
+ (void) bridge_enqueue(bridge_ifp, ifp, dst_if,
+ mc, kChecksumOperationCompute);
}
if (used == 0) {
m_freem(m);
/* just let the packet continue on its way */
return 0;
}
- (void) bridge_enqueue(sc, ifp, dst_if, m,
- kChecksumOperationCompute);
+ if (dst_if != mac_nat_ifp) {
+ (void) bridge_enqueue(bridge_ifp, ifp, dst_if, m,
+ kChecksumOperationCompute);
+ } else {
+ /*
+ * This is not the original output interface
+ * and the destination is the MAC NAT interface.
+ * Drop the packet because the packet can't be sent
+ * if the source MAC is incorrect.
+ */
+ m_freem(m);
+ }
return EJUSTRETURN;
}
{
struct bridge_softc *sc = ifnet_softc(ifp);
struct ether_header *eh;
- struct ifnet *dst_if;
+ struct ifnet *dst_if = NULL;
int error = 0;
eh = mtod(m, struct ether_header *);
- dst_if = NULL;
BRIDGE_LOCK(sc);
/* callee will unlock */
bridge_broadcast(sc, NULL, m, 0);
} else {
+ ifnet_t bridge_ifp;
+
+ bridge_ifp = sc->sc_ifp;
BRIDGE_UNLOCK(sc);
- error = bridge_enqueue(sc, NULL, dst_if, m,
+ error = bridge_enqueue(bridge_ifp, NULL, dst_if, m,
kChecksumOperationFinalize);
}
struct mbuf *m)
{
struct bridge_iflist *dbif;
- struct ifnet *src_if, *dst_if, *ifp;
+ ifnet_t bridge_ifp;
+ struct ifnet *src_if, *dst_if;
struct ether_header *eh;
uint16_t vlan;
uint8_t *dst;
int error;
+ struct mac_nat_record mnr;
+ boolean_t translate_mac = FALSE;
+ uint32_t sc_filter_flags = 0;
BRIDGE_LOCK_ASSERT_HELD(sc);
+ bridge_ifp = sc->sc_ifp;
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_OUTPUT) {
- printf("%s: %s m 0x%llx\n", __func__, sc->sc_ifp->if_xname,
+ if (IF_BRIDGE_DEBUG(BR_DBGF_OUTPUT)) {
+ printf("%s: %s m 0x%llx\n", __func__, bridge_ifp->if_xname,
(uint64_t)VM_KERNEL_ADDRPERM(m));
}
#endif /* BRIDGE_DEBUG */
src_if = m->m_pkthdr.rcvif;
- ifp = sc->sc_ifp;
- (void) ifnet_stat_increment_in(ifp, 1, m->m_pkthdr.len, 0);
+ (void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
vlan = VLANTAGOF(m);
/* ...forward it to all interfaces. */
- atomic_add_64(&ifp->if_imcasts, 1);
+ atomic_add_64(&bridge_ifp->if_imcasts, 1);
dst_if = NULL;
}
#if NBPFILTER > 0
if (eh->ether_type == htons(ETHERTYPE_RSN_PREAUTH) ||
dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
- m->m_pkthdr.rcvif = ifp;
- if (sc->sc_bpf_input) {
- bridge_bpf_input(ifp, m);
- }
+ m->m_pkthdr.rcvif = bridge_ifp;
+ BRIDGE_BPF_MTAP_INPUT(sc, m);
}
#endif /* NBPFILTER */
/* run the packet filter */
if (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6) {
BRIDGE_UNLOCK(sc);
- if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0) {
+ if (bridge_pfil(&m, bridge_ifp, src_if, PFIL_IN) != 0) {
return;
}
if (m == NULL) {
#endif /* PFIL_HOOKS */
if (dst_if == NULL) {
+ /* bridge_broadcast will unlock */
bridge_broadcast(sc, src_if, m, 1);
return;
}
if ((dst_if->if_extflags & IFEXTF_DHCPRA_MASK) != 0) {
m = ip_xdhcpra_output(dst_if, m);
if (!m) {
- ++sc->sc_sc.sc_ifp.if_xdhcpra;
+ ++bridge_ifp.if_xdhcpra;
+ BRIDGE_UNLOCK(sc);
return;
}
}
#endif /* HAS_DHCPRA_MASK */
- BRIDGE_UNLOCK(sc);
+ if (dbif == sc->sc_mac_nat_bif) {
+ /* determine how to translate the packet */
+ translate_mac
+ = bridge_mac_nat_output(sc, sbif, &m, &mnr);
+ if (m == NULL) {
+ /* packet was deallocated */
+ BRIDGE_UNLOCK(sc);
+ return;
+ }
+ }
#if defined(PFIL_HOOKS)
if (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6) {
- if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0) {
+ if (bridge_pfil(&m, bridge_ifp, dst_if, PFIL_OUT) != 0) {
return;
}
if (m == NULL) {
}
#endif /* PFIL_HOOKS */
+ sc_filter_flags = sc->sc_filter_flags;
+ BRIDGE_UNLOCK(sc);
+ if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
+ if (bridge_pf(&m, dst_if, sc_filter_flags, FALSE) != 0) {
+ return;
+ }
+ if (m == NULL) {
+ return;
+ }
+ }
+
+ /* if we need to, translate the MAC address */
+ if (translate_mac) {
+ bridge_mac_nat_translate(&m, &mnr, IF_LLADDR(dst_if));
+ }
/*
* This is an inbound packet where the checksum
* (if applicable) is already present/valid. Since
* forwarding), there's no need to validate the checksum.
* Clear the checksum offload flags and send it along.
*/
- (void) bridge_enqueue(sc, NULL, dst_if, m, kChecksumOperationClear);
+ if (m != NULL) {
+ (void) bridge_enqueue(bridge_ifp, NULL, dst_if, m,
+ kChecksumOperationClear);
+ }
return;
drop:
#if BRIDGE_DEBUG
-char *ether_ntop(char *, size_t, const u_char *);
-
-__private_extern__ char *
+static char *
ether_ntop(char *buf, size_t len, const u_char *ap)
{
snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x",
#endif /* BRIDGE_DEBUG */
+static void
+inject_input_packet(ifnet_t ifp, mbuf_t m)
+{
+ mbuf_pkthdr_setrcvif(m, ifp);
+ mbuf_pkthdr_setheader(m, mbuf_data(m));
+ mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
+ mbuf_len(m) - ETHER_HDR_LEN);
+ mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
+ m->m_flags |= M_PROTO1; /* set to avoid loops */
+ dlil_input_packet_list(ifp, m);
+ return;
+}
+
/*
* bridge_input:
*
* Filter input from a member interface. Queue the packet for
* bridging if it is not for us.
*/
-__private_extern__ errno_t
-bridge_input(struct ifnet *ifp, struct mbuf *m, void *frame_header)
+errno_t
+bridge_input(struct ifnet *ifp, mbuf_t *data)
{
struct bridge_softc *sc = ifp->if_bridge;
struct bridge_iflist *bif, *bif2;
- struct ifnet *bifp;
+ ifnet_t bridge_ifp;
struct ether_header *eh;
struct mbuf *mc, *mc2;
uint16_t vlan;
- int error;
+ errno_t error;
+ boolean_t is_ifp_mac = FALSE;
+ mbuf_t m = *data;
+ uint32_t sc_filter_flags = 0;
+ bridge_ifp = sc->sc_ifp;
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_INPUT) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_INPUT)) {
printf("%s: %s from %s m 0x%llx data 0x%llx\n", __func__,
- sc->sc_ifp->if_xname, ifp->if_xname,
+ bridge_ifp->if_xname, ifp->if_xname,
(uint64_t)VM_KERNEL_ADDRPERM(m),
(uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
}
if ((sc->sc_ifp->if_flags & IFF_RUNNING) == 0) {
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_INPUT) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_INPUT)) {
printf("%s: %s not running passing along\n",
- __func__, sc->sc_ifp->if_xname);
+ __func__, bridge_ifp->if_xname);
}
#endif /* BRIDGE_DEBUG */
return 0;
}
- bifp = sc->sc_ifp;
vlan = VLANTAGOF(m);
#ifdef IFF_MONITOR
* the bpf(4) machinery, but before we do, increment the byte and
* packet counters associated with this interface.
*/
- if ((bifp->if_flags & IFF_MONITOR) != 0) {
- m->m_pkthdr.rcvif = bifp;
+ if ((bridge_ifp->if_flags & IFF_MONITOR) != 0) {
+ m->m_pkthdr.rcvif = bridge_ifp;
BRIDGE_BPF_MTAP_INPUT(sc, m);
- (void) ifnet_stat_increment_in(bifp, 1, m->m_pkthdr.len, 0);
+ (void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
m_freem(m);
return EJUSTRETURN;
}
mbuf_setflags_mask(m, 0, MBUF_PROMISC);
}
+ sc_filter_flags = sc->sc_filter_flags;
+ if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
+ error = bridge_pf(&m, ifp, sc_filter_flags, TRUE);
+ if (error != 0) {
+ return EJUSTRETURN;
+ }
+ if (m == NULL) {
+ return EJUSTRETURN;
+ }
+ /*
+ * bridge_pf could have modified the pointer on success in order
+ * to do its processing. Update *data so that we don't use a
+ * stale pointer.
+ */
+ *data = m;
+ }
+
BRIDGE_LOCK(sc);
bif = bridge_lookup_member_if(sc, ifp);
if (bif == NULL) {
BRIDGE_UNLOCK(sc);
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_INPUT) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_INPUT)) {
printf("%s: %s bridge_lookup_member_if failed\n",
- __func__, sc->sc_ifp->if_xname);
+ __func__, bridge_ifp->if_xname);
}
#endif /* BRIDGE_DEBUG */
return 0;
}
if (bif->bif_flags & BIFF_HOST_FILTER) {
- error = bridge_host_filter(bif, m);
+ error = bridge_host_filter(bif, data);
if (error != 0) {
- if (if_bridge_debug & BR_DBGF_INPUT) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_INPUT)) {
printf("%s: %s bridge_host_filter failed\n",
__func__, bif->bif_ifp->if_xname);
}
BRIDGE_UNLOCK(sc);
return EJUSTRETURN;
}
+ m = *data;
}
eh = mtod(m, struct ether_header *);
if (m->m_flags & (M_BCAST | M_MCAST)) {
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_MCAST) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MCAST)) {
if ((m->m_flags & M_MCAST)) {
printf("%s: multicast: "
"%02x:%02x:%02x:%02x:%02x:%02x\n",
* here from ether_input as a bridge is never a member of a
* bridge.
*/
- VERIFY(bifp->if_bridge == NULL);
+ VERIFY(bridge_ifp->if_bridge == NULL);
mc2 = m_dup(m, M_DONTWAIT);
if (mc2 != NULL) {
/* Keep the layer3 header aligned */
}
if (mc2 != NULL) {
/* mark packet as arriving on the bridge */
- mc2->m_pkthdr.rcvif = bifp;
+ mc2->m_pkthdr.rcvif = bridge_ifp;
mc2->m_pkthdr.pkt_hdr = mbuf_data(mc2);
-#if NBPFILTER > 0
- if (sc->sc_bpf_input) {
- bridge_bpf_input(bifp, mc2);
- }
-#endif /* NBPFILTER */
+ BRIDGE_BPF_MTAP_INPUT(sc, m);
+
(void) mbuf_setdata(mc2,
(char *)mbuf_data(mc2) + ETHER_HDR_LEN,
mbuf_len(mc2) - ETHER_HDR_LEN);
(void) mbuf_pkthdr_adjustlen(mc2, -ETHER_HDR_LEN);
- (void) ifnet_stat_increment_in(bifp, 1,
+ (void) ifnet_stat_increment_in(bridge_ifp, 1,
mbuf_pkthdr_len(mc2), 0);
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_MCAST) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MCAST)) {
printf("%s: %s mcast for us\n", __func__,
- sc->sc_ifp->if_xname);
+ bridge_ifp->if_xname);
}
#endif /* BRIDGE_DEBUG */
- dlil_input_packet_list(bifp, mc2);
+ dlil_input_packet_list(bridge_ifp, mc2);
}
/* Return the original packet for local processing. */
} \
} \
BRIDGE_UNLOCK(sc); \
- mbuf_pkthdr_setrcvif(m, iface); \
- mbuf_pkthdr_setheader(m, mbuf_data(m)); \
- mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN, \
- mbuf_len(m) - ETHER_HDR_LEN); \
- mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN); \
- m->m_flags |= M_PROTO1; /* set to avoid loops */ \
- dlil_input_packet_list(iface, m); \
+ inject_input_packet(iface, m); \
return (EJUSTRETURN); \
} \
\
/*
* Unicast.
*/
+ if (memcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) {
+ is_ifp_mac = TRUE;
+ }
+
+ /* handle MAC-NAT if enabled */
+ if (is_ifp_mac && sc->sc_mac_nat_bif == bif) {
+ ifnet_t dst_if;
+ boolean_t is_input = FALSE;
+
+ dst_if = bridge_mac_nat_input(sc, data, &is_input);
+ m = *data;
+ if (dst_if == ifp) {
+ /* our input packet */
+ } else if (dst_if != NULL || m == NULL) {
+ BRIDGE_UNLOCK(sc);
+ if (dst_if != NULL) {
+ ASSERT(m != NULL);
+ if (is_input) {
+ inject_input_packet(dst_if, m);
+ } else {
+ (void)bridge_enqueue(bridge_ifp, NULL,
+ dst_if, m,
+ kChecksumOperationClear);
+ }
+ }
+ return EJUSTRETURN;
+ }
+ }
+
/*
- * If the packet is for us, set the packets source as the
- * bridge, and return the packet back to ether_input for
- * local processing.
+ * If the packet is for the bridge, set the packet's source interface
+ * and return the packet back to ether_input for local processing.
*/
- if (memcmp(eh->ether_dhost, IF_LLADDR(bifp),
- ETHER_ADDR_LEN) == 0 || CARP_CHECK_WE_ARE_DST(bifp)) {
+ if (memcmp(eh->ether_dhost, IF_LLADDR(bridge_ifp),
+ ETHER_ADDR_LEN) == 0 || CARP_CHECK_WE_ARE_DST(bridge_ifp)) {
/* Mark the packet as arriving on the bridge interface */
- (void) mbuf_pkthdr_setrcvif(m, bifp);
- mbuf_pkthdr_setheader(m, frame_header);
+ (void) mbuf_pkthdr_setrcvif(m, bridge_ifp);
+ mbuf_pkthdr_setheader(m, mbuf_data(m));
/*
* If the interface is learning, and the source
mbuf_len(m) - ETHER_HDR_LEN);
(void) mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
- (void) ifnet_stat_increment_in(bifp, 1, mbuf_pkthdr_len(m), 0);
+ (void) ifnet_stat_increment_in(bridge_ifp, 1, mbuf_pkthdr_len(m), 0);
BRIDGE_UNLOCK(sc);
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_INPUT) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_INPUT)) {
printf("%s: %s packet for bridge\n", __func__,
- sc->sc_ifp->if_xname);
+ bridge_ifp->if_xname);
}
#endif /* BRIDGE_DEBUG */
- dlil_input_packet_list(bifp, m);
+ dlil_input_packet_list(bridge_ifp, m);
return EJUSTRETURN;
}
* bridge's own MAC address, because the bridge may be
* using the SAME MAC address as one of its interfaces
*/
- if (memcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) {
+ if (is_ifp_mac) {
#ifdef VERY_VERY_VERY_DIAGNOSTIC
printf("%s: not forwarding packet bound for member "
bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
struct mbuf *m, int runfilt)
{
-#ifndef PFIL_HOOKS
-#pragma unused(runfilt)
-#endif
+ ifnet_t bridge_ifp;
struct bridge_iflist *dbif, *sbif;
struct mbuf *mc;
struct mbuf *mc_in;
struct ifnet *dst_if;
int error = 0, used = 0;
- boolean_t is_output;
+ boolean_t bridge_if_out;
ChecksumOperation cksum_op;
+ struct mac_nat_record mnr;
+ struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
+ boolean_t translate_mac = FALSE;
+ uint32_t sc_filter_flags = 0;
+ bridge_ifp = sc->sc_ifp;
if (src_if != NULL) {
- is_output = FALSE;
+ bridge_if_out = FALSE;
cksum_op = kChecksumOperationClear;
sbif = bridge_lookup_member_if(sc, src_if);
+ if (sbif != NULL && mac_nat_bif != NULL && sbif != mac_nat_bif) {
+ /* get the translation record while holding the lock */
+ translate_mac
+ = bridge_mac_nat_output(sc, sbif, &m, &mnr);
+ if (m == NULL) {
+ /* packet was deallocated */
+ BRIDGE_UNLOCK(sc);
+ return;
+ }
+ }
} else {
/*
* src_if is NULL when the bridge interface calls
* bridge_broadcast().
*/
- is_output = TRUE;
+ bridge_if_out = TRUE;
cksum_op = kChecksumOperationFinalize;
sbif = NULL;
}
#ifdef PFIL_HOOKS
/* Filter on the bridge interface before broadcasting */
if (runfilt && (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6)) {
- if (bridge_pfil(&m, sc->sc_ifp, NULL, PFIL_OUT) != 0) {
+ if (bridge_pfil(&m, bridge_ifp, NULL, PFIL_OUT) != 0) {
goto out;
}
if (m == NULL) {
}
}
#endif /* PFIL_HOOKS */
-
TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
dst_if = dbif->bif_ifp;
if (dst_if == src_if) {
} else {
mc = m_dup(m, M_DONTWAIT);
if (mc == NULL) {
- (void) ifnet_stat_increment_out(sc->sc_ifp,
+ (void) ifnet_stat_increment_out(bridge_ifp,
0, 0, 1);
continue;
}
* If broadcast input is enabled, do so only if this
* is an input packet.
*/
- if (!is_output &&
+ if (!bridge_if_out &&
(dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
mc_in = m_dup(mc, M_DONTWAIT);
/* this could fail, but we continue anyways */
mc = m_copyup(mc, i, ETHER_ALIGN);
if (mc == NULL) {
(void) ifnet_stat_increment_out(
- sc->sc_ifp, 0, 0, 1);
+ bridge_ifp, 0, 0, 1);
if (mc_in != NULL) {
m_freem(mc_in);
}
#endif /* PFIL_HOOKS */
/* out */
- (void) bridge_enqueue(sc, NULL, dst_if, mc, cksum_op);
+ if (translate_mac && mac_nat_bif == dbif) {
+ /* translate the packet without holding the lock */
+ bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
+ }
+
+ sc_filter_flags = sc->sc_filter_flags;
+ if (runfilt &&
+ PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
+ if (used == 0) {
+ /* Keep the layer3 header aligned */
+ int i = min(mc->m_pkthdr.len, max_protohdr);
+ mc = m_copyup(mc, i, ETHER_ALIGN);
+ if (mc == NULL) {
+ (void) ifnet_stat_increment_out(
+ sc->sc_ifp, 0, 0, 1);
+ if (mc_in != NULL) {
+ m_freem(mc_in);
+ mc_in = NULL;
+ }
+ continue;
+ }
+ }
+ if (bridge_pf(&mc, dst_if, sc_filter_flags, FALSE) != 0) {
+ if (mc_in != NULL) {
+ m_freem(mc_in);
+ mc_in = NULL;
+ }
+ continue;
+ }
+ if (mc == NULL) {
+ if (mc_in != NULL) {
+ m_freem(mc_in);
+ mc_in = NULL;
+ }
+ continue;
+ }
+ }
+
+ if (mc != NULL) {
+ (void) bridge_enqueue(bridge_ifp,
+ NULL, dst_if, mc, cksum_op);
+ }
/* in */
if (mc_in == NULL) {
continue;
}
- (void) bridge_enqueue(sc, NULL, dst_if, mc,
+ (void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, mc,
kChecksumOperationNone);
}
}
*/
brt = zalloc_noblock(bridge_rtnode_pool);
if (brt == NULL) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_RT_TABLE)) {
+ printf("%s: zalloc_nolock failed", __func__);
+ }
return ENOMEM;
}
bzero(brt, sizeof(struct bridge_rtnode));
brt->brt_dst = bif;
bif->bif_addrcnt++;
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_RT_TABLE) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_RT_TABLE)) {
printf("%s: added %02x:%02x:%02x:%02x:%02x:%02x "
"on %s count %u hashsize %u\n", __func__,
dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
BRIDGE_LOCK_ASSERT_HELD(sc);
bridge_rtage(sc);
-
if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
(sc->sc_flags & SCF_DETACHING) == 0) {
sc->sc_aging_timer.bdc_sc = sc;
}
}
}
+ if (sc->sc_mac_nat_bif != NULL) {
+ bridge_mac_nat_age_entries(sc, now);
+ }
}
/*
/*
* bridge_rtdelete:
*
- * Delete routes to a speicifc member interface.
+ * Delete routes to a specific member interface.
*/
static void
bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
out:
if (error == 0) {
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_RT_TABLE) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_RT_TABLE)) {
printf("%s: %s new size %u\n", __func__,
sc->sc_ifp->if_xname, sc->sc_rthash_size);
}
dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_RT_TABLE) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_RT_TABLE)) {
printf("%s: %s EEXIST "
"%02x:%02x:%02x:%02x:%02x:%02x\n",
__func__, sc->sc_ifp->if_xname,
} while (lbrt != NULL);
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_RT_TABLE) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_RT_TABLE)) {
printf("%s: %s impossible %02x:%02x:%02x:%02x:%02x:%02x\n",
__func__, sc->sc_ifp->if_xname,
brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
*mp = NULL;
return error;
}
+#endif /* PFIL_HOOKS */
/*
* Perform basic checks on header size since
ipstat.ips_toosmall++;
goto bad;
}
- } else if (__predict_false(m->m_len < sizeof(struct ip))) {
+ } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
ipstat.ips_toosmall++;
goto bad;
goto bad;
}
- if (ip->ip_v != IPVERSION) {
+ if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
ipstat.ips_badvers++;
goto bad;
}
- hlen = ip->ip_hl << 2;
- if (hlen < sizeof(struct ip)) { /* minimum header length */
+ hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+ if (hlen < (int)sizeof(struct ip)) { /* minimum header length */
ipstat.ips_badhlen++;
goto bad;
}
in6_ifstat_inc(inifp, ifs6_in_hdrerr);
goto bad;
}
- } else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
+ } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
struct ifnet *inifp = m->m_pkthdr.rcvif;
if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
ip6stat.ip6s_toosmall++;
}
#endif /* INET6 */
+#ifdef PFIL_HOOKS
/*
* bridge_fragment:
*
*
* Invoke the input BPF callback if enabled
*/
-__private_extern__ errno_t
-bridge_bpf_input(ifnet_t ifp, struct mbuf *m)
+static errno_t
+bridge_bpf_input(ifnet_t ifp, struct mbuf *m, const char * func, int line)
{
struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
+ bpf_packet_func input_func = sc->sc_bpf_input;
- if (sc->sc_bpf_input) {
+ if (input_func != NULL) {
if (mbuf_pkthdr_rcvif(m) != ifp) {
- printf("%s: rcvif: 0x%llx != ifp 0x%llx\n", __func__,
+ printf("%s.%d: rcvif: 0x%llx != ifp 0x%llx\n", func, line,
(uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
(uint64_t)VM_KERNEL_ADDRPERM(ifp));
}
- (*sc->sc_bpf_input)(ifp, m);
+ (*input_func)(ifp, m);
}
return 0;
}
*
* Invoke the output BPF callback if enabled
*/
-__private_extern__ errno_t
+static errno_t
bridge_bpf_output(ifnet_t ifp, struct mbuf *m)
{
struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
+ bpf_packet_func output_func = sc->sc_bpf_output;
- if (sc->sc_bpf_output) {
- (*sc->sc_bpf_output)(ifp, m);
+ if (output_func != NULL) {
+ (*output_func)(ifp, m);
}
return 0;
}
} event;
#if BRIDGE_DEBUG
- if (if_bridge_debug & BR_DBGF_LIFECYCLE) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_LIFECYCLE)) {
printf("%s: %s event_code %u - %s\n", __func__, ifp->if_xname,
event_code, dlil_kev_dl_code_str(event_code));
}
ifnet_event(ifp, &event.header);
}
-#define BRIDGE_HF_DROP(reason, func, line) { \
- bridge_hostfilter_stats.reason++; \
- if (if_bridge_debug & BR_DBGF_HOSTFILTER) \
- printf("%s.%d" #reason, func, line); \
- error = EINVAL; \
-}
+#define BRIDGE_HF_DROP(reason, func, line) { \
+ bridge_hostfilter_stats.reason++; \
+ if (IF_BRIDGE_DEBUG(BR_DBGF_HOSTFILTER)) { \
+ printf("%s.%d" #reason, func, line); \
+ } \
+ error = EINVAL; \
+ }
/*
* Make sure this is a DHCP or Bootp request that match the host filter
}
static int
-bridge_host_filter(struct bridge_iflist *bif, struct mbuf *m)
+bridge_host_filter(struct bridge_iflist *bif, mbuf_t *data)
{
int error = EINVAL;
struct ether_header *eh;
static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };
+ mbuf_t m = *data;
- /*
- * Check the Ethernet header is large enough
- */
- if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
- BRIDGE_HF_DROP(brhf_ether_too_small, __func__, __LINE__);
- goto done;
- }
- if (mbuf_len(m) < sizeof(struct ether_header) &&
- mbuf_pullup(&m, sizeof(struct ether_header)) != 0) {
- BRIDGE_HF_DROP(brhf_ether_pullup_failed, __func__, __LINE__);
- goto done;
- }
eh = mtod(m, struct ether_header *);
/*
BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
goto done;
}
- if (mbuf_len(m) < minlen && mbuf_pullup(&m, minlen) != 0) {
+ if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
BRIDGE_HF_DROP(brhf_arp_pullup_failed,
__func__, __LINE__);
goto done;
}
+ m = *data;
+
/*
* Verify this is an ethernet/ip arp
*/
BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
goto done;
}
- /*
- *
- */
bridge_hostfilter_stats.brhf_arp_ok += 1;
error = 0;
} else if (eh->ether_type == htons(ETHERTYPE_IP)) {
}
done:
if (error != 0) {
- if (if_bridge_debug & BR_DBGF_HOSTFILTER) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_HOSTFILTER)) {
if (m) {
printf_mbuf_data(m, 0,
sizeof(struct ether_header) +
}
return error;
}
+
+/*
+ * MAC NAT
+ */
+
+static errno_t
+bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+ errno_t error = 0;
+
+ BRIDGE_LOCK_ASSERT_HELD(sc);
+
+ if (sc->sc_mac_nat_bif != NULL) {
+ if (sc->sc_mac_nat_bif != bif) {
+ error = EBUSY;
+ }
+ goto done;
+ }
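+ /* claim MAC NAT for this member and seed entries from its addresses */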
+ sc->sc_mac_nat_bif = bif;
+ bif->bif_ifflags |= IFBIF_MAC_NAT;
+ bridge_mac_nat_populate_entries(sc);
+
+done:
+ return error;
+}
+
+static void
+bridge_mac_nat_disable(struct bridge_softc *sc)
+{
+ struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
+
+ assert(mac_nat_bif != NULL);
+ bridge_mac_nat_flush_entries(sc, mac_nat_bif);
+ mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
+ sc->sc_mac_nat_bif = NULL;
+ return;
+}
+
+static void
+mac_nat_entry_print2(struct mac_nat_entry *mne,
+ char *ifname, const char *msg1, const char *msg2)
+{
+ int af;
+ char etopbuf[24];
+ char ntopbuf[MAX_IPv6_STR_LEN];
+ const char *space;
+
+ af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
+ ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
+ (void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
+ if (msg2 == NULL) {
+ msg2 = "";
+ space = "";
+ } else {
+ space = " ";
+ }
+ printf("%s %s%s%s %p (%s, %s, %s)\n",
+ ifname, msg1, space, msg2, mne, mne->mne_bif->bif_ifp->if_xname,
+ ntopbuf, etopbuf);
+}
+
+static void
+mac_nat_entry_print(struct mac_nat_entry *mne,
+ char *ifname, const char *msg)
+{
+ mac_nat_entry_print2(mne, ifname, msg, NULL);
+}
+
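+/*
+ * bridge_lookup_mac_nat_entry:
+ *
+ * Look up an entry by IPv4 or IPv6 address on the appropriate list.
+ */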
+static struct mac_nat_entry *
+bridge_lookup_mac_nat_entry(struct bridge_softc *sc, int af, void * ip)
+{
+ struct mac_nat_entry *mne;
+ struct mac_nat_entry *ret_mne = NULL;
+
+ if (af == AF_INET) {
+ in_addr_t s_addr = ((struct in_addr *)ip)->s_addr;
+
+ LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
+ if (mne->mne_ip.s_addr == s_addr) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ mac_nat_entry_print(mne, sc->sc_if_xname,
+ "found");
+ }
+ ret_mne = mne;
+ break;
+ }
+ }
+ } else {
+ const struct in6_addr *ip6 = (const struct in6_addr *)ip;
+
+ LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
+ if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ mac_nat_entry_print(mne, sc->sc_if_xname,
+ "found");
+ }
+ ret_mne = mne;
+ break;
+ }
+ }
+ }
+ return ret_mne;
+}
+
+static void
+bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
+ struct mac_nat_entry *mne, const char *reason)
+{
+ LIST_REMOVE(mne, mne_list);
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ mac_nat_entry_print(mne, sc->sc_if_xname, reason);
+ }
+ zfree(bridge_mne_pool, mne);
+ sc->sc_mne_count--;
+}
+
+static struct mac_nat_entry *
+bridge_create_mac_nat_entry(struct bridge_softc *sc,
+ struct bridge_iflist *bif, int af, const void *ip, uint8_t *eaddr)
+{
+ struct mac_nat_entry_list *list;
+ struct mac_nat_entry *mne;
+
+ if (sc->sc_mne_count >= sc->sc_mne_max) {
+ sc->sc_mne_allocation_failures++;
+ return NULL;
+ }
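+ /* non-blocking allocation; the bridge lock is held by the caller */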
+ mne = zalloc_noblock(bridge_mne_pool);
+ if (mne == NULL) {
+ sc->sc_mne_allocation_failures++;
+ return NULL;
+ }
+ sc->sc_mne_count++;
+ bzero(mne, sizeof(*mne));
+ bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
+ mne->mne_bif = bif;
+ if (af == AF_INET) {
+ bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
+ list = &sc->sc_mne_list;
+ } else {
+ bcopy(ip, &mne->mne_ip6, sizeof(mne->mne_ip6));
+ mne->mne_flags |= MNE_FLAGS_IPV6;
+ list = &sc->sc_mne_list_v6;
+ }
+ LIST_INSERT_HEAD(list, mne, mne_list);
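+ /* age out using the same timeout as the bridge routing table */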
+ mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ mac_nat_entry_print(mne, sc->sc_if_xname, "created");
+ }
+ return mne;
+}
+
+static struct mac_nat_entry *
+bridge_update_mac_nat_entry(struct bridge_softc *sc,
+ struct bridge_iflist *bif, int af, void *ip, uint8_t *eaddr)
+{
+ struct mac_nat_entry *mne;
+
+ mne = bridge_lookup_mac_nat_entry(sc, af, ip);
+ if (mne != NULL) {
+ struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
+
+ if (mne->mne_bif == mac_nat_bif) {
+ /* the MAC NAT interface takes precedence */
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ if (mne->mne_bif != bif) {
+ mac_nat_entry_print2(mne,
+ sc->sc_if_xname, "reject",
+ bif->bif_ifp->if_xname);
+ }
+ }
+ } else if (mne->mne_bif != bif) {
+ const char *old_if = mne->mne_bif->bif_ifp->if_xname;
+
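+ /* the address has moved to a different member; take over the entry */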
+ mne->mne_bif = bif;
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ mac_nat_entry_print2(mne,
+ sc->sc_if_xname, "replaced",
+ old_if);
+ }
+ bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
+ }
+ mne->mne_expire = (unsigned long)net_uptime() +
+ sc->sc_brttimeout;
+ } else {
+ mne = bridge_create_mac_nat_entry(sc, bif, af, ip, eaddr);
+ }
+ return mne;
+}
+
+static void
+bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
+ struct mac_nat_entry_list *list, struct bridge_iflist *bif)
+{
+ struct mac_nat_entry *mne;
+ struct mac_nat_entry *tmne;
+
+ LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
+ if (bif != NULL && mne->mne_bif != bif) {
+ continue;
+ }
+ bridge_destroy_mac_nat_entry(sc, mne, "flushed");
+ }
+}
+
+/*
+ * bridge_mac_nat_flush_entries:
+ *
+ * Flush MAC NAT entries for the specified member. Flush all entries if
+ * the member is the one that requires MAC NAT, otherwise just flush the
+ * ones for the specified member.
+ */
+static void
+bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
+{
+ struct bridge_iflist *flush_bif;
+
+ flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
+ bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
+ bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
+}
+
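+/*
+ * bridge_mac_nat_populate_entries:
+ *
+ * Create MAC NAT entries for the IPv4 and IPv6 addresses already assigned
+ * to the MAC NAT interface itself.
+ */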
+static void
+bridge_mac_nat_populate_entries(struct bridge_softc *sc)
+{
+ errno_t error;
+ ifnet_t ifp;
+ ifaddr_t *list;
+ struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
+
+ assert(mac_nat_bif != NULL);
+ ifp = mac_nat_bif->bif_ifp;
+ error = ifnet_get_address_list(ifp, &list);
+ if (error != 0) {
+ printf("%s: ifnet_get_address_list(%s) failed %d\n",
+ __func__, ifp->if_xname, error);
+ return;
+ }
+ for (ifaddr_t *scan = list; *scan != NULL; scan++) {
+ sa_family_t af;
+ void *ip;
+
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+ } u;
+ af = ifaddr_address_family(*scan);
+ switch (af) {
+ case AF_INET:
+ case AF_INET6:
+ error = ifaddr_address(*scan, &u.sa, sizeof(u));
+ if (error != 0) {
+ printf("%s: ifaddr_address failed %d\n",
+ __func__, error);
+ break;
+ }
+ if (af == AF_INET) {
+ ip = (void *)&u.sin.sin_addr;
+ } else {
+ if (IN6_IS_ADDR_LINKLOCAL(&u.sin6.sin6_addr)) {
+ /* remove scope ID */
+ u.sin6.sin6_addr.s6_addr16[1] = 0;
+ }
+ ip = (void *)&u.sin6.sin6_addr;
+ }
+ bridge_create_mac_nat_entry(sc, mac_nat_bif, af, ip,
+ (uint8_t *)IF_LLADDR(ifp));
+ break;
+ default:
+ break;
+ }
+ }
+ ifnet_free_address_list(list);
+ return;
+}
+
+static void
+bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
+ struct mac_nat_entry_list *list, unsigned long now)
+{
+ struct mac_nat_entry *mne;
+ struct mac_nat_entry *tmne;
+
+ LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
+ if (now >= mne->mne_expire) {
+ bridge_destroy_mac_nat_entry(sc, mne, "aged out");
+ }
+ }
+}
+
+static void
+bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
+{
+ if (sc->sc_mac_nat_bif == NULL) {
+ return;
+ }
+ bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
+ bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
+}
+
+static const char *
+get_in_out_string(boolean_t is_output)
+{
+ return is_output ? "OUT" : "IN";
+}
+
+/*
+ * is_valid_arp_packet:
+ * Verify that this is a valid ARP packet.
+ *
+ * Returns TRUE if the packet is valid, FALSE otherwise.
+ */
+static boolean_t
+is_valid_arp_packet(mbuf_t *data, boolean_t is_output,
+ struct ether_header **eh_p, struct ether_arp **ea_p)
+{
+ struct ether_arp *ea;
+ struct ether_header *eh;
+ size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
+ boolean_t is_valid = FALSE;
+ int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
+
+ if (mbuf_pkthdr_len(*data) < minlen) {
+ if (IF_BRIDGE_DEBUG(flags)) {
+ printf("%s: ARP %s short frame %lu < %lu\n",
+ __func__,
+ get_in_out_string(is_output),
+ mbuf_pkthdr_len(*data), minlen);
+ }
+ goto done;
+ }
+ if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
+ if (IF_BRIDGE_DEBUG(flags)) {
+ printf("%s: ARP %s size %lu mbuf_pullup fail\n",
+ __func__,
+ get_in_out_string(is_output),
+ minlen);
+ }
+ *data = NULL;
+ goto done;
+ }
+
+ /* validate ARP packet */
+ eh = mtod(*data, struct ether_header *);
+ ea = (struct ether_arp *)(eh + 1);
+ if (ntohs(ea->arp_hrd) != ARPHRD_ETHER) {
+ if (IF_BRIDGE_DEBUG(flags)) {
+ printf("%s: ARP %s htype not ethernet\n",
+ __func__,
+ get_in_out_string(is_output));
+ }
+ goto done;
+ }
+ if (ea->arp_hln != ETHER_ADDR_LEN) {
+ if (IF_BRIDGE_DEBUG(flags)) {
+ printf("%s: ARP %s hlen not ethernet\n",
+ __func__,
+ get_in_out_string(is_output));
+ }
+ goto done;
+ }
+ if (ntohs(ea->arp_pro) != ETHERTYPE_IP) {
+ if (IF_BRIDGE_DEBUG(flags)) {
+ printf("%s: ARP %s ptype not IP\n",
+ __func__,
+ get_in_out_string(is_output));
+ }
+ goto done;
+ }
+ if (ea->arp_pln != sizeof(struct in_addr)) {
+ if (IF_BRIDGE_DEBUG(flags)) {
+ printf("%s: ARP %s plen not IP\n",
+ __func__,
+ get_in_out_string(is_output));
+ }
+ goto done;
+ }
+ is_valid = TRUE;
+ *ea_p = ea;
+ *eh_p = eh;
+done:
+ return is_valid;
+}
+
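+/*
+ * bridge_mac_nat_arp_input:
+ *
+ * Look up the ARP target IP address (or, failing that, the sender IP
+ * address) in the MAC NAT table. For an ARP reply destined to an internal
+ * host, rewrite the target hardware address with that host's real MAC
+ * address.
+ */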
+static struct mac_nat_entry *
+bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
+{
+ struct ether_arp *ea;
+ struct ether_header *eh;
+ struct mac_nat_entry *mne = NULL;
+ u_short op;
+ struct in_addr tpa;
+
+ if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
+ goto done;
+ }
+ op = ntohs(ea->arp_op);
+ switch (op) {
+ case ARPOP_REQUEST:
+ case ARPOP_REPLY:
+ /* only care about REQUEST and REPLY */
+ break;
+ default:
+ goto done;
+ }
+
+ /* check the target IP address for a NAT entry */
+ bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
+ if (tpa.s_addr != 0) {
+ mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &tpa);
+ }
+ if (mne != NULL) {
+ if (op == ARPOP_REPLY) {
+ /* translate the MAC address */
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ char mac_src[24];
+ char mac_dst[24];
+
+ ether_ntop(mac_src, sizeof(mac_src),
+ ea->arp_tha);
+ ether_ntop(mac_dst, sizeof(mac_dst),
+ mne->mne_mac);
+ printf("%s %s ARP %s -> %s\n",
+ sc->sc_if_xname,
+ mne->mne_bif->bif_ifp->if_xname,
+ mac_src, mac_dst);
+ }
+ bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
+ }
+ } else {
+ /* handle conflicting ARP (sender matches mne) */
+ struct in_addr spa;
+
+ bcopy(ea->arp_spa, &spa, sizeof(spa));
+ if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
+ /* check the source IP for a NAT entry */
+ mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &spa);
+ }
+ }
+
+done:
+ return mne;
+}
+
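+/*
+ * bridge_mac_nat_arp_output:
+ *
+ * Create or update the MAC NAT entry for the ARP sender and record the
+ * offset of the sender hardware address so that it can be rewritten once
+ * the bridge lock is dropped.
+ */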
+static boolean_t
+bridge_mac_nat_arp_output(struct bridge_softc *sc,
+ struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
+{
+ struct ether_arp *ea;
+ struct ether_header *eh;
+ struct in_addr ip;
+ struct mac_nat_entry *mne = NULL;
+ u_short op;
+ boolean_t translate = FALSE;
+
+ if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
+ goto done;
+ }
+ op = ntohs(ea->arp_op);
+ switch (op) {
+ case ARPOP_REQUEST:
+ case ARPOP_REPLY:
+ /* only care about REQUEST and REPLY */
+ break;
+ default:
+ goto done;
+ }
+
+ bcopy(ea->arp_spa, &ip, sizeof(ip));
+ if (ip.s_addr == 0) {
+ goto done;
+ }
+ /* XXX validate IP address: no multicast/broadcast */
+ mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip, ea->arp_sha);
+ if (mnr != NULL && mne != NULL) {
+ /* record the offset to do the replacement */
+ translate = TRUE;
+ mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
+ }
+
+done:
+ return translate;
+}
+
+#define ETHER_IPV4_HEADER_LEN (sizeof(struct ether_header) + \
+ sizeof(struct ip))
+static struct ether_header *
+get_ether_ip_header(mbuf_t *data, boolean_t is_output)
+{
+ struct ether_header *eh = NULL;
+ int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
+ size_t minlen = ETHER_IPV4_HEADER_LEN;
+
+ if (mbuf_pkthdr_len(*data) < minlen) {
+ if (IF_BRIDGE_DEBUG(flags)) {
+ printf("%s: IP %s short frame %lu < %lu\n",
+ __func__,
+ get_in_out_string(is_output),
+ mbuf_pkthdr_len(*data), minlen);
+ }
+ goto done;
+ }
+ if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
+ if (IF_BRIDGE_DEBUG(flags)) {
+ printf("%s: IP %s size %lu mbuf_pullup fail\n",
+ __func__,
+ get_in_out_string(is_output),
+ minlen);
+ }
+ *data = NULL;
+ goto done;
+ }
+ eh = mtod(*data, struct ether_header *);
+done:
+ return eh;
+}
+
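+/*
+ * bridge_mac_nat_ip_input:
+ *
+ * Look up the destination IPv4 address in the MAC NAT table.
+ */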
+static struct mac_nat_entry *
+bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
+{
+ struct in_addr dst;
+ struct ether_header *eh;
+ struct ip *iphdr;
+ struct mac_nat_entry *mne = NULL;
+
+ eh = get_ether_ip_header(data, FALSE);
+ if (eh == NULL) {
+ goto done;
+ }
+ iphdr = (struct ip *)(void *)(eh + 1);
+ bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
+ /* XXX validate IP address */
+ if (dst.s_addr == 0) {
+ goto done;
+ }
+ mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &dst);
+done:
+ return mne;
+}
+
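+/*
+ * bridge_mac_nat_udp_output:
+ *
+ * For an outbound BOOTP/DHCP request, record the "must broadcast" flag
+ * and the adjusted UDP checksum in the mac_nat_record; both are applied
+ * later by bridge_mac_nat_ip_translate().
+ */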
+static void
+bridge_mac_nat_udp_output(struct bridge_softc *sc,
+ struct bridge_iflist *bif, mbuf_t m,
+ uint8_t ip_header_len, struct mac_nat_record *mnr)
+{
+ uint16_t dp_flags;
+ errno_t error;
+ size_t offset;
+ struct udphdr udphdr;
+
+ /* copy the UDP header */
+ offset = sizeof(struct ether_header) + ip_header_len;
+ error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
+ if (error != 0) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ printf("%s: mbuf_copydata udphdr failed %d",
+ __func__, error);
+ }
+ return;
+ }
+ if (ntohs(udphdr.uh_sport) != IPPORT_BOOTPC ||
+ ntohs(udphdr.uh_dport) != IPPORT_BOOTPS) {
+ /* not a BOOTP/DHCP packet */
+ return;
+ }
+ /* check whether the broadcast bit is already set */
+ offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
+ error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
+ if (error != 0) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ printf("%s: mbuf_copydata dp_flags failed %d",
+ __func__, error);
+ }
+ return;
+ }
+ if ((ntohs(dp_flags) & DHCP_FLAGS_BROADCAST) != 0) {
+ /* it's already set, nothing to do */
+ return;
+ }
+ /* broadcast bit needs to be set */
+ mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
+ mnr->mnr_ip_header_len = ip_header_len;
+ if (udphdr.uh_sum != 0) {
+ uint16_t delta;
+
+ /* adjust checksum to take modified dp_flags into account */
+ delta = dp_flags - mnr->mnr_ip_dhcp_flags;
+ mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
+ }
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ printf("%s %s DHCP dp_flags 0x%x UDP cksum 0x%x\n",
+ sc->sc_if_xname,
+ bif->bif_ifp->if_xname,
+ ntohs(mnr->mnr_ip_dhcp_flags),
+ ntohs(mnr->mnr_ip_udp_csum));
+ }
+ return;
+}
+
+static boolean_t
+bridge_mac_nat_ip_output(struct bridge_softc *sc,
+ struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
+{
+#pragma unused(mnr)
+ struct ether_header *eh;
+ struct in_addr ip;
+ struct ip *iphdr;
+ uint8_t ip_header_len;
+ struct mac_nat_entry *mne = NULL;
+ boolean_t translate = FALSE;
+
+ eh = get_ether_ip_header(data, TRUE);
+ if (eh == NULL) {
+ goto done;
+ }
+ iphdr = (struct ip *)(void *)(eh + 1);
+ ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
+ if (ip_header_len < sizeof(*iphdr)) {
+ /* bogus IP header */
+ goto done;
+ }
+ bcopy(&iphdr->ip_src, &ip, sizeof(ip));
+ /* XXX validate the source address */
+ if (ip.s_addr != 0) {
+ mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip,
+ eh->ether_shost);
+ }
+ if (mnr != NULL) {
+ if (iphdr->ip_p == IPPROTO_UDP) {
+ /* handle DHCP must broadcast */
+ bridge_mac_nat_udp_output(sc, bif, *data,
+ ip_header_len, mnr);
+ }
+ translate = TRUE;
+ }
+done:
+ return translate;
+}
+
+#define ETHER_IPV6_HEADER_LEN (sizeof(struct ether_header) + \
+ sizeof(struct ip6_hdr))
+static struct ether_header *
+get_ether_ipv6_header(mbuf_t *data, boolean_t is_output)
+{
+ struct ether_header *eh = NULL;
+ int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
+ size_t minlen = ETHER_IPV6_HEADER_LEN;
+
+ if (mbuf_pkthdr_len(*data) < minlen) {
+ if (IF_BRIDGE_DEBUG(flags)) {
+ printf("%s: IP %s short frame %lu < %lu\n",
+ __func__,
+ get_in_out_string(is_output),
+ mbuf_pkthdr_len(*data), minlen);
+ }
+ goto done;
+ }
+ if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
+ if (IF_BRIDGE_DEBUG(flags)) {
+ printf("%s: IP %s size %lu mbuf_pullup fail\n",
+ __func__,
+ get_in_out_string(is_output),
+ minlen);
+ }
+ *data = NULL;
+ goto done;
+ }
+ eh = mtod(*data, struct ether_header *);
+done:
+ return eh;
+}
+
+#if 0
+static void
+bridge_mac_nat_icmpv6_input(struct bridge_softc *sc, mbuf_t *data,
+ struct ether_header *eh, struct ip6_hdr *hdr)
+{
+#pragma unused(sc)
+#pragma unused(data)
+#pragma unused(eh)
+#pragma unused(hdr)
+ return;
+}
+#endif
+
+#include <netinet/icmp6.h>
+#include <netinet6/nd6.h>
+
+#define ETHER_ND_LLADDR_LEN (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))
+
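+/*
+ * bridge_mac_nat_icmpv6_output:
+ *
+ * For outbound Neighbor Discovery packets (RS, NS, NA), record the offset
+ * of the source/target link-layer address option along with the lengths
+ * needed to recompute the ICMPv6 checksum after the option is rewritten.
+ */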
+static void
+bridge_mac_nat_icmpv6_output(struct bridge_softc *sc, struct bridge_iflist *bif,
+ mbuf_t *data, struct ether_header *eh,
+ struct ip6_hdr *ip6h, struct in6_addr *saddrp, struct mac_nat_record *mnr)
+{
+ struct icmp6_hdr *icmp6;
+ unsigned int icmp6len;
+ int lladdrlen = 0;
+ char *lladdr = NULL;
+ mbuf_t m = *data;
+ unsigned int off = sizeof(*ip6h);
+
+ icmp6len = m->m_pkthdr.len - sizeof(*eh) - off;
+ if (icmp6len < sizeof(*icmp6)) {
+ printf("%s: short packet %d < %lu\n", __func__,
+ icmp6len, sizeof(*icmp6));
+ return;
+ }
+ icmp6 = (struct icmp6_hdr *)((caddr_t)ip6h + off);
+ switch (icmp6->icmp6_type) {
+ case ND_NEIGHBOR_SOLICIT: {
+ struct nd_neighbor_solicit *nd_ns;
+ union nd_opts ndopts;
+ boolean_t is_dad_probe;
+ struct in6_addr taddr;
+
+ if (icmp6len < sizeof(*nd_ns)) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ printf("%s: short nd_ns %d < %lu\n", __func__,
+ icmp6len, sizeof(*nd_ns));
+ }
+ return;
+ }
+
+ nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
+ bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
+ if (IN6_IS_ADDR_MULTICAST(&taddr) ||
+ IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ printf("%s: invalid target ignored\n", __func__);
+ }
+ return;
+ }
+ /* parse options */
+ nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
+ if (nd6_options(&ndopts) < 0) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ printf("%s: invalid ND6 NS option\n", __func__);
+ }
+ return;
+ }
+ if (ndopts.nd_opts_src_lladdr != NULL) {
+ lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
+ lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
+ }
+ is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
+ if (lladdr != NULL) {
+ if (is_dad_probe) {
+ printf("%s: bad ND6 DAD packet\n", __func__);
+ return;
+ }
+ if (lladdrlen != ETHER_ND_LLADDR_LEN) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ printf("%s: source lladdrlen %d != %lu\n",
+ __func__,
+ lladdrlen, ETHER_ND_LLADDR_LEN);
+ }
+ return;
+ }
+ mnr->mnr_ip6_lladdr_offset = (void *)lladdr -
+ (void *)eh;
+ mnr->mnr_ip6_icmp6_len = icmp6len;
+ mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
+ mnr->mnr_ip6_header_len = off;
+ }
+ if (is_dad_probe) {
+ /* the node is trying to use taddr; create an mne using taddr */
+ *saddrp = taddr;
+ }
+ break;
+ }
+ case ND_NEIGHBOR_ADVERT: {
+ struct nd_neighbor_advert *nd_na;
+ union nd_opts ndopts;
+ struct in6_addr taddr;
+
+
+ nd_na = (struct nd_neighbor_advert *)(void *)icmp6;
+
+ if (icmp6len < sizeof(*nd_na)) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ printf("%s: short nd_na %d < %lu\n", __func__,
+ icmp6len, sizeof(*nd_na));
+ }
+ return;
+ }
+
+ bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
+ if (IN6_IS_ADDR_MULTICAST(&taddr) ||
+ IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ printf("%s: invalid target ignored\n", __func__);
+ }
+ return;
+ }
+ /* parse options */
+ nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
+ if (nd6_options(&ndopts) < 0) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ printf("%s: invalid ND6 NA option\n", __func__);
+ }
+ return;
+ }
+ if (ndopts.nd_opts_tgt_lladdr == NULL) {
+ /* no target link-layer address option, nothing to do */
+ return;
+ }
+ lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
+ lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
+ if (lladdrlen != ETHER_ND_LLADDR_LEN) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ printf("%s: target lladdrlen %d != %lu\n",
+ __func__, lladdrlen, ETHER_ND_LLADDR_LEN);
+ }
+ return;
+ }
+ mnr->mnr_ip6_lladdr_offset = (void *)lladdr - (void *)eh;
+ mnr->mnr_ip6_icmp6_len = icmp6len;
+ mnr->mnr_ip6_header_len = off;
+ mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
+ break;
+ }
+ case ND_ROUTER_SOLICIT: {
+ struct nd_router_solicit *nd_rs;
+ union nd_opts ndopts;
+
+ if (icmp6len < sizeof(*nd_rs)) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ printf("%s: short nd_rs %d < %lu\n", __func__,
+ icmp6len, sizeof(*nd_rs));
+ }
+ return;
+ }
+ nd_rs = (struct nd_router_solicit *)(void *)icmp6;
+
+ /* parse options */
+ nd6_option_init(nd_rs + 1, icmp6len - sizeof(*nd_rs), &ndopts);
+ if (nd6_options(&ndopts) < 0) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ printf("%s: invalid ND6 RS option\n", __func__);
+ }
+ return;
+ }
+ if (ndopts.nd_opts_src_lladdr != NULL) {
+ lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
+ lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
+ }
+ if (lladdr != NULL) {
+ if (lladdrlen != ETHER_ND_LLADDR_LEN) {
+ if (IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ printf("%s: source lladdrlen %d != %lu\n",
+ __func__,
+ lladdrlen, ETHER_ND_LLADDR_LEN);
+ }
+ return;
+ }
+ mnr->mnr_ip6_lladdr_offset = (void *)lladdr -
+ (void *)eh;
+ mnr->mnr_ip6_icmp6_len = icmp6len;
+ mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
+ mnr->mnr_ip6_header_len = off;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ if (mnr->mnr_ip6_lladdr_offset != 0 &&
+ IF_BRIDGE_DEBUG(BR_DBGF_MAC_NAT)) {
+ const char *str;
+
+ switch (mnr->mnr_ip6_icmp6_type) {
+ case ND_ROUTER_SOLICIT:
+ str = "ROUTER SOLICIT";
+ break;
+ case ND_NEIGHBOR_ADVERT:
+ str = "NEIGHBOR ADVERT";
+ break;
+ case ND_NEIGHBOR_SOLICIT:
+ str = "NEIGHBOR SOLICIT";
+ break;
+ default:
+ str = "";
+ break;
+ }
+ printf("%s %s %s ip6len %d icmp6len %d lladdr offset %d\n",
+ sc->sc_if_xname, bif->bif_ifp->if_xname, str,
+ mnr->mnr_ip6_header_len,
+ mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
+ }
+}
+
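+/*
+ * bridge_mac_nat_ipv6_input:
+ *
+ * Look up the destination IPv6 address in the MAC NAT table.
+ */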
+static struct mac_nat_entry *
+bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
+{
+ struct in6_addr dst;
+ struct ether_header *eh;
+ struct ip6_hdr *ip6h;
+ struct mac_nat_entry *mne = NULL;
+
+ eh = get_ether_ipv6_header(data, FALSE);
+ if (eh == NULL) {
+ goto done;
+ }
+ ip6h = (struct ip6_hdr *)(void *)(eh + 1);
+#if 0
+ if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
+ bridge_mac_nat_icmpv6_input(sc, data, eh, ip6h);
+ }
+#endif
+ bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
+ /* XXX validate IPv6 address */
+ if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
+ goto done;
+ }
+ mne = bridge_lookup_mac_nat_entry(sc, AF_INET6, &dst);
+
+done:
+ return mne;
+}
+
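+/*
+ * bridge_mac_nat_ipv6_output:
+ *
+ * Create or update the MAC NAT entry for the source IPv6 address and, for
+ * ICMPv6 Neighbor Discovery, record how to rewrite the link-layer address
+ * option. Packets generated by the MAC NAT interface itself are not
+ * translated.
+ */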
+static boolean_t
+bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
+ struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
+{
+ struct ether_header *eh;
+ struct ip6_hdr *ip6h;
+ struct in6_addr saddr;
+ boolean_t translate;
+
+ translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
+ eh = get_ether_ipv6_header(data, TRUE);
+ if (eh == NULL) {
+ translate = FALSE;
+ goto done;
+ }
+ ip6h = (struct ip6_hdr *)(void *)(eh + 1);
+ bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
+ if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
+ bridge_mac_nat_icmpv6_output(sc, bif, data,
+ eh, ip6h, &saddr, mnr);
+ }
+ if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
+ goto done;
+ }
+ (void)bridge_update_mac_nat_entry(sc, bif, AF_INET6, &saddr,
+ eh->ether_shost);
+
+done:
+ return translate;
+}
+
+/*
+ * bridge_mac_nat_input:
+ * Process a packet arriving on the MAC NAT interface (sc_mac_nat_bif).
+ * This interface is the "external" interface with respect to NAT.
+ * The interface is only capable of receiving packets addressed to a
+ * single MAC address (e.g. a Wi-Fi STA interface).
+ *
+ * When a packet arrives on the external interface, look up the destination
+ * IP address in the mac_nat_entry table. If there is a match, *is_input
+ * is set to TRUE if the entry belongs to the MAC NAT interface itself;
+ * otherwise *is_input is set to FALSE and the destination MAC address is
+ * translated if necessary.
+ *
+ * Returns:
+ * The internal interface to direct the packet to, or NULL if the packet
+ * should not be redirected.
+ *
+ * *data may be updated to point at a different mbuf chain, or set to NULL
+ * if the chain was deallocated during processing.
+ */
+static ifnet_t
+bridge_mac_nat_input(struct bridge_softc *sc, mbuf_t *data,
+ boolean_t *is_input)
+{
+ ifnet_t dst_if = NULL;
+ struct ether_header *eh;
+ uint16_t ether_type;
+ boolean_t is_unicast;
+ mbuf_t m = *data;
+ struct mac_nat_entry *mne = NULL;
+
+ BRIDGE_LOCK_ASSERT_HELD(sc);
+ *is_input = FALSE;
+ assert(sc->sc_mac_nat_bif != NULL);
+ is_unicast = ((m->m_flags & (M_BCAST | M_MCAST)) == 0);
+ eh = mtod(m, struct ether_header *);
+ ether_type = ntohs(eh->ether_type);
+ switch (ether_type) {
+ case ETHERTYPE_ARP:
+ mne = bridge_mac_nat_arp_input(sc, data);
+ break;
+ case ETHERTYPE_IP:
+ if (is_unicast) {
+ mne = bridge_mac_nat_ip_input(sc, data);
+ }
+ break;
+ case ETHERTYPE_IPV6:
+ if (is_unicast) {
+ mne = bridge_mac_nat_ipv6_input(sc, data);
+ }
+ break;
+ default:
+ break;
+ }
+ if (mne != NULL) {
+ if (is_unicast) {
+ if (m != *data) {
+ /* it may have changed */
+ eh = mtod(*data, struct ether_header *);
+ }
+ bcopy(mne->mne_mac, eh->ether_dhost,
+ sizeof(eh->ether_dhost));
+ }
+ dst_if = mne->mne_bif->bif_ifp;
+ *is_input = (mne->mne_bif == sc->sc_mac_nat_bif);
+ }
+ return dst_if;
+}
+
+/*
+ * bridge_mac_nat_output:
+ * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
+ * from the interface 'bif'.
+ *
+ * Create a mac_nat_entry containing the source IP address and MAC address
+ * from the packet. Populate a mac_nat_record with information detailing
+ * how to translate the packet. Translation takes place later when
+ * the bridge lock is no longer held.
+ *
+ * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT interface is
+ * generating an output packet. No translation is required in this case;
+ * we just record the IP address used to prevent another bif from
+ * claiming our IP address.
+ *
+ * Returns:
+ * TRUE if the packet should be translated (*mnr updated as well),
+ * FALSE otherwise.
+ *
+ * *data may be updated to point at a different mbuf chain or NULL if
+ * the chain was deallocated during processing.
+ */
+
+static boolean_t
+bridge_mac_nat_output(struct bridge_softc *sc,
+ struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
+{
+ struct ether_header *eh;
+ uint16_t ether_type;
+ boolean_t translate = FALSE;
+
+ BRIDGE_LOCK_ASSERT_HELD(sc);
+ assert(sc->sc_mac_nat_bif != NULL);
+
+ eh = mtod(*data, struct ether_header *);
+ ether_type = ntohs(eh->ether_type);
+ if (mnr != NULL) {
+ bzero(mnr, sizeof(*mnr));
+ mnr->mnr_ether_type = ether_type;
+ }
+ switch (ether_type) {
+ case ETHERTYPE_ARP:
+ translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
+ break;
+ case ETHERTYPE_IP:
+ translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
+ break;
+ case ETHERTYPE_IPV6:
+ translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
+ break;
+ default:
+ break;
+ }
+ return translate;
+}
+
+static void
+bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
+ const caddr_t eaddr)
+{
+ errno_t error;
+
+ if (mnr->mnr_arp_offset == 0) {
+ return;
+ }
+ /* replace the source hardware address */
+ error = mbuf_copyback(*data, mnr->mnr_arp_offset,
+ ETHER_ADDR_LEN, eaddr,
+ MBUF_DONTWAIT);
+ if (error != 0) {
+ printf("%s: mbuf_copyback failed\n",
+ __func__);
+ m_freem(*data);
+ *data = NULL;
+ }
+ return;
+}
+
+static void
+bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
+{
+ errno_t error;
+ size_t offset;
+
+ if (mnr->mnr_ip_header_len == 0) {
+ return;
+ }
+ /* update the UDP checksum */
+ offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
+ error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
+ sizeof(mnr->mnr_ip_udp_csum),
+ &mnr->mnr_ip_udp_csum,
+ MBUF_DONTWAIT);
+ if (error != 0) {
+ printf("%s: mbuf_copyback uh_sum failed\n",
+ __func__);
+ m_freem(*data);
+ *data = NULL;
+ return;
+ }
+ /* update the DHCP must broadcast flag */
+ offset += sizeof(struct udphdr);
+ error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
+ sizeof(mnr->mnr_ip_dhcp_flags),
+ &mnr->mnr_ip_dhcp_flags,
+ MBUF_DONTWAIT);
+ if (error != 0) {
+ printf("%s: mbuf_copyback dp_flags failed\n",
+ __func__);
+ m_freem(*data);
+ *data = NULL;
+ }
+}
+
+static void
+bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
+ const caddr_t eaddr)
+{
+ uint16_t cksum;
+ errno_t error;
+ mbuf_t m = *data;
+
+ if (mnr->mnr_ip6_header_len == 0) {
+ return;
+ }
+ switch (mnr->mnr_ip6_icmp6_type) {
+ case ND_ROUTER_SOLICIT:
+ case ND_NEIGHBOR_SOLICIT:
+ case ND_NEIGHBOR_ADVERT:
+ if (mnr->mnr_ip6_lladdr_offset == 0) {
+ /* nothing to do */
+ return;
+ }
+ break;
+ default:
+ return;
+ }
+
+ /*
+ * replace the lladdr
+ */
+ error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
+ ETHER_ADDR_LEN, eaddr,
+ MBUF_DONTWAIT);
+ if (error != 0) {
+ printf("%s: mbuf_copyback lladdr failed\n",
+ __func__);
+ m_freem(m);
+ *data = NULL;
+ return;
+ }
+
+ /*
+ * recompute the icmp6 checksum
+ */
+
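+ /* in6_cksum() expects the mbuf data to begin at the IPv6 header */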
+ /* skip past the ethernet header */
+ mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
+ mbuf_len(m) - ETHER_HDR_LEN);
+ mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
+
+#define CKSUM_OFFSET_ICMP6 offsetof(struct icmp6_hdr, icmp6_cksum)
+ /* set the checksum to zero */
+ cksum = 0;
+ error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
+ sizeof(cksum), &cksum, MBUF_DONTWAIT);
+ if (error != 0) {
+ printf("%s: mbuf_copyback cksum=0 failed\n",
+ __func__);
+ m_freem(m);
+ *data = NULL;
+ return;
+ }
+ /* compute and set the new checksum */
+ cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
+ mnr->mnr_ip6_icmp6_len);
+ error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
+ sizeof(cksum), &cksum, MBUF_DONTWAIT);
+ if (error != 0) {
+ printf("%s: mbuf_copyback cksum failed\n",
+ __func__);
+ m_freem(m);
+ *data = NULL;
+ return;
+ }
+ /* restore the ethernet header */
+ mbuf_setdata(m, (char *)mbuf_data(m) - ETHER_HDR_LEN,
+ mbuf_len(m) + ETHER_HDR_LEN);
+ mbuf_pkthdr_adjustlen(m, ETHER_HDR_LEN);
+ return;
+}
+
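+/*
+ * bridge_mac_nat_translate:
+ *
+ * Replace the source MAC address with the MAC NAT interface's address and
+ * apply the protocol-specific fixups recorded in the mac_nat_record.
+ * Called once the bridge lock is no longer held.
+ */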
+static void
+bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
+ const caddr_t eaddr)
+{
+ struct ether_header *eh;
+
+ /* replace the source ethernet address with the single MAC */
+ eh = mtod(*data, struct ether_header *);
+ bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
+ switch (mnr->mnr_ether_type) {
+ case ETHERTYPE_ARP:
+ bridge_mac_nat_arp_translate(data, mnr, eaddr);
+ break;
+
+ case ETHERTYPE_IP:
+ bridge_mac_nat_ip_translate(data, mnr);
+ break;
+
+ case ETHERTYPE_IPV6:
+ bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
+ break;
+
+ default:
+ break;
+ }
+ return;
+}
+
+/*
+ * bridge packet filtering
+ */
+
+/*
+ * The PF routines expect to be called from ip_input, so here we need to
+ * do, and then undo, some of the same processing.
+ *
+ * XXX : this is heavily inspired by bridge_pfil()
+ */
+static
+int
+bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags, int input)
+{
+ /*
+ * XXX : mpetit : heavily inspired by bridge_pfil()
+ */
+
+ int snap, error, i, hlen;
+ struct ether_header *eh1, eh2;
+ struct ip *ip;
+ struct llc llc1;
+ u_int16_t ether_type;
+
+ snap = 0;
+ error = -1; /* default error unless explicitly set to 0 below */
+
+ if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
+ return 0; /* filtering is disabled */
+ }
+ i = min((*mp)->m_pkthdr.len, max_protohdr);
+ if ((*mp)->m_len < i) {
+ *mp = m_pullup(*mp, i);
+ if (*mp == NULL) {
+ printf("%s: m_pullup failed\n", __func__);
+ return -1;
+ }
+ }
+
+ eh1 = mtod(*mp, struct ether_header *);
+ ether_type = ntohs(eh1->ether_type);
+
+ /*
+ * Check for SNAP/LLC.
+ */
+ if (ether_type < ETHERMTU) {
+ struct llc *llc2 = (struct llc *)(eh1 + 1);
+
+ if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
+ llc2->llc_dsap == LLC_SNAP_LSAP &&
+ llc2->llc_ssap == LLC_SNAP_LSAP &&
+ llc2->llc_control == LLC_UI) {
+ ether_type = htons(llc2->llc_un.type_snap.ether_type);
+ snap = 1;
+ }
+ }
+
+ /*
+ * If we're trying to filter bridge traffic, don't look at anything
+ * other than IP and ARP traffic. If the filter doesn't understand
+ * IPv6, don't allow IPv6 through the bridge either. This is lame
+ * since if we really wanted, say, an AppleTalk filter, we are hosed,
+ * but of course we don't have an AppleTalk filter to begin with.
+ * (Note that since pfil doesn't understand ARP it will pass *ALL*
+ * ARP traffic.)
+ */
+ switch (ether_type) {
+ case ETHERTYPE_ARP:
+ case ETHERTYPE_REVARP:
+ return 0; /* Automatically pass */
+
+ case ETHERTYPE_IP:
+ case ETHERTYPE_IPV6:
+ break;
+ default:
+ /*
+ * Check to see if the user wants to pass non-ip
+ * packets, these will not be checked by pf and
+ * passed unconditionally so the default is to drop.
+ */
+ if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
+ goto bad;
+ }
+ break;
+ }
+
+ /* Strip off the Ethernet header and keep a copy. */
+ m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
+ m_adj(*mp, ETHER_HDR_LEN);
+
+ /* Strip off snap header, if present */
+ if (snap) {
+ m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
+ m_adj(*mp, sizeof(struct llc));
+ }
+
+ /*
+ * Check the IP header for alignment and errors
+ */
+ switch (ether_type) {
+ case ETHERTYPE_IP:
+ error = bridge_ip_checkbasic(mp);
+ break;
+ case ETHERTYPE_IPV6:
+ error = bridge_ip6_checkbasic(mp);
+ break;
+ default:
+ error = 0;
+ break;
+ }
+ if (error) {
+ goto bad;
+ }
+
+ error = 0;
+
+ /*
+ * Run the packet through pf rules
+ */
+ switch (ether_type) {
+ case ETHERTYPE_IP:
+ /*
+ * before calling the firewall, swap fields the same as
+ * IP does. here we assume the header is contiguous
+ */
+ ip = mtod(*mp, struct ip *);
+
+ ip->ip_len = ntohs(ip->ip_len);
+ ip->ip_off = ntohs(ip->ip_off);
+
+ if (ifp != NULL) {
+ error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
+ }
+
+ if (*mp == NULL || error != 0) { /* filter may consume */
+ break;
+ }
+
+ /* Recalculate the ip checksum and restore byte ordering */
+ ip = mtod(*mp, struct ip *);
+ hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+ if (hlen < (int)sizeof(struct ip)) {
+ goto bad;
+ }
+ if (hlen > (*mp)->m_len) {
+ if ((*mp = m_pullup(*mp, hlen)) == 0) {
+ goto bad;
+ }
+ ip = mtod(*mp, struct ip *);
+ if (ip == NULL) {
+ goto bad;
+ }
+ }
+ ip->ip_len = htons(ip->ip_len);
+ ip->ip_off = htons(ip->ip_off);
+ ip->ip_sum = 0;
+ if (hlen == sizeof(struct ip)) {
+ ip->ip_sum = in_cksum_hdr(ip);
+ } else {
+ ip->ip_sum = in_cksum(*mp, hlen);
+ }
+ break;
+
+ case ETHERTYPE_IPV6:
+ if (ifp != NULL) {
+ error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
+ }
+
+ if (*mp == NULL || error != 0) { /* filter may consume */
+ break;
+ }
+ break;
+ default:
+ error = 0;
+ break;
+ }
+
+ if (*mp == NULL) {
+ return error;
+ }
+ if (error != 0) {
+ goto bad;
+ }
+
+ error = -1;
+
+ /*
+ * Finally, put everything back the way it was and return
+ */
+ if (snap) {
+ M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
+ if (*mp == NULL) {
+ return error;
+ }
+ bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
+ }
+
+ M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
+ if (*mp == NULL) {
+ return error;
+ }
+ bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
+
+ return 0;
+
+bad:
+ m_freem(*mp);
+ *mp = NULL;
+ return error;
+}
#include <net/if.h>
#include <net/ethernet.h>
+#include <netinet/in.h>
/*
* Commands used in the SIOCSDRVSPEC ioctl. Note the lookup of the
#define BRDGSPROTO 31 /* set protocol (ifbrparam) */
#define BRDGSTXHC 32 /* set tx hold count (ifbrparam) */
#define BRDGSIFAMAX 33 /* set max interface addrs (ifbreq) */
-#define BRDGGHOSTFILTER 34 /* set max interface addrs (ifbrhostfilter) */
-#define BRDGSHOSTFILTER 35 /* set max interface addrs (ifbrhostfilter) */
+#define BRDGGHOSTFILTER 34 /* get host filter (ifbrhostfilter) */
+#define BRDGSHOSTFILTER 35 /* set host filter (ifbrhostfilter) */
+#define BRDGGMACNATLIST 36 /* get MAC NAT list (ifbrmnelist) */
+
/*
* Generic bridge control request.
#pragma pack()
-/* BRDGGIFFLAGS, BRDGSIFFLAGS */
+/* BRDGGIFFLGS, BRDGSIFFLGS */
#define IFBIF_LEARNING 0x0001 /* if can learn */
#define IFBIF_DISCOVER 0x0002 /* if sends packets w/ unknown dest. */
#define IFBIF_STP 0x0004 /* if participates in spanning tree */
#define IFBIF_BSTP_ADMEDGE 0x0200 /* member stp admin edge enabled */
#define IFBIF_BSTP_ADMCOST 0x0400 /* member stp admin path cost */
#define IFBIF_PRIVATE 0x0800 /* if is a private segment */
+#define IFBIF_MAC_NAT 0x8000 /* member requires MAC NAT */
#define IFBIFBITS "\020\001LEARNING\002DISCOVER\003STP\004SPAN" \
- "\005STICKY\014PRIVATE\006EDGE\007AUTOEDGE\010PTP" \
- "\011AUTOPTP"
+ "\005STICKY\006EDGE\007AUTOEDGE\010PTP" \
+ "\011AUTOPTP\014PRIVATE" \
+ "\020MACNAT"
+
#define IFBIFMASK ~(IFBIF_BSTP_EDGE|IFBIF_BSTP_AUTOEDGE|IFBIF_BSTP_PTP| \
IFBIF_BSTP_AUTOPTP|IFBIF_BSTP_ADMEDGE| \
IFBIF_BSTP_ADMCOST) /* not saved */
#define IFBF_FLUSHALL 0x01 /* flush all addresses */
/* BRDGSFILT */
-#define IFBF_FILT_USEIPF 0x00000001 /* run pfil hooks on the bridge
+#define IFBF_FILT_USEIPF 0x00000001 /* run pf hooks on the bridge
* interface */
-#define IFBF_FILT_MEMBER 0x00000002 /* run pfil hooks on the member
+#define IFBF_FILT_MEMBER 0x00000002 /* run pf hooks on the member
* interfaces */
#define IFBF_FILT_ONLYIP 0x00000004 /* only pass IP[46] packets when
- * pfil is enabled */
+ * pf is enabled */
#define IFBF_FILT_MASK 0x00000007 /* mask of valid values */
-
-/* APPLE MODIFICATION <jhw@apple.com>: Default is to pass non-IP packets. */
-#define IFBF_FILT_DEFAULT ( IFBF_FILT_USEIPF | IFBF_FILT_MEMBER )
-#if 0
-#define IFBF_FILT_DEFAULT (IFBF_FILT_USEIPF | \
-IFBF_FILT_MEMBER | \
-IFBF_FILT_ONLYIP)
-#endif
-
/*
* Interface list structure.
*/
int bridgeattach(int);
#endif /* XNU_KERNEL_PRIVATE */
+
+
+/*
+ * MAC NAT entry list
+ */
+
+#pragma pack(4)
+
+union ifbrip {
+ struct in_addr ifbrip_addr;
+ struct in6_addr ifbrip_addr6;
+};
+
+struct ifbrmne {
+ char ifbmne_ifname[IFNAMSIZ]; /* member if name */
+ uint64_t ifbmne_expire; /* expiration time */
+ uint8_t ifbmne_mac[ETHER_ADDR_LEN]; /* MAC address */
+ uint8_t ifbmne_reserved;
+ uint8_t ifbmne_af; /* AF_INET or AF_INET6 */
+ union ifbrip ifbmne_ip;
+};
+#define ifbmne_ip_addr ifbmne_ip.ifbrip_addr
+#define ifbmne_ip6_addr ifbmne_ip.ifbrip_addr6
+
+#ifndef XNU_KERNEL_PRIVATE
+
+struct ifbrmnelist {
+ uint32_t ifbml_len; /* buffer size (multiple of elsize) */
+ uint16_t ifbml_elsize; /* sizeof(struct ifbrmne) */
+ uint16_t ifbml_pad;
+ caddr_t ifbml_buf;
+};
+
+#else /* XNU_KERNEL_PRIVATE */
+
+struct ifbrmnelist32 {
+ uint32_t ifbml_len; /* buffer size */
+ uint16_t ifbml_elsize; /* sizeof(struct ifbrmne) */
+ uint16_t ifbml_pad;
+ user32_addr_t ifbml_buf;
+};
+
+struct ifbrmnelist64 {
+ uint32_t ifbml_len; /* buffer size */
+ uint16_t ifbml_elsize; /* sizeof(struct ifbrmne) */
+ uint16_t ifbml_pad;
+ user64_addr_t ifbml_buf;
+};
+
+#endif /* XNU_KERNEL_PRIVATE */
+
+#pragma pack()
+
#endif /* PRIVATE */
#endif /* !_NET_IF_BRIDGEVAR_H_ */
#include <sys/_types/_sa_family_t.h>
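+/*
+ * __NKE_API_DEPRECATED marks Network Kernel Extension KPIs as deprecated
+ * for third-party kexts; it expands to nothing for kernel-internal
+ * (PRIVATE) builds.
+ */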
+#ifndef PRIVATE
+#include <Availability.h>
+#define __NKE_API_DEPRECATED __API_DEPRECATED("Network Kernel Extension KPI is deprecated", macos(10.4, 10.15.4))
+#else
+#define __NKE_API_DEPRECATED
+#endif /* PRIVATE */
+
#ifdef XNU_KERNEL_PRIVATE
#if CONFIG_EMBEDDED
#define KPI_INTERFACE_EMBEDDED 1
ifnet_allocate_internal((init), (interface))
#else
extern errno_t ifnet_allocate(const struct ifnet_init_params *init,
- ifnet_t *interface);
+ ifnet_t *interface)
+__NKE_API_DEPRECATED;
#endif /* KERNEL_PRIVATE */
#ifdef KERNEL_PRIVATE
* @param interface The interface to increment the reference count of.
* @result May return EINVAL if the interface is not valid.
*/
-extern errno_t ifnet_reference(ifnet_t interface);
+extern errno_t ifnet_reference(ifnet_t interface)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_release
* and possibly free.
* @result May return EINVAL if the interface is not valid.
*/
-extern errno_t ifnet_release(ifnet_t interface);
+extern errno_t ifnet_release(ifnet_t interface)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_attach
* interface.
*/
extern errno_t ifnet_attach(ifnet_t interface,
- const struct sockaddr_dl *ll_addr);
+ const struct sockaddr_dl *ll_addr)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_detach
* @param interface The interface to detach.
* @result 0 on success, otherwise errno error.
*/
-extern errno_t ifnet_detach(ifnet_t interface);
+extern errno_t ifnet_detach(ifnet_t interface)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_interface_family_find
* is rebooted.
* @result 0 on success, otherwise errno error.
*/
-extern errno_t ifnet_interface_family_find(const char *module_string, ifnet_family_t *family_id);
+extern errno_t ifnet_interface_family_find(const char *module_string, ifnet_family_t *family_id)
+__NKE_API_DEPRECATED;
/*
* Interface manipulation.
* @param interface Interface to retrieve the storage from.
* @result Driver's private storage.
*/
-extern void *ifnet_softc(ifnet_t interface);
+extern void *ifnet_softc(ifnet_t interface)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_name
* @param interface Interface to retrieve the name from.
* @result Pointer to the name.
*/
-extern const char *ifnet_name(ifnet_t interface);
+extern const char *ifnet_name(ifnet_t interface)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_family
* @param interface Interface to retrieve the family from.
* @result Interface family type.
*/
-extern ifnet_family_t ifnet_family(ifnet_t interface);
+extern ifnet_family_t ifnet_family(ifnet_t interface)
+__NKE_API_DEPRECATED;
#ifdef KERNEL_PRIVATE
/*
* @param interface Interface to retrieve the unit number from.
* @result Unit number.
*/
-extern u_int32_t ifnet_unit(ifnet_t interface);
+extern u_int32_t ifnet_unit(ifnet_t interface)
+__NKE_API_DEPRECATED;
+
/*!
* @function ifnet_index
* @param interface Interface to retrieve the index of.
* @result Index.
*/
-extern u_int32_t ifnet_index(ifnet_t interface);
+extern u_int32_t ifnet_index(ifnet_t interface)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_set_flags
* @result 0 on success otherwise the errno error.
*/
extern errno_t ifnet_set_flags(ifnet_t interface, u_int16_t new_flags,
- u_int16_t mask);
+ u_int16_t mask)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_flags
* @param interface Interface to retrieve the flags from.
* @result Flags. These flags are defined in net/if.h
*/
-extern u_int16_t ifnet_flags(ifnet_t interface);
-
+extern u_int16_t ifnet_flags(ifnet_t interface)
+__NKE_API_DEPRECATED;
#ifdef KERNEL_PRIVATE
/*
* @result 0 on success otherwise the errno error.
*/
extern errno_t ifnet_set_capabilities_supported(ifnet_t interface, u_int32_t new_caps,
- u_int32_t mask);
+ u_int32_t mask)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_capabilities_supported
* @param interface Interface to retrieve the capabilities from.
* @result Flags. Capabilities flags are defined in net/if.h
*/
-extern u_int32_t ifnet_capabilities_supported(ifnet_t interface);
+extern u_int32_t ifnet_capabilities_supported(ifnet_t interface)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_set_capabilities_enabled
* @result 0 on success otherwise the errno error.
*/
extern errno_t ifnet_set_capabilities_enabled(ifnet_t interface, u_int32_t new_caps,
- u_int32_t mask);
+ u_int32_t mask)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_capabilities_enabled
* @param interface Interface to retrieve the capabilities from.
* @result Flags. Capabilities flags are defined in net/if.h
*/
-extern u_int32_t ifnet_capabilities_enabled(ifnet_t interface);
-
+extern u_int32_t ifnet_capabilities_enabled(ifnet_t interface)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_set_offload
* the device supports.
* @result 0 on success otherwise the errno error.
*/
-extern errno_t ifnet_set_offload(ifnet_t interface, ifnet_offload_t offload);
+extern errno_t ifnet_set_offload(ifnet_t interface, ifnet_offload_t offload)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_offload
* @param interface Interface to retrieve the offload from.
* @result Abilities flags, see ifnet_offload_t.
*/
-extern ifnet_offload_t ifnet_offload(ifnet_t interface);
+extern ifnet_offload_t ifnet_offload(ifnet_t interface)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_set_tso_mtu
* @result 0 on success otherwise the errno error.
*/
extern errno_t ifnet_set_tso_mtu(ifnet_t interface, sa_family_t family,
- u_int32_t mtuLen);
+ u_int32_t mtuLen)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_get_tso_mtu
* @result 0 on success otherwise the errno error.
*/
extern errno_t ifnet_get_tso_mtu(ifnet_t interface, sa_family_t family,
- u_int32_t *mtuLen);
+ u_int32_t *mtuLen)
+__NKE_API_DEPRECATED;
/*!
* @enum Interface wake properties
* @param mask Mask of the properties to set or unset.
* @result 0 on success otherwise the errno error.
*/
-extern errno_t ifnet_set_wake_flags(ifnet_t interface, u_int32_t properties, u_int32_t mask);
+extern errno_t ifnet_set_wake_flags(ifnet_t interface, u_int32_t properties, u_int32_t mask)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_get_wake_flags
* @param interface The interface.
* @result The wake properties
*/
-extern u_int32_t ifnet_get_wake_flags(ifnet_t interface);
+extern u_int32_t ifnet_get_wake_flags(ifnet_t interface)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_set_link_mib_data
* @result 0 on success otherwise the errno error.
*/
extern errno_t ifnet_set_link_mib_data(ifnet_t interface, void *mibData,
- u_int32_t mibLen);
+ u_int32_t mibLen)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_get_link_mib_data
* no data.
*/
extern errno_t ifnet_get_link_mib_data(ifnet_t interface, void *mibData,
- u_int32_t *mibLen);
+ u_int32_t *mibLen)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_get_link_mib_data_length
* @result Returns the number of bytes of mib data associated with the
* interface.
*/
-extern u_int32_t ifnet_get_link_mib_data_length(ifnet_t interface);
+extern u_int32_t ifnet_get_link_mib_data_length(ifnet_t interface)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_attach_protocol
*/
extern errno_t ifnet_attach_protocol(ifnet_t interface,
protocol_family_t protocol_family,
- const struct ifnet_attach_proto_param *proto_details);
+ const struct ifnet_attach_proto_param *proto_details)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_attach_protocol_v2
*/
extern errno_t ifnet_attach_protocol_v2(ifnet_t interface,
protocol_family_t protocol_family,
- const struct ifnet_attach_proto_param_v2 *proto_details);
+ const struct ifnet_attach_proto_param_v2 *proto_details)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_detach_protocol
* @result 0 on success otherwise the errno error.
*/
extern errno_t ifnet_detach_protocol(ifnet_t interface,
- protocol_family_t protocol_family);
+ protocol_family_t protocol_family)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_output
*/
extern errno_t ifnet_output(ifnet_t interface,
protocol_family_t protocol_family, mbuf_t packet, void *route,
- const struct sockaddr *dest);
+ const struct sockaddr *dest)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_output_raw
* @result 0 on success otherwise the errno error.
*/
extern errno_t ifnet_output_raw(ifnet_t interface,
- protocol_family_t protocol_family, mbuf_t packet);
+ protocol_family_t protocol_family, mbuf_t packet)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_input
* @result 0 on success otherwise the errno error.
*/
extern errno_t ifnet_input(ifnet_t interface, mbuf_t first_packet,
- const struct ifnet_stat_increment_param *stats);
+ const struct ifnet_stat_increment_param *stats)
+__NKE_API_DEPRECATED;
#ifdef KERNEL_PRIVATE
/*
* @result 0 on success otherwise the errno error.
*/
extern errno_t ifnet_ioctl(ifnet_t interface, protocol_family_t protocol,
- unsigned long ioctl_code, void *ioctl_arg);
+ unsigned long ioctl_code, void *ioctl_arg)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_event
* event.
* @result 0 on success otherwise the errno error.
*/
-extern errno_t ifnet_event(ifnet_t interface, struct kern_event_msg *event_ptr);
+extern errno_t ifnet_event(ifnet_t interface, struct kern_event_msg *event_ptr)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_set_mtu
* @param mtu The new MTU.
* @result 0 on success otherwise the errno error.
*/
-extern errno_t ifnet_set_mtu(ifnet_t interface, u_int32_t mtu);
+extern errno_t ifnet_set_mtu(ifnet_t interface, u_int32_t mtu)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_mtu
* @param interface The interface.
* @result The MTU.
*/
-extern u_int32_t ifnet_mtu(ifnet_t interface);
+extern u_int32_t ifnet_mtu(ifnet_t interface)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_type
* @param interface The interface.
* @result The type. See net/if_types.h.
*/
-extern u_int8_t ifnet_type(ifnet_t interface);
+extern u_int8_t ifnet_type(ifnet_t interface)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_set_addrlen
* @param addrlen The new address length.
* @result 0 on success otherwise the errno error.
*/
-extern errno_t ifnet_set_addrlen(ifnet_t interface, u_int8_t addrlen);
+extern errno_t ifnet_set_addrlen(ifnet_t interface, u_int8_t addrlen)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_addrlen
* @param interface The interface.
* @result The address length.
*/
-extern u_int8_t ifnet_addrlen(ifnet_t interface);
+extern u_int8_t ifnet_addrlen(ifnet_t interface)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_set_hdrlen
* @param hdrlen The new header length.
* @result 0 on success otherwise the errno error.
*/
-extern errno_t ifnet_set_hdrlen(ifnet_t interface, u_int8_t hdrlen);
+extern errno_t ifnet_set_hdrlen(ifnet_t interface, u_int8_t hdrlen)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_hdrlen
* @param interface The interface.
* @result The header length.
*/
-extern u_int8_t ifnet_hdrlen(ifnet_t interface);
+extern u_int8_t ifnet_hdrlen(ifnet_t interface)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_set_metric
* @param metric The new metric.
* @result 0 on success otherwise the errno error.
*/
-extern errno_t ifnet_set_metric(ifnet_t interface, u_int32_t metric);
+extern errno_t ifnet_set_metric(ifnet_t interface, u_int32_t metric)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_metric
* @param interface The interface.
* @result The metric.
*/
-extern u_int32_t ifnet_metric(ifnet_t interface);
+extern u_int32_t ifnet_metric(ifnet_t interface)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_set_baudrate
* @param baudrate The new baudrate.
* @result 0 on success otherwise the errno error.
*/
-extern errno_t ifnet_set_baudrate(ifnet_t interface, u_int64_t baudrate);
+extern errno_t ifnet_set_baudrate(ifnet_t interface, u_int64_t baudrate)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_baudrate
* @param interface The interface.
* @result The baudrate.
*/
-extern u_int64_t ifnet_baudrate(ifnet_t interface);
+extern u_int64_t ifnet_baudrate(ifnet_t interface)
+__NKE_API_DEPRECATED;
#ifdef KERNEL_PRIVATE
typedef struct if_bandwidths if_bandwidths_t;
* @result 0 on success otherwise the errno error.
*/
extern errno_t ifnet_stat_increment(ifnet_t interface,
- const struct ifnet_stat_increment_param *counts);
+ const struct ifnet_stat_increment_param *counts)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_stat_increment_in
* @result 0 on success otherwise the errno error.
*/
extern errno_t ifnet_stat_increment_in(ifnet_t interface,
- u_int32_t packets_in, u_int32_t bytes_in, u_int32_t errors_in);
+ u_int32_t packets_in, u_int32_t bytes_in, u_int32_t errors_in)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_stat_increment_out
* @result 0 on success otherwise the errno error.
*/
extern errno_t ifnet_stat_increment_out(ifnet_t interface,
- u_int32_t packets_out, u_int32_t bytes_out, u_int32_t errors_out);
+ u_int32_t packets_out, u_int32_t bytes_out, u_int32_t errors_out)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_set_stat
* @result 0 on success otherwise the errno error.
*/
extern errno_t ifnet_set_stat(ifnet_t interface,
- const struct ifnet_stats_param *stats);
+ const struct ifnet_stats_param *stats)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_stat
* @result 0 on success otherwise the errno error.
*/
extern errno_t ifnet_stat(ifnet_t interface,
- struct ifnet_stats_param *out_stats);
+ struct ifnet_stats_param *out_stats)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_set_promiscuous
* zero, promiscuous mode will be disabled.
* @result 0 on success otherwise the errno error.
*/
-extern errno_t ifnet_set_promiscuous(ifnet_t interface, int on);
+extern errno_t ifnet_set_promiscuous(ifnet_t interface, int on)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_touch_lastchange
* @param interface The interface.
* @result 0 on success otherwise the errno error.
*/
-extern errno_t ifnet_touch_lastchange(ifnet_t interface);
+extern errno_t ifnet_touch_lastchange(ifnet_t interface)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_lastchange
* @param last_change A timeval struct to copy the last time changed in
* to.
*/
-extern errno_t ifnet_lastchange(ifnet_t interface, struct timeval *last_change);
+extern errno_t ifnet_lastchange(ifnet_t interface, struct timeval *last_change)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_get_address_list
* @param addresses A pointer to a NULL terminated array of ifaddr_ts.
* @result 0 on success otherwise the errno error.
*/
-extern errno_t ifnet_get_address_list(ifnet_t interface, ifaddr_t **addresses);
+extern errno_t ifnet_get_address_list(ifnet_t interface, ifaddr_t **addresses)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_get_address_list_family
* @result 0 on success otherwise the errno error.
*/
extern errno_t ifnet_get_address_list_family(ifnet_t interface,
- ifaddr_t **addresses, sa_family_t family);
+ ifaddr_t **addresses, sa_family_t family)
+__NKE_API_DEPRECATED;
#ifdef KERNEL_PRIVATE
/*!
* memory used for the array of references.
* @param addresses An array of ifaddr_ts.
*/
-extern void ifnet_free_address_list(ifaddr_t *addresses);
+extern void ifnet_free_address_list(ifaddr_t *addresses)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_set_lladdr
* @param lladdr_len The length, in bytes, of the link layer address.
*/
extern errno_t ifnet_set_lladdr(ifnet_t interface, const void *lladdr,
- size_t lladdr_len);
+ size_t lladdr_len)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_lladdr_copy_bytes
* length of the link-layer address.
*/
extern errno_t ifnet_lladdr_copy_bytes(ifnet_t interface, void *lladdr,
- size_t length);
+ size_t length)
+__NKE_API_DEPRECATED;
#ifdef KERNEL_PRIVATE
/*!
* @param interface The interface the link-layer address is on.
*/
extern void *ifnet_lladdr(ifnet_t interface);
+
#endif /* KERNEL_PRIVATE */
/*!
* @param out_len On return, the length of the broadcast address.
*/
extern errno_t ifnet_llbroadcast_copy_bytes(ifnet_t interface, void *addr,
- size_t bufferlen, size_t *out_len);
+ size_t bufferlen, size_t *out_len)
+__NKE_API_DEPRECATED;
#ifdef KERNEL_PRIVATE
/*!
* @param type The link-layer address type.
*/
extern errno_t ifnet_set_lladdr_and_type(ifnet_t interface, const void *lladdr,
- size_t length, u_char type);
+ size_t length, u_char type)
+__NKE_API_DEPRECATED;
#endif /* KERNEL_PRIVATE */
/*!
* indicate other failures.
*/
extern errno_t ifnet_resolve_multicast(ifnet_t ifp,
- const struct sockaddr *proto_addr, struct sockaddr *ll_addr, size_t ll_len);
+ const struct sockaddr *proto_addr, struct sockaddr *ll_addr, size_t ll_len)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_add_multicast
* @result 0 on success otherwise the errno error.
*/
extern errno_t ifnet_add_multicast(ifnet_t interface,
- const struct sockaddr *maddr, ifmultiaddr_t *multicast);
+ const struct sockaddr *maddr, ifmultiaddr_t *multicast)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_remove_multicast
* @param multicast The multicast to be removed.
* @result 0 on success otherwise the errno error.
*/
-extern errno_t ifnet_remove_multicast(ifmultiaddr_t multicast);
+extern errno_t ifnet_remove_multicast(ifmultiaddr_t multicast)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_get_multicast_list
* @result 0 on success otherwise the errno error.
*/
extern errno_t ifnet_get_multicast_list(ifnet_t interface,
- ifmultiaddr_t **addresses);
+ ifmultiaddr_t **addresses)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_free_multicast_list
* multicast address and frees the array.
* @param multicasts An array of references to the multicast addresses.
*/
-extern void ifnet_free_multicast_list(ifmultiaddr_t *multicasts);
+extern void ifnet_free_multicast_list(ifmultiaddr_t *multicasts)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_find_by_name
* filled in if a matching interface is found.
* @result 0 on success otherwise the errno error.
*/
-extern errno_t ifnet_find_by_name(const char *ifname, ifnet_t *interface);
+extern errno_t ifnet_find_by_name(const char *ifname, ifnet_t *interface)
+__NKE_API_DEPRECATED;
/*!
* @function ifnet_list_get
* @result 0 on success otherwise the errno error.
*/
extern errno_t ifnet_list_get(ifnet_family_t family, ifnet_t **interfaces,
- u_int32_t *count);
+ u_int32_t *count)
+__NKE_API_DEPRECATED;
#ifdef KERNEL_PRIVATE
/*!
*/
extern errno_t ifnet_list_get_all(ifnet_family_t family, ifnet_t **interfaces,
u_int32_t *count);
+
#endif /* KERNEL_PRIVATE */
/*!
* ifnet_list_free.
* @param interfaces An array of interface references from ifnet_list_get.
*/
-extern void ifnet_list_free(ifnet_t *interfaces);
+extern void ifnet_list_free(ifnet_t *interfaces)
+__NKE_API_DEPRECATED;
/******************************************************************************/
/* ifaddr_t accessors */
* @param ifaddr The interface address.
* @result 0 upon success
*/
-extern errno_t ifaddr_reference(ifaddr_t ifaddr);
+extern errno_t ifaddr_reference(ifaddr_t ifaddr)
+__NKE_API_DEPRECATED;
/*!
* @function ifaddr_release
* @param ifaddr The interface address.
* @result 0 upon success
*/
-extern errno_t ifaddr_release(ifaddr_t ifaddr);
+extern errno_t ifaddr_release(ifaddr_t ifaddr)
+__NKE_API_DEPRECATED;
/*!
* @function ifaddr_address
* @result 0 upon success
*/
extern errno_t ifaddr_address(ifaddr_t ifaddr, struct sockaddr *out_addr,
- u_int32_t addr_size);
+ u_int32_t addr_size)
+__NKE_API_DEPRECATED;
/*!
* @function ifaddr_address
* @param ifaddr The interface address.
* @result 0 on failure, address family on success.
*/
-extern sa_family_t ifaddr_address_family(ifaddr_t ifaddr);
+extern sa_family_t ifaddr_address_family(ifaddr_t ifaddr)
+__NKE_API_DEPRECATED;
/*!
* @function ifaddr_dstaddress
* @result 0 upon success
*/
extern errno_t ifaddr_dstaddress(ifaddr_t ifaddr, struct sockaddr *out_dstaddr,
- u_int32_t dstaddr_size);
+ u_int32_t dstaddr_size)
+__NKE_API_DEPRECATED;
/*!
* @function ifaddr_netmask
* @result 0 upon success
*/
extern errno_t ifaddr_netmask(ifaddr_t ifaddr, struct sockaddr *out_netmask,
- u_int32_t netmask_size);
+ u_int32_t netmask_size)
+__NKE_API_DEPRECATED;
/*!
* @function ifaddr_ifnet
* @param ifaddr The interface address.
* @result A reference to the interface the address is attached to.
*/
-extern ifnet_t ifaddr_ifnet(ifaddr_t ifaddr);
+extern ifnet_t ifaddr_ifnet(ifaddr_t ifaddr)
+__NKE_API_DEPRECATED;
/*!
* @function ifaddr_withaddr
* @param address The address to search for.
* @result A reference to the interface address.
*/
-extern ifaddr_t ifaddr_withaddr(const struct sockaddr *address);
+extern ifaddr_t ifaddr_withaddr(const struct sockaddr *address)
+__NKE_API_DEPRECATED;
/*!
* @function ifaddr_withdstaddr
* @param destination The destination to search for.
* @result A reference to the interface address.
*/
-extern ifaddr_t ifaddr_withdstaddr(const struct sockaddr *destination);
-
+extern ifaddr_t ifaddr_withdstaddr(const struct sockaddr *destination)
+__NKE_API_DEPRECATED;
/*!
* @function ifaddr_withnet
* @discussion Returns an interface address for the interface with the
* @param net The network to search for.
* @result A reference to the interface address.
*/
-extern ifaddr_t ifaddr_withnet(const struct sockaddr *net);
+extern ifaddr_t ifaddr_withnet(const struct sockaddr *net)
+__NKE_API_DEPRECATED;
/*!
* @function ifaddr_withroute
* @result A reference to the interface address.
*/
extern ifaddr_t ifaddr_withroute(int flags, const struct sockaddr *destination,
- const struct sockaddr *gateway);
+ const struct sockaddr *gateway)
+__NKE_API_DEPRECATED;
/*!
* @function ifaddr_findbestforaddr
* @result A reference to the interface address.
*/
extern ifaddr_t ifaddr_findbestforaddr(const struct sockaddr *addr,
- ifnet_t interface);
+ ifnet_t interface)
+__NKE_API_DEPRECATED;
/******************************************************************************/
/* ifmultiaddr_t accessors */
* @param ifmaddr The interface multicast address.
* @result 0 on success. Only error will be EINVAL if ifmaddr is not valid.
*/
-extern errno_t ifmaddr_reference(ifmultiaddr_t ifmaddr);
+extern errno_t ifmaddr_reference(ifmultiaddr_t ifmaddr)
+__NKE_API_DEPRECATED;
/*!
* @function ifmaddr_release
* @param ifmaddr The interface multicast address.
* @result 0 on success. Only error will be EINVAL if ifmaddr is not valid.
*/
-extern errno_t ifmaddr_release(ifmultiaddr_t ifmaddr);
+extern errno_t ifmaddr_release(ifmultiaddr_t ifmaddr)
+__NKE_API_DEPRECATED;
/*!
* @function ifmaddr_address
* @result 0 on success.
*/
extern errno_t ifmaddr_address(ifmultiaddr_t ifmaddr,
- struct sockaddr *out_multicast, u_int32_t addr_size);
+ struct sockaddr *out_multicast, u_int32_t addr_size)
+__NKE_API_DEPRECATED;
/*!
* @function ifmaddr_lladdress
* @result 0 on success.
*/
extern errno_t ifmaddr_lladdress(ifmultiaddr_t ifmaddr,
- struct sockaddr *out_link_layer_multicast, u_int32_t addr_size);
+ struct sockaddr *out_link_layer_multicast, u_int32_t addr_size)
+__NKE_API_DEPRECATED;
/*!
* @function ifmaddr_ifnet
* @param ifmaddr The interface multicast address.
* @result A reference to the interface.
*/
-extern ifnet_t ifmaddr_ifnet(ifmultiaddr_t ifmaddr);
+extern ifnet_t ifmaddr_ifnet(ifmultiaddr_t ifmaddr)
+__NKE_API_DEPRECATED;
#ifdef KERNEL_PRIVATE
/******************************************************************************/
#include <sys/kernel_types.h>
#include <net/kpi_interface.h>
+#ifndef PRIVATE
+#include <Availability.h>
+#define __NKE_API_DEPRECATED __API_DEPRECATED("Network Kernel Extension KPI is deprecated", macos(10.4, 10.15.4))
+#else
+#define __NKE_API_DEPRECATED
+#endif /* PRIVATE */
+
struct kev_msg;
__BEGIN_DECLS
iflt_attach_internal((interface), (filter), (filter_ref))
#else
extern errno_t iflt_attach(ifnet_t interface, const struct iff_filter *filter,
- interface_filter_t *filter_ref);
+ interface_filter_t *filter_ref)
+__NKE_API_DEPRECATED;
#endif /* KERNEL_PRIVATE */
/*!
* @discussion Detaches an interface filter from an interface.
* @param filter_ref The reference to the filter from iflt_attach.
*/
-extern void iflt_detach(interface_filter_t filter_ref);
+extern void iflt_detach(interface_filter_t filter_ref)
+__NKE_API_DEPRECATED;
__END_DECLS
#endif /* __KPI_INTERFACEFILTER__ */
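As an illustration of how the __NKE_API_DEPRECATED annotation behaves for third-party (non-PRIVATE) builds: the macro expands to an __API_DEPRECATED availability attribute, so every annotated KPI declaration produces a deprecation warning when a network kernel extension is compiled against the SDK. A minimal sketch with a hypothetical function name; it is not part of the change above:

#include <Availability.h>

extern int example_nke_call(int arg)
__API_DEPRECATED("Network Kernel Extension KPI is deprecated", macos(10.4, 10.15.4));

static int
example_caller(void)
{
	/* Compilers targeting macOS 10.15.4 or later warn that example_nke_call is deprecated. */
	return example_nke_call(0);
}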
#include <sys/kernel_types.h>
#include <net/kpi_interface.h>
+#ifndef PRIVATE
+#include <Availability.h>
+#define __NKE_API_DEPRECATED __API_DEPRECATED("Network Kernel Extension KPI is deprecated", macos(10.4, 10.15.4))
+#else
+#define __NKE_API_DEPRECATED
+#endif /* PRIVATE */
+
__BEGIN_DECLS
/******************************************************************************/
* @result An errno error on failure. Unless proto_input returns zero,
* the caller is responsible for freeing the mbuf.
*/
-extern errno_t proto_input(protocol_family_t protocol, mbuf_t packet);
+extern errno_t proto_input(protocol_family_t protocol, mbuf_t packet)
+__NKE_API_DEPRECATED;
/*!
* @function proto_inject
* @result An errno error on failure. Unless proto_inject returns zero,
* the caller is responsible for freeing the mbuf.
*/
-extern errno_t proto_inject(protocol_family_t protocol, mbuf_t packet);
+extern errno_t proto_inject(protocol_family_t protocol, mbuf_t packet)
+__NKE_API_DEPRECATED;
/******************************************************************************/
*/
extern errno_t proto_register_plumber(protocol_family_t proto_fam,
ifnet_family_t if_fam, proto_plumb_handler plumb,
- proto_unplumb_handler unplumb);
+ proto_unplumb_handler unplumb)
+__NKE_API_DEPRECATED;
/*!
* @function proto_unregister_plumber
* @param if_fam The interface family these plumbing functions handle.
*/
extern void proto_unregister_plumber(protocol_family_t proto_fam,
- ifnet_family_t if_fam);
+ ifnet_family_t if_fam)
+__NKE_API_DEPRECATED;
#ifdef BSD_KERNEL_PRIVATE
/*
errno_t cred_result;
unsigned has_client : 1;
unsigned is_platform_binary : 1;
- unsigned __pad_bits : 6;
+ unsigned used_responsible_pid : 1;
+ unsigned __pad_bits : 5;
};
static lck_grp_attr_t *necp_kernel_policy_grp_attr = NULL;
// Enforce special session priorities with entitlements
if (requested_session_priority == NECP_SESSION_PRIORITY_CONTROL ||
- requested_session_priority == NECP_SESSION_PRIORITY_PRIVILEGED_TUNNEL) {
+ requested_session_priority == NECP_SESSION_PRIORITY_PRIVILEGED_TUNNEL ||
+ requested_session_priority == NECP_SESSION_PRIORITY_HIGH_RESTRICTED) {
errno_t cred_result = priv_check_cred(kauth_cred_get(), PRIV_NET_PRIVILEGED_NECP_POLICIES, 0);
if (cred_result != 0) {
NECPLOG(LOG_ERR, "Session does not hold necessary entitlement to claim priority level %d", requested_session_priority);
u_int32_t parameter_length = necp_policy_result_get_parameter_length_from_buffer(buffer, length);
switch (type) {
case NECP_POLICY_RESULT_PASS:
+ if (parameter_length == 0 || parameter_length == sizeof(u_int32_t)) {
+ validated = TRUE;
+ }
+ break;
case NECP_POLICY_RESULT_DROP:
case NECP_POLICY_RESULT_ROUTE_RULES:
case NECP_POLICY_RESULT_SCOPED_DIRECT:
ultimate_result = necp_policy_get_result_type(policy);
switch (ultimate_result) {
case NECP_POLICY_RESULT_PASS: {
+ u_int32_t pass_flags = 0;
+ if (necp_policy_result_get_parameter_length_from_buffer(policy->result, policy->result_size) > 0) {
+ if (necp_policy_get_result_parameter(policy, (u_int8_t *)&pass_flags, sizeof(pass_flags))) {
+ ultimate_result_parameter.pass_flags = pass_flags;
+ }
+ }
if (socket_only_conditions) { // socket_ip_conditions can be TRUE or FALSE
socket_layer_non_id_conditions = TRUE;
ip_output_layer_id_condition = TRUE;
break;
}
case NECP_KERNEL_POLICY_RESULT_PASS: {
- snprintf(result_string, MAX_RESULT_STRING_LEN, "Pass");
+ snprintf(result_string, MAX_RESULT_STRING_LEN, "Pass (%X)", result_parameter.pass_flags);
break;
}
case NECP_KERNEL_POLICY_RESULT_SKIP: {
#define NECP_KERNEL_ADDRESS_TYPE_CONDITIONS (NECP_KERNEL_CONDITION_LOCAL_START | NECP_KERNEL_CONDITION_LOCAL_END | NECP_KERNEL_CONDITION_LOCAL_PREFIX | NECP_KERNEL_CONDITION_REMOTE_START | NECP_KERNEL_CONDITION_REMOTE_END | NECP_KERNEL_CONDITION_REMOTE_PREFIX | NECP_KERNEL_CONDITION_LOCAL_EMPTY | NECP_KERNEL_CONDITION_REMOTE_EMPTY | NECP_KERNEL_CONDITION_LOCAL_NETWORKS)
static void
-necp_application_fillout_info_locked(uuid_t application_uuid, uuid_t real_application_uuid, char *account, char *domain, pid_t pid, uid_t uid, u_int16_t protocol, u_int32_t bound_interface_index, u_int32_t traffic_class, union necp_sockaddr_union *local_addr, union necp_sockaddr_union *remote_addr, u_int16_t local_port, u_int16_t remote_port, bool has_client, proc_t proc, u_int32_t drop_order, u_int32_t client_flags, struct necp_socket_info *info)
+necp_application_fillout_info_locked(uuid_t application_uuid, uuid_t real_application_uuid, uuid_t responsible_application_uuid, char *account, char *domain, pid_t pid, uid_t uid, u_int16_t protocol, u_int32_t bound_interface_index, u_int32_t traffic_class, union necp_sockaddr_union *local_addr, union necp_sockaddr_union *remote_addr, u_int16_t local_port, u_int16_t remote_port, bool has_client, proc_t proc, u_int32_t drop_order, u_int32_t client_flags, struct necp_socket_info *info)
{
memset(info, 0, sizeof(struct necp_socket_info));
}
}
+ if (necp_kernel_application_policies_condition_mask & NECP_KERNEL_CONDITION_APP_ID && !uuid_is_null(responsible_application_uuid)) {
+ struct necp_uuid_id_mapping *existing_mapping = necp_uuid_lookup_app_id_locked(responsible_application_uuid);
+ if (existing_mapping != NULL) {
+ info->real_application_id = info->application_id;
+ info->application_id = existing_mapping->id;
+ info->used_responsible_pid = true;
+ }
+ }
+
if (necp_kernel_application_policies_condition_mask & NECP_KERNEL_CONDITION_ACCOUNT_ID && account != NULL) {
struct necp_string_id_mapping *existing_mapping = necp_lookup_string_to_id_locked(&necp_account_id_list, account);
if (existing_mapping) {
struct necp_client_endpoint *returned_v4_gateway,
struct necp_client_endpoint *returned_v6_gateway,
struct rtentry **returned_route, bool ignore_address,
- bool has_client)
+ bool has_client,
+ uuid_t *returned_override_euuid)
{
int error = 0;
size_t offset = 0;
uuid_clear(real_application_uuid);
proc_getexecutableuuid(proc, real_application_uuid, sizeof(real_application_uuid));
uuid_copy(application_uuid, real_application_uuid);
+ uuid_t responsible_application_uuid;
+ uuid_clear(responsible_application_uuid);
char *domain = NULL;
char *account = NULL;
bool has_checked_delegation_entitlement = FALSE;
bool has_delegation_entitlement = FALSE;
+#if defined(XNU_TARGET_OS_OSX)
+ proc_t effective_proc = proc;
+ bool release_eproc = false;
+#endif /* defined(XNU_TARGET_OS_OSX) */
+
if (returned_result == NULL) {
return EINVAL;
}
memset(returned_v6_gateway, 0, sizeof(struct necp_client_endpoint));
}
+ if (returned_override_euuid != NULL) {
+ uuid_clear(*returned_override_euuid);
+ }
+
memset(returned_result, 0, sizeof(struct necp_aggregate_result));
u_int32_t drop_order = necp_process_drop_order(proc_ucred(proc));
return 0;
}
+#if defined(XNU_TARGET_OS_OSX)
+ if (proc_pid(effective_proc) != pid) {
+ proc_t found_proc = proc_find(pid);
+ if (found_proc != PROC_NULL) {
+ effective_proc = found_proc;
+ release_eproc = true;
+ }
+ }
+ if (effective_proc->p_responsible_pid > 0 && effective_proc->p_responsible_pid != pid) {
+ proc_t responsible_proc = proc_find(effective_proc->p_responsible_pid);
+ if (responsible_proc != PROC_NULL) {
+ proc_getexecutableuuid(responsible_proc, responsible_application_uuid, sizeof(responsible_application_uuid));
+ proc_rele(responsible_proc);
+ }
+ }
+ if (release_eproc && effective_proc != PROC_NULL) {
+ proc_rele(effective_proc);
+ }
+#endif /* defined(XNU_TARGET_OS_OSX) */
+
// Lock
lck_rw_lock_shared(&necp_kernel_policy_lock);
u_int32_t route_rule_id_array[MAX_AGGREGATE_ROUTE_RULES];
size_t route_rule_id_array_count = 0;
- necp_application_fillout_info_locked(application_uuid, real_application_uuid, account, domain, pid, uid, protocol, bound_interface_index, traffic_class, &local_addr, &remote_addr, local_port, remote_port, has_client, proc, drop_order, client_flags, &info);
+ necp_application_fillout_info_locked(application_uuid, real_application_uuid, responsible_application_uuid, account, domain, pid, uid, protocol, bound_interface_index, traffic_class, &local_addr, &remote_addr, local_port, remote_port, has_client, proc, drop_order, client_flags, &info);
matched_policy = necp_socket_find_policy_match_with_info_locked(necp_kernel_socket_policies_app_layer_map, &info, &filter_control_unit, route_rule_id_array, &route_rule_id_array_count, MAX_AGGREGATE_ROUTE_RULES, &service_action, &service, netagent_ids, netagent_use_flags, NECP_MAX_NETAGENTS, required_agent_types, num_required_agent_types, proc, NULL, NULL, &drop_dest_policy_result, &drop_all_bypass);
if (matched_policy) {
returned_result->policy_id = matched_policy->id;
returned_result->routing_result = matched_policy->result;
memcpy(&returned_result->routing_result_parameter, &matched_policy->result_parameter, sizeof(returned_result->routing_result_parameter));
+ if (returned_override_euuid != NULL && info.used_responsible_pid && !(matched_policy->condition_mask & NECP_KERNEL_CONDITION_REAL_APP_ID)) {
+ uuid_copy(*returned_override_euuid, responsible_application_uuid);
+ }
} else {
bool drop_all = false;
if (necp_drop_all_order > 0 || info.drop_order > 0 || drop_dest_policy_result == NECP_KERNEL_POLICY_RESULT_DROP) {
}
if (inp->inp_flags2 & INP2_WANT_APP_POLICY && necp_kernel_socket_policies_condition_mask & NECP_KERNEL_CONDITION_APP_ID) {
+ u_int32_t responsible_application_id = 0;
+
struct necp_uuid_id_mapping *existing_mapping = necp_uuid_lookup_app_id_locked(((so->so_flags & SOF_DELEGATED) ? so->e_uuid : so->last_uuid));
if (existing_mapping) {
info->application_id = existing_mapping->id;
}
- if (!(so->so_flags & SOF_DELEGATED)) {
+#if defined(XNU_TARGET_OS_OSX)
+ if (so->so_rpid > 0) {
+ existing_mapping = necp_uuid_lookup_app_id_locked(so->so_ruuid);
+ if (existing_mapping != NULL) {
+ responsible_application_id = existing_mapping->id;
+ }
+ }
+#endif
+
+ if (responsible_application_id > 0) {
+ info->real_application_id = info->application_id;
+ info->application_id = responsible_application_id;
+ info->used_responsible_pid = true;
+ } else if (!(so->so_flags & SOF_DELEGATED)) {
info->real_application_id = info->application_id;
} else if (necp_kernel_socket_policies_condition_mask & NECP_KERNEL_CONDITION_REAL_APP_ID) {
struct necp_uuid_id_mapping *real_existing_mapping = necp_uuid_lookup_app_id_locked(so->last_uuid);
if (policy_search_array[i]->result == NECP_KERNEL_POLICY_RESULT_SKIP) {
skip_order = policy_search_array[i]->result_parameter.skip_policy_order;
skip_session_order = policy_search_array[i]->session_order + 1;
- if (skip_policy_id) {
+ if (skip_policy_id && *skip_policy_id == NECP_KERNEL_POLICY_ID_NONE) {
*skip_policy_id = policy_search_array[i]->id;
}
continue;
// Check for loopback exception
if (necp_socket_bypass(override_local_addr, override_remote_addr, inp)) {
+ if (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_SOCKET_SCOPED) {
+ // If the previous policy result was "socket scoped", un-scope the socket.
+ inp->inp_flags &= ~INP_BOUND_IF;
+ inp->inp_boundifp = NULL;
+ }
// Mark socket as a pass
inp->inp_policyresult.policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH;
inp->inp_policyresult.skip_policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH;
lck_rw_lock_shared(&necp_kernel_policy_lock);
necp_socket_fillout_info_locked(inp, override_local_addr, override_remote_addr, override_bound_interface, drop_order, &info);
- inp->inp_policyresult.app_id = info.application_id;
// Check info
u_int32_t flowhash = necp_socket_calc_flowhash_locked(&info);
return inp->inp_policyresult.policy_id;
}
+ inp->inp_policyresult.app_id = info.application_id;
+
// Match socket to policy
- necp_kernel_policy_id skip_policy_id;
+ necp_kernel_policy_id skip_policy_id = NECP_KERNEL_POLICY_ID_NONE;
u_int32_t route_rule_id_array[MAX_AGGREGATE_ROUTE_RULES];
size_t route_rule_id_array_count = 0;
matched_policy = necp_socket_find_policy_match_with_info_locked(necp_kernel_socket_policies_map[NECP_SOCKET_MAP_APP_ID_TO_BUCKET(info.application_id)], &info, &filter_control_unit, route_rule_id_array, &route_rule_id_array_count, MAX_AGGREGATE_ROUTE_RULES, &service_action, &service, netagent_ids, NULL, NECP_MAX_NETAGENTS, NULL, 0, current_proc(), &skip_policy_id, inp->inp_route.ro_rt, &drop_dest_policy_result, &drop_all_bypass);
inp->inp_policyresult.results.result = matched_policy->result;
memcpy(&inp->inp_policyresult.results.result_parameter, &matched_policy->result_parameter, sizeof(matched_policy->result_parameter));
+ if (info.used_responsible_pid && (matched_policy->condition_mask & NECP_KERNEL_CONDITION_REAL_APP_ID)) {
+ inp->inp_policyresult.app_id = info.real_application_id;
+ }
+
if (necp_socket_is_connected(inp) &&
(matched_policy->result == NECP_KERNEL_POLICY_RESULT_DROP ||
(matched_policy->result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL && !necp_socket_uses_interface(inp, matched_policy->result_parameter.tunnel_interface_index)))) {
} else {
packet->m_pkthdr.necp_mtag.necp_route_rule_id = inp->inp_policyresult.results.route_rule_id;
}
- packet->m_pkthdr.necp_mtag.necp_app_id = inp->inp_policyresult.app_id;
+ packet->m_pkthdr.necp_mtag.necp_app_id = (inp->inp_policyresult.app_id > UINT16_MAX ? (inp->inp_policyresult.app_id - UINT16_MAX) : inp->inp_policyresult.app_id);
if (skip_policy_id != NECP_KERNEL_POLICY_ID_NONE &&
skip_policy_id != NECP_KERNEL_POLICY_ID_NO_MATCH) {
bool found_mapping = FALSE;
if (packet->m_pkthdr.necp_mtag.necp_app_id != 0) {
lck_rw_lock_shared(&necp_kernel_policy_lock);
- struct necp_uuid_id_mapping *entry = necp_uuid_lookup_uuid_with_app_id_locked(packet->m_pkthdr.necp_mtag.necp_app_id);
+ necp_app_id app_id = (packet->m_pkthdr.necp_mtag.necp_app_id < UINT16_MAX ? (packet->m_pkthdr.necp_mtag.necp_app_id + UINT16_MAX) : packet->m_pkthdr.necp_mtag.necp_app_id);
+ struct necp_uuid_id_mapping *entry = necp_uuid_lookup_uuid_with_app_id_locked(app_id);
if (entry != NULL) {
uuid_copy(app_uuid, entry->uuid);
found_mapping = true;
case NECP_SESSION_PRIORITY_CONTROL:
case NECP_SESSION_PRIORITY_PRIVILEGED_TUNNEL:
case NECP_SESSION_PRIORITY_HIGH:
+ case NECP_SESSION_PRIORITY_HIGH_1:
+ case NECP_SESSION_PRIORITY_HIGH_2:
+ case NECP_SESSION_PRIORITY_HIGH_3:
+ case NECP_SESSION_PRIORITY_HIGH_4:
+ case NECP_SESSION_PRIORITY_HIGH_RESTRICTED:
case NECP_SESSION_PRIORITY_DEFAULT:
case NECP_SESSION_PRIORITY_LOW:
if (tmp_drop_dest_policy.entry_count == 0) {
#define NECP_POLICY_RESULT_USE_NETAGENT 14 // netagent uuid_t
#define NECP_POLICY_RESULT_NETAGENT_SCOPED 15 // netagent uuid_t
#define NECP_POLICY_RESULT_SCOPED_DIRECT 16 // N/A, scopes to primary physical interface
-#define NECP_POLICY_RESULT_ALLOW_UNENTITLED 17 // N/A
+#define NECP_POLICY_RESULT_ALLOW_UNENTITLED 17 // N/A
-#define NECP_POLICY_RESULT_MAX NECP_POLICY_RESULT_ALLOW_UNENTITLED
+#define NECP_POLICY_RESULT_MAX NECP_POLICY_RESULT_ALLOW_UNENTITLED
+
+/*
+ * PASS Result Flags
+ */
+#define NECP_POLICY_PASS_NO_SKIP_IPSEC 0x01
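With the new flag, a PASS result may carry an optional 4-byte flags word as its result parameter: the validation added above accepts a parameter length of either 0 or sizeof(u_int32_t), and the matcher copies the word into result_parameter.pass_flags. A hypothetical helper sketching that extraction (assumed layout: the parameter is exactly the flags word); it is not part of the change above:

#include <string.h>
#include <sys/types.h>

static u_int32_t
example_extract_pass_flags(const u_int8_t *parameter, u_int32_t parameter_length)
{
	u_int32_t pass_flags = 0;

	if (parameter != NULL && parameter_length == sizeof(pass_flags)) {
		memcpy(&pass_flags, parameter, sizeof(pass_flags));
	}
	/* Callers can then test (pass_flags & NECP_POLICY_PASS_NO_SKIP_IPSEC). */
	return pass_flags;
}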
/*
* Route Rules
#define NECP_SESSION_PRIORITY_UNKNOWN 0
#define NECP_SESSION_PRIORITY_CONTROL 1
-#define NECP_SESSION_PRIORITY_PRIVILEGED_TUNNEL 2
-#define NECP_SESSION_PRIORITY_HIGH 3
-#define NECP_SESSION_PRIORITY_DEFAULT 4
-#define NECP_SESSION_PRIORITY_LOW 5
-
+#define NECP_SESSION_PRIORITY_PRIVILEGED_TUNNEL 2
+#define NECP_SESSION_PRIORITY_HIGH 3
+#define NECP_SESSION_PRIORITY_HIGH_1 4
+#define NECP_SESSION_PRIORITY_HIGH_2 5
+#define NECP_SESSION_PRIORITY_HIGH_3 6
+#define NECP_SESSION_PRIORITY_HIGH_4 7
+#define NECP_SESSION_PRIORITY_HIGH_RESTRICTED 8
+#define NECP_SESSION_PRIORITY_DEFAULT 9
+#define NECP_SESSION_PRIORITY_LOW 10
#define NECP_SESSION_NUM_PRIORITIES NECP_SESSION_PRIORITY_LOW
typedef u_int32_t necp_policy_id;
u_int32_t index;
};
+#define NECP_USES_INTERFACE_OPTIONS_FOR_BROWSE 1
+
struct necp_client_interface_option {
u_int32_t interface_index;
u_int32_t interface_generation;
struct necp_client_endpoint *returned_v4_gateway,
struct necp_client_endpoint *returned_v6_gateway,
struct rtentry **returned_route, bool ignore_address,
- bool has_client);
+ bool has_client,
+ uuid_t *returned_override_euuid);
/*
* TLV utilities
*
#define NECP_KERNEL_POLICY_RESULT_USE_NETAGENT NECP_POLICY_RESULT_USE_NETAGENT
#define NECP_KERNEL_POLICY_RESULT_NETAGENT_SCOPED NECP_POLICY_RESULT_NETAGENT_SCOPED
#define NECP_KERNEL_POLICY_RESULT_SCOPED_DIRECT NECP_POLICY_RESULT_SCOPED_DIRECT
-#define NECP_KERNEL_POLICY_RESULT_ALLOW_UNENTITLED NECP_POLICY_RESULT_ALLOW_UNENTITLED
+#define NECP_KERNEL_POLICY_RESULT_ALLOW_UNENTITLED NECP_POLICY_RESULT_ALLOW_UNENTITLED
+
+#define NECP_KERNEL_POLICY_PASS_NO_SKIP_IPSEC NECP_POLICY_PASS_NO_SKIP_IPSEC
typedef struct {
u_int32_t identifier;
u_int32_t skip_policy_order;
u_int32_t route_rule_id;
u_int32_t netagent_id;
+ u_int32_t pass_flags;
necp_kernel_policy_service service;
} necp_kernel_policy_result_parameter;
void *agent_handle;
+ uuid_t override_euuid;
+
size_t parameters_length;
u_int8_t parameters[0];
&result, &flow->necp_flow_flags, NULL,
flow->interface_index,
&flow->local_addr, &flow->remote_addr, NULL, NULL,
- NULL, ignore_address, true);
+ NULL, ignore_address, true, NULL);
// Check for blocking agents
for (int i = 0; i < NECP_MAX_NETAGENTS; i++) {
client->interface_option_count = 0;
}
+static inline bool
+necp_netagent_is_required(const struct necp_client_parsed_parameters *parameters,
+ uuid_t *netagent_uuid)
+{
+ // Specific use agents only apply when required
+ bool required = false;
+ if (parameters != NULL) {
+ // Check required agent UUIDs
+ for (int i = 0; i < NECP_MAX_AGENT_PARAMETERS; i++) {
+ if (uuid_is_null(parameters->required_netagents[i])) {
+ break;
+ }
+ if (uuid_compare(parameters->required_netagents[i], *netagent_uuid) == 0) {
+ required = true;
+ break;
+ }
+ }
+
+ if (!required) {
+ // Check required agent types
+ bool fetched_type = false;
+ char netagent_domain[NETAGENT_DOMAINSIZE];
+ char netagent_type[NETAGENT_TYPESIZE];
+ memset(&netagent_domain, 0, NETAGENT_DOMAINSIZE);
+ memset(&netagent_type, 0, NETAGENT_TYPESIZE);
+
+ for (int i = 0; i < NECP_MAX_AGENT_PARAMETERS; i++) {
+ if (strlen(parameters->required_netagent_types[i].netagent_domain) == 0 ||
+ strlen(parameters->required_netagent_types[i].netagent_type) == 0) {
+ break;
+ }
+
+ if (!fetched_type) {
+ if (netagent_get_agent_domain_and_type(*netagent_uuid, netagent_domain, netagent_type)) {
+ fetched_type = TRUE;
+ } else {
+ break;
+ }
+ }
+
+ if ((strlen(parameters->required_netagent_types[i].netagent_domain) == 0 ||
+ strncmp(netagent_domain, parameters->required_netagent_types[i].netagent_domain, NETAGENT_DOMAINSIZE) == 0) &&
+ (strlen(parameters->required_netagent_types[i].netagent_type) == 0 ||
+ strncmp(netagent_type, parameters->required_netagent_types[i].netagent_type, NETAGENT_TYPESIZE) == 0)) {
+ required = true;
+ break;
+ }
+ }
+ }
+ }
+
+ return required;
+}
+
static bool
necp_netagent_applies_to_client(struct necp_client *client,
const struct necp_client_parsed_parameters *parameters,
if (flags & NETAGENT_FLAG_SPECIFIC_USE_ONLY) {
// Specific use agents only apply when required
- bool required = FALSE;
- if (parameters != NULL) {
- // Check required agent UUIDs
- for (int i = 0; i < NECP_MAX_AGENT_PARAMETERS; i++) {
- if (uuid_is_null(parameters->required_netagents[i])) {
- break;
- }
- if (uuid_compare(parameters->required_netagents[i], *netagent_uuid) == 0) {
- required = TRUE;
- break;
- }
- }
-
- if (!required) {
- // Check required agent types
- bool fetched_type = FALSE;
- char netagent_domain[NETAGENT_DOMAINSIZE];
- char netagent_type[NETAGENT_TYPESIZE];
- memset(&netagent_domain, 0, NETAGENT_DOMAINSIZE);
- memset(&netagent_type, 0, NETAGENT_TYPESIZE);
-
- for (int i = 0; i < NECP_MAX_AGENT_PARAMETERS; i++) {
- if (strlen(parameters->required_netagent_types[i].netagent_domain) == 0 ||
- strlen(parameters->required_netagent_types[i].netagent_type) == 0) {
- break;
- }
-
- if (!fetched_type) {
- if (netagent_get_agent_domain_and_type(*netagent_uuid, netagent_domain, netagent_type)) {
- fetched_type = TRUE;
- } else {
- break;
- }
- }
-
- if ((strlen(parameters->required_netagent_types[i].netagent_domain) == 0 ||
- strncmp(netagent_domain, parameters->required_netagent_types[i].netagent_domain, NETAGENT_DOMAINSIZE) == 0) &&
- (strlen(parameters->required_netagent_types[i].netagent_type) == 0 ||
- strncmp(netagent_type, parameters->required_netagent_types[i].netagent_type, NETAGENT_TYPESIZE) == 0)) {
- required = TRUE;
- break;
- }
- }
- }
- }
-
- applies = required;
+ applies = necp_netagent_is_required(parameters, netagent_uuid);
} else {
applies = TRUE;
}
}
}
+static void
+necp_client_add_browse_interface_options(struct necp_client *client,
+ const struct necp_client_parsed_parameters *parsed_parameters,
+ ifnet_t ifp)
+{
+ if (ifp != NULL && ifp->if_agentids != NULL) {
+ for (u_int32_t i = 0; i < ifp->if_agentcount; i++) {
+ if (uuid_is_null(ifp->if_agentids[i])) {
+ continue;
+ }
+
+ u_int32_t flags = netagent_get_flags(ifp->if_agentids[i]);
+ if ((flags & NETAGENT_FLAG_REGISTERED) &&
+ (flags & NETAGENT_FLAG_ACTIVE) &&
+ (flags & NETAGENT_FLAG_SUPPORTS_BROWSE) &&
+ (!(flags & NETAGENT_FLAG_SPECIFIC_USE_ONLY) ||
+ necp_netagent_is_required(parsed_parameters, &ifp->if_agentids[i]))) {
+ necp_client_add_interface_option_if_needed(client, ifp->if_index, ifnet_get_generation(ifp), &ifp->if_agentids[i]);
+
+ // Finding one is enough
+ break;
+ }
+ }
+ }
+}
+
static inline bool
necp_client_address_is_valid(struct sockaddr *address)
{
}
error = necp_application_find_policy_match_internal(proc, client->parameters, (u_int32_t)client->parameters_length,
- &result, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, true, true);
+ &result, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, true, true, NULL);
proc_rele(proc);
proc = PROC_NULL;
u_int32_t *flags,
u_int32_t *reason,
struct necp_client_endpoint *v4_gateway,
- struct necp_client_endpoint *v6_gateway)
+ struct necp_client_endpoint *v6_gateway,
+ uuid_t *override_euuid)
{
struct rtentry *route = NULL;
result, flags, reason, matching_if_index,
NULL, NULL,
v4_gateway, v6_gateway,
- &route, false, true);
+ &route, false, true,
+ override_euuid);
if (error != 0) {
if (route != NULL) {
rtfree(route);
// Calculate the policy result
struct necp_client_endpoint v4_gateway = {};
struct necp_client_endpoint v6_gateway = {};
- if (!necp_calculate_client_result(proc, client, parsed_parameters, &result, &flags, &reason, &v4_gateway, &v6_gateway)) {
+ uuid_t override_euuid;
+ uuid_clear(override_euuid);
+ if (!necp_calculate_client_result(proc, client, parsed_parameters, &result, &flags, &reason, &v4_gateway, &v6_gateway, &override_euuid)) {
FREE(parsed_parameters, M_NECP);
return FALSE;
}
if (necp_update_parsed_parameters(parsed_parameters, &result)) {
// Changed the parameters based on result, try again (only once)
- if (!necp_calculate_client_result(proc, client, parsed_parameters, &result, &flags, &reason, &v4_gateway, &v6_gateway)) {
+ if (!necp_calculate_client_result(proc, client, parsed_parameters, &result, &flags, &reason, &v4_gateway, &v6_gateway, &override_euuid)) {
FREE(parsed_parameters, M_NECP);
return FALSE;
}
// Save the last policy id on the client
client->policy_id = result.policy_id;
+ uuid_copy(client->override_euuid, override_euuid);
if ((parsed_parameters->flags & NECP_CLIENT_PARAMETER_FLAG_MULTIPATH) ||
+ (parsed_parameters->flags & NECP_CLIENT_PARAMETER_FLAG_BROWSE) ||
((parsed_parameters->flags & NECP_CLIENT_PARAMETER_FLAG_LISTENER) &&
result.routing_result != NECP_KERNEL_POLICY_RESULT_SOCKET_SCOPED)) {
client->allow_multiple_flows = TRUE;
}
}
}
+ } else if (parsed_parameters->flags & NECP_CLIENT_PARAMETER_FLAG_BROWSE) {
+ if (result.routing_result == NECP_KERNEL_POLICY_RESULT_SOCKET_SCOPED) {
+ if (direct_interface != NULL) {
+ // Add browse option if it has an agent
+ necp_client_add_browse_interface_options(client, parsed_parameters, direct_interface);
+ }
+ } else {
+ // Get browse interface options from global list
+ struct ifnet *browse_interface = NULL;
+ TAILQ_FOREACH(browse_interface, &ifnet_head, if_link) {
+ if (necp_ifnet_matches_parameters(browse_interface, parsed_parameters, 0, NULL, true, false)) {
+ necp_client_add_browse_interface_options(client, parsed_parameters, browse_interface);
+ }
+ }
+ }
}
// Add agents
}
parameters->ethertype = parsed_parameters.ethertype;
parameters->traffic_class = parsed_parameters.traffic_class;
- uuid_copy(parameters->euuid, parsed_parameters.effective_uuid);
+ if (uuid_is_null(client->override_euuid)) {
+ uuid_copy(parameters->euuid, parsed_parameters.effective_uuid);
+ } else {
+ uuid_copy(parameters->euuid, client->override_euuid);
+ }
parameters->is_listener = (parsed_parameters.flags & NECP_CLIENT_PARAMETER_FLAG_LISTENER) ? 1 : 0;
parameters->is_interpose = (parsed_parameters.flags & NECP_CLIENT_PARAMETER_FLAG_INTERPOSE) ? 1 : 0;
parameters->is_custom_ether = (parsed_parameters.flags & NECP_CLIENT_PARAMETER_FLAG_CUSTOM_ETHER) ? 1 : 0;
}
error = necp_application_find_policy_match_internal(p, parameters, uap->parameters_size,
- &returned_result, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, false, false);
+ &returned_result, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, false, false, NULL);
if (error) {
goto done;
}
{
lck_rw_lock_exclusive(&netagent_lock);
+ if (session->wrapper != NULL) {
+ lck_rw_done(&netagent_lock);
+ return EINVAL;
+ }
+
new_wrapper->control_unit = session->control_unit;
new_wrapper->event_handler = session->event_handler;
new_wrapper->event_context = session->event_context;
{
int data_size = 0;
struct netagent_wrapper *new_wrapper = NULL;
+ uuid_t registered_uuid;
struct netagent_session *session = (struct netagent_session *)_session;
if (session == NULL) {
memset(new_wrapper, 0, sizeof(*new_wrapper) + data_size);
__nochk_memcpy(&new_wrapper->netagent, agent, sizeof(struct netagent) + data_size);
+ uuid_copy(registered_uuid, new_wrapper->netagent.netagent_uuid);
+
int error = netagent_handle_register_inner(session, new_wrapper);
if (error != 0) {
FREE(new_wrapper, M_NETAGENT);
}
NETAGENTLOG0(LOG_DEBUG, "Registered new agent");
- netagent_post_event(new_wrapper->netagent.netagent_uuid, KEV_NETAGENT_REGISTERED, TRUE, false);
+ netagent_post_event(registered_uuid, KEV_NETAGENT_REGISTERED, TRUE, false);
return 0;
}
struct netagent_wrapper *new_wrapper = NULL;
u_int32_t response_error = 0;
struct netagent *register_netagent = (struct netagent *)(void *)payload;
+ uuid_t registered_uuid;
if (session == NULL) {
NETAGENTLOG0(LOG_ERR, "Failed to find session");
memset(new_wrapper, 0, sizeof(*new_wrapper) + data_size);
__nochk_memcpy(&new_wrapper->netagent, register_netagent, sizeof(struct netagent) + data_size);
+ uuid_copy(registered_uuid, new_wrapper->netagent.netagent_uuid);
+
response_error = netagent_handle_register_inner(session, new_wrapper);
if (response_error != 0) {
FREE(new_wrapper, M_NETAGENT);
}
NETAGENTLOG0(LOG_DEBUG, "Registered new agent");
- netagent_post_event(new_wrapper->netagent.netagent_uuid, KEV_NETAGENT_REGISTERED, TRUE, false);
+ netagent_post_event(registered_uuid, KEV_NETAGENT_REGISTERED, TRUE, false);
done:
return response_error;
int data_size = 0;
struct netagent_wrapper *new_wrapper = NULL;
u_int32_t response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
- uuid_t netagent_uuid;
- uuid_clear(netagent_uuid);
+ uuid_t registered_uuid;
if (session == NULL) {
NETAGENTLOG0(LOG_ERR, "Failed to find session");
goto fail;
}
- (void)netagent_handle_register_inner(session, new_wrapper);
+ uuid_copy(registered_uuid, new_wrapper->netagent.netagent_uuid);
+
+ error = netagent_handle_register_inner(session, new_wrapper);
+ if (error) {
+ NETAGENTLOG(LOG_ERR, "Failed to register agent: %d", error);
+ FREE(new_wrapper, M_NETAGENT);
+ response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
+ goto fail;
+ }
NETAGENTLOG0(LOG_DEBUG, "Registered new agent");
netagent_send_success_response(session, NETAGENT_MESSAGE_TYPE_REGISTER, message_id);
- netagent_post_event(new_wrapper->netagent.netagent_uuid, KEV_NETAGENT_REGISTERED, TRUE, false);
+ netagent_post_event(registered_uuid, KEV_NETAGENT_REGISTERED, TRUE, false);
return;
fail:
netagent_send_error_response(session, NETAGENT_MESSAGE_TYPE_REGISTER, message_id, response_error);
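The registration paths above copy the agent UUID into a local registered_uuid before calling netagent_handle_register_inner, because that call hands new_wrapper over to the session, after which the wrapper may no longer be safe to dereference; the KEV_NETAGENT_REGISTERED event is then posted from the stack copy. A generic sketch of the same copy-before-handoff pattern, with hypothetical names; it is not part of the change above:

#include <uuid/uuid.h>

struct example_wrapper {
	uuid_t uuid;
	/* ... agent payload ... */
};

/* Hypothetical helpers standing in for the real registration and event code. */
extern int example_take_ownership(struct example_wrapper *wrapper);
extern void example_post_event(const uuid_t agent_uuid);

static int
example_register_and_post(struct example_wrapper *wrapper)
{
	uuid_t registered_uuid;

	uuid_copy(registered_uuid, wrapper->uuid);      /* copy identifying data first */
	if (example_take_ownership(wrapper) != 0) {     /* wrapper must not be touched after this call */
		return -1;
	}
	example_post_event(registered_uuid);            /* safe: reads only the stack copy */
	return 0;
}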
u_int8_t agent_changed;
int data_size = 0;
struct netagent_wrapper *new_wrapper = NULL;
+ bool should_update_immediately;
+ uuid_t updated_uuid;
struct netagent_session *session = (struct netagent_session *)_session;
if (session == NULL) {
memset(new_wrapper, 0, sizeof(*new_wrapper) + data_size);
__nochk_memcpy(&new_wrapper->netagent, agent, sizeof(struct netagent) + data_size);
+ uuid_copy(updated_uuid, new_wrapper->netagent.netagent_uuid);
+ should_update_immediately = (NETAGENT_FLAG_UPDATE_IMMEDIATELY == (new_wrapper->netagent.netagent_flags & NETAGENT_FLAG_UPDATE_IMMEDIATELY));
+
int error = netagent_handle_update_inner(session, new_wrapper, data_size, &agent_changed, kNetagentErrorDomainPOSIX);
if (error == 0) {
- bool should_update_immediately = (NETAGENT_FLAG_UPDATE_IMMEDIATELY == (session->wrapper->netagent.netagent_flags & NETAGENT_FLAG_UPDATE_IMMEDIATELY));
- netagent_post_event(session->wrapper->netagent.netagent_uuid, KEV_NETAGENT_UPDATED, agent_changed, should_update_immediately);
+ netagent_post_event(updated_uuid, KEV_NETAGENT_UPDATED, agent_changed, should_update_immediately);
if (agent_changed == FALSE) {
// The session wrapper does not need the "new_wrapper" as nothing changed
FREE(new_wrapper, M_NETAGENT);
errno_t response_error = 0;
struct netagent *update_netagent = (struct netagent *)(void *)payload;
u_int8_t agent_changed;
+ bool should_update_immediately;
+ uuid_t updated_uuid;
if (session == NULL) {
NETAGENTLOG0(LOG_ERR, "Failed to find session");
memset(new_wrapper, 0, sizeof(*new_wrapper) + data_size);
__nochk_memcpy(&new_wrapper->netagent, update_netagent, sizeof(struct netagent) + data_size);
+ uuid_copy(updated_uuid, new_wrapper->netagent.netagent_uuid);
+ should_update_immediately = (NETAGENT_FLAG_UPDATE_IMMEDIATELY == (new_wrapper->netagent.netagent_flags & NETAGENT_FLAG_UPDATE_IMMEDIATELY));
+
response_error = netagent_handle_update_inner(session, new_wrapper, data_size, &agent_changed, kNetagentErrorDomainPOSIX);
if (response_error == 0) {
- bool should_update_immediately = (NETAGENT_FLAG_UPDATE_IMMEDIATELY == (session->wrapper->netagent.netagent_flags & NETAGENT_FLAG_UPDATE_IMMEDIATELY));
- netagent_post_event(session->wrapper->netagent.netagent_uuid, KEV_NETAGENT_UPDATED, agent_changed, should_update_immediately);
+ netagent_post_event(updated_uuid, KEV_NETAGENT_UPDATED, agent_changed, should_update_immediately);
if (agent_changed == FALSE) {
// The session wrapper does not need the "new_wrapper" as nothing changed
FREE(new_wrapper, M_NETAGENT);
struct netagent_wrapper *new_wrapper = NULL;
u_int32_t response_error = NETAGENT_MESSAGE_ERROR_INTERNAL;
u_int8_t agent_changed;
+ uuid_t updated_uuid;
+ bool should_update_immediately;
if (session == NULL) {
NETAGENTLOG0(LOG_ERR, "Failed to find session");
goto fail;
}
+ uuid_copy(updated_uuid, new_wrapper->netagent.netagent_uuid);
+ should_update_immediately = (NETAGENT_FLAG_UPDATE_IMMEDIATELY == (new_wrapper->netagent.netagent_flags & NETAGENT_FLAG_UPDATE_IMMEDIATELY));
+
response_error = netagent_handle_update_inner(session, new_wrapper, data_size, &agent_changed, kNetagentErrorDomainUserDefined);
if (response_error != 0) {
FREE(new_wrapper, M_NETAGENT);
}
netagent_send_success_response(session, NETAGENT_MESSAGE_TYPE_UPDATE, message_id);
- bool should_update_immediately = (NETAGENT_FLAG_UPDATE_IMMEDIATELY == (session->wrapper->netagent.netagent_flags & NETAGENT_FLAG_UPDATE_IMMEDIATELY));
- netagent_post_event(session->wrapper->netagent.netagent_uuid, KEV_NETAGENT_UPDATED, agent_changed, should_update_immediately);
+ netagent_post_event(updated_uuid, KEV_NETAGENT_UPDATED, agent_changed, should_update_immediately);
if (agent_changed == FALSE) {
// The session wrapper does not need the "new_wrapper" as nothing changed
#define DHCP_INFINITE_LEASE ((dhcp_lease_t)-1)
#define DHCP_INFINITE_TIME ((dhcp_time_secs_t)-1)
-#define DHCP_FLAGS_BROADCAST ((u_short)0x0001)
+#define DHCP_FLAGS_BROADCAST ((u_int16_t)0x8000)
#endif /* _NETINET_DHCP_H */
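Per RFC 2131, the BROADCAST bit is the most significant bit of the 16-bit DHCP flags field, so the corrected host-order constant is 0x8000 rather than 0x0001; the field itself is carried in network byte order. A small userspace-style sketch with a hypothetical constant name; it is not part of the change above:

#include <stdint.h>
#include <arpa/inet.h>

#define EXAMPLE_DHCP_FLAGS_BROADCAST ((uint16_t)0x8000)

static uint16_t
example_dhcp_flags_on_wire(void)
{
	/* Serialized as bytes 0x80 0x00: the leftmost bit of the flags field is set. */
	return htons(EXAMPLE_DHCP_FLAGS_BROADCAST);
}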
#include <net/route.h>
#include <net/flowhash.h>
#include <net/ntstat.h>
+#include <net/content_filter.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/tcp.h>
#define APPLE_WEBCLIP_ID_PREFIX "com.apple.webapp"
static uint16_t
-flow_divert_trie_search(struct flow_divert_trie *trie, uint8_t *string_bytes)
+flow_divert_trie_search(struct flow_divert_trie *trie, const uint8_t *string_bytes)
{
uint16_t current = trie->root;
uint16_t string_idx = 0;
return current; /* Got an exact match */
} else if (string_idx == strlen(APPLE_WEBCLIP_ID_PREFIX) &&
0 == strncmp((const char *)string_bytes, APPLE_WEBCLIP_ID_PREFIX, string_idx)) {
- string_bytes[string_idx] = '\0';
return current; /* Got an apple webclip id prefix match */
} else if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
next = TRIE_CHILD(trie, current, string_bytes[string_idx]);
}
static int
-flow_divert_get_src_proc(struct socket *so, proc_t *proc)
+flow_divert_add_proc_info(struct flow_divert_pcb *fd_cb, proc_t proc, const char *signing_id, mbuf_t connect_packet, bool is_effective)
{
- int release = 0;
+ int error = 0;
+ int cdhash_error = 0;
+ unsigned char cdhash[SHA1_RESULTLEN] = { 0 };
+ audit_token_t audit_token = {};
+ const char *proc_cs_id = signing_id;
- if (so->so_flags & SOF_DELEGATED) {
- if ((*proc)->p_pid != so->e_pid) {
- *proc = proc_find(so->e_pid);
- release = 1;
- } else if (uuid_compare((*proc)->p_uuid, so->e_uuid)) {
- *proc = flow_divert_find_proc_by_uuid(so->e_uuid);
- release = 1;
+ proc_lock(proc);
+
+ if (proc_cs_id == NULL) {
+ if (proc->p_csflags & (CS_VALID | CS_DEBUGGED)) {
+ proc_cs_id = cs_identity_get(proc);
+ } else {
+ FDLOG0(LOG_ERR, fd_cb, "Signature of proc is invalid");
+ }
+ }
+
+ if (is_effective) {
+ lck_rw_lock_shared(&fd_cb->group->lck);
+ if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
+ if (proc_cs_id != NULL) {
+ uint16_t result = flow_divert_trie_search(&fd_cb->group->signing_id_trie, (const uint8_t *)proc_cs_id);
+ if (result == NULL_TRIE_IDX) {
+ FDLOG(LOG_WARNING, fd_cb, "%s did not match", proc_cs_id);
+ error = EPERM;
+ } else {
+ FDLOG(LOG_INFO, fd_cb, "%s matched", proc_cs_id);
+ }
+ } else {
+ error = EPERM;
+ }
+ }
+ lck_rw_done(&fd_cb->group->lck);
+ }
+
+ if (error != 0) {
+ goto done;
+ }
+
+ /*
+ * If signing_id is not NULL then it came from the flow divert token and will be added
+ * as part of the token, so there is no need to add it here.
+ */
+ if (signing_id == NULL && proc_cs_id != NULL) {
+ error = flow_divert_packet_append_tlv(connect_packet,
+ (is_effective ? FLOW_DIVERT_TLV_SIGNING_ID : FLOW_DIVERT_TLV_APP_REAL_SIGNING_ID),
+ strlen(proc_cs_id),
+ proc_cs_id);
+ if (error != 0) {
+ FDLOG(LOG_ERR, fd_cb, "failed to append the signing ID: %d", error);
+ goto done;
}
- } else if (*proc == PROC_NULL) {
- *proc = current_proc();
}
- if (*proc != PROC_NULL) {
- if ((*proc)->p_pid == 0) {
- if (release) {
- proc_rele(*proc);
+ cdhash_error = proc_getcdhash(proc, cdhash);
+ if (cdhash_error == 0) {
+ error = flow_divert_packet_append_tlv(connect_packet,
+ (is_effective ? FLOW_DIVERT_TLV_CDHASH : FLOW_DIVERT_TLV_APP_REAL_CDHASH),
+ sizeof(cdhash),
+ cdhash);
+ if (error) {
+ FDLOG(LOG_ERR, fd_cb, "failed to append the cdhash: %d", error);
+ goto done;
+ }
+ } else {
+ FDLOG(LOG_ERR, fd_cb, "failed to get the cdhash: %d", cdhash_error);
+ }
+
+ task_t task = proc_task(proc);
+ if (task != TASK_NULL) {
+ mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
+ kern_return_t rc = task_info(task, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count);
+ if (rc == KERN_SUCCESS) {
+ int append_error = flow_divert_packet_append_tlv(connect_packet,
+ (is_effective ? FLOW_DIVERT_TLV_APP_AUDIT_TOKEN : FLOW_DIVERT_TLV_APP_REAL_AUDIT_TOKEN),
+ sizeof(audit_token_t),
+ &audit_token);
+ if (append_error) {
+ FDLOG(LOG_ERR, fd_cb, "failed to append app audit token: %d", append_error);
}
- release = 0;
- *proc = PROC_NULL;
}
}
- return release;
+done:
+ proc_unlock(proc);
+
+ return error;
+}
+
+static int
+flow_divert_add_all_proc_info(struct flow_divert_pcb *fd_cb, struct socket *so, proc_t proc, const char *signing_id, mbuf_t connect_packet)
+{
+ int error = 0;
+ proc_t effective_proc = PROC_NULL;
+ proc_t responsible_proc = PROC_NULL;
+ proc_t real_proc = proc_find(so->last_pid);
+ bool release_real_proc = true;
+
+ proc_t src_proc = PROC_NULL;
+ proc_t real_src_proc = PROC_NULL;
+
+ if (real_proc == PROC_NULL) {
+ FDLOG(LOG_ERR, fd_cb, "failed to find the real proc record for %d", so->last_pid);
+ release_real_proc = false;
+ real_proc = proc;
+ if (real_proc == PROC_NULL) {
+ real_proc = current_proc();
+ }
+ }
+
+ if (so->so_flags & SOF_DELEGATED) {
+ if (real_proc->p_pid != so->e_pid) {
+ effective_proc = proc_find(so->e_pid);
+ } else if (uuid_compare(real_proc->p_uuid, so->e_uuid)) {
+ effective_proc = flow_divert_find_proc_by_uuid(so->e_uuid);
+ }
+ }
+
+#if defined(XNU_TARGET_OS_OSX)
+ lck_rw_lock_shared(&fd_cb->group->lck);
+ if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
+ if (so->so_rpid > 0) {
+ responsible_proc = proc_find(so->so_rpid);
+ }
+ }
+ lck_rw_done(&fd_cb->group->lck);
+#endif
+
+ real_src_proc = real_proc;
+
+ if (responsible_proc != PROC_NULL) {
+ src_proc = responsible_proc;
+ if (effective_proc != NULL) {
+ real_src_proc = effective_proc;
+ }
+ } else if (effective_proc != PROC_NULL) {
+ src_proc = effective_proc;
+ } else {
+ src_proc = real_proc;
+ }
+
+ error = flow_divert_add_proc_info(fd_cb, src_proc, signing_id, connect_packet, true);
+ if (error != 0) {
+ goto done;
+ }
+
+ if (real_src_proc != NULL && real_src_proc != src_proc) {
+ error = flow_divert_add_proc_info(fd_cb, real_src_proc, NULL, connect_packet, false);
+ if (error != 0) {
+ goto done;
+ }
+ }
+
+done:
+ if (responsible_proc != PROC_NULL) {
+ proc_rele(responsible_proc);
+ }
+
+ if (effective_proc != PROC_NULL) {
+ proc_rele(effective_proc);
+ }
+
+ if (real_proc != PROC_NULL && release_real_proc) {
+ proc_rele(real_proc);
+ }
+
+ return error;
}
static int
static int
flow_divert_create_connect_packet(struct flow_divert_pcb *fd_cb, struct sockaddr *to, struct socket *so, proc_t p, mbuf_t *out_connect_packet)
{
- int error = 0;
- int flow_type = 0;
+ int error = 0;
+ int flow_type = 0;
char *signing_id = NULL;
- int free_signing_id = 0;
mbuf_t connect_packet = NULL;
- proc_t src_proc = p;
- int release_proc = 0;
+ cfil_sock_id_t cfil_sock_id = CFIL_SOCK_ID_NONE;
+ const void *cfil_id = NULL;
+ size_t cfil_id_size = 0;
+ struct inpcb *inp = sotoinpcb(so);
+ struct ifnet *ifp = NULL;
error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT, &connect_packet);
if (error) {
goto done;
}
- error = EPERM;
if (fd_cb->connect_token != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_HMAC)) {
uint32_t sid_size = 0;
if (signing_id != NULL) {
flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, signing_id, NULL);
FDLOG(LOG_INFO, fd_cb, "Got %s from token", signing_id);
- free_signing_id = 1;
}
}
}
socket_unlock(so, 0);
- release_proc = flow_divert_get_src_proc(so, &src_proc);
- if (src_proc != PROC_NULL) {
- proc_lock(src_proc);
- if (signing_id == NULL) {
- if (src_proc->p_csflags & (CS_VALID | CS_DEBUGGED)) {
- const char * cs_id;
- cs_id = cs_identity_get(src_proc);
- signing_id = __DECONST(char *, cs_id);
- } else {
- FDLOG0(LOG_WARNING, fd_cb, "Signature is invalid");
- }
- }
- } else {
- FDLOG0(LOG_WARNING, fd_cb, "Failed to determine the current proc");
- }
-
- if (signing_id != NULL) {
- uint16_t result = NULL_TRIE_IDX;
- lck_rw_lock_shared(&fd_cb->group->lck);
- if (fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP) {
- result = 1;
- } else {
- result = flow_divert_trie_search(&fd_cb->group->signing_id_trie, (uint8_t *)signing_id);
- }
- lck_rw_done(&fd_cb->group->lck);
- if (result != NULL_TRIE_IDX) {
- error = 0;
- FDLOG(LOG_INFO, fd_cb, "%s matched", signing_id);
-
- error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_SIGNING_ID, strlen(signing_id), signing_id);
- if (error == 0) {
- if (src_proc != PROC_NULL) {
- unsigned char cdhash[SHA1_RESULTLEN];
- error = proc_getcdhash(src_proc, cdhash);
- if (error == 0) {
- error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CDHASH, sizeof(cdhash), cdhash);
- if (error) {
- FDLOG(LOG_ERR, fd_cb, "failed to append the cdhash: %d", error);
- }
- } else {
- FDLOG(LOG_ERR, fd_cb, "failed to get the cdhash: %d", error);
- }
- }
- } else {
- FDLOG(LOG_ERR, fd_cb, "failed to append the signing ID: %d", error);
- }
- } else {
- FDLOG(LOG_WARNING, fd_cb, "%s did not match", signing_id);
- }
- } else {
- FDLOG0(LOG_WARNING, fd_cb, "Failed to get the code signing identity");
- if (fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP) {
- error = 0;
- }
- }
+ error = flow_divert_add_all_proc_info(fd_cb, so, p, signing_id, connect_packet);
- if (error == 0 && src_proc != PROC_NULL) {
- task_t task = proc_task(src_proc);
- if (task != TASK_NULL) {
- audit_token_t audit_token;
- mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
- kern_return_t rc = task_info(task, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count);
- if (rc == KERN_SUCCESS) {
- error = flow_divert_packet_append_tlv(connect_packet,
- FLOW_DIVERT_TLV_APP_AUDIT_TOKEN,
- sizeof(audit_token_t),
- &audit_token);
- if (error) {
- FDLOG(LOG_ERR, fd_cb, "failed to append app audit token: %d", error);
- error = 0; /* do not treat this as fatal error, proceed */
- }
- } else {
- FDLOG(LOG_ERR, fd_cb, "failed to retrieve app audit token: %d", rc);
- }
- }
- }
-
- if (src_proc != PROC_NULL) {
- proc_unlock(src_proc);
- if (release_proc) {
- proc_rele(src_proc);
- }
- }
socket_lock(so, 0);
- if (free_signing_id) {
+ if (signing_id != NULL) {
FREE(signing_id, M_TEMP);
}
if (error) {
+ FDLOG(LOG_ERR, fd_cb, "Failed to add source proc info: %d", error);
goto done;
}
goto done;
}
- if (fd_cb->so->so_flags & SOF_DELEGATED) {
- error = flow_divert_packet_append_tlv(connect_packet,
- FLOW_DIVERT_TLV_PID,
- sizeof(fd_cb->so->e_pid),
- &fd_cb->so->e_pid);
- if (error) {
- goto done;
- }
-
- error = flow_divert_packet_append_tlv(connect_packet,
- FLOW_DIVERT_TLV_UUID,
- sizeof(fd_cb->so->e_uuid),
- &fd_cb->so->e_uuid);
- if (error) {
- goto done;
- }
- } else {
- error = flow_divert_packet_append_tlv(connect_packet,
- FLOW_DIVERT_TLV_PID,
- sizeof(fd_cb->so->e_pid),
- &fd_cb->so->last_pid);
- if (error) {
- goto done;
- }
-
- error = flow_divert_packet_append_tlv(connect_packet,
- FLOW_DIVERT_TLV_UUID,
- sizeof(fd_cb->so->e_uuid),
- &fd_cb->so->last_uuid);
- if (error) {
- goto done;
- }
- }
-
if (fd_cb->connect_token != NULL) {
unsigned int token_len = m_length(fd_cb->connect_token);
mbuf_concatenate(connect_packet, fd_cb->connect_token);
error = EALREADY;
goto done;
} else {
- struct inpcb *inp = sotoinpcb(so);
if (flow_divert_has_pcb_local_address(inp)) {
error = flow_divert_inp_to_sockaddr(inp, &fd_cb->local_address);
if (error) {
}
}
+ if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp != NULL) {
+ ifp = inp->inp_boundifp;
+ } else if (inp->inp_last_outifp != NULL) {
+ ifp = inp->inp_last_outifp;
+ }
+
+ if (ifp != NULL) {
+ uint32_t flow_if_index = ifp->if_index;
+ error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_OUT_IF_INDEX,
+ sizeof(flow_if_index), &flow_if_index);
+ if (error) {
+ goto done;
+ }
+ }
+
if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
uint32_t flags = FLOW_DIVERT_TOKEN_FLAG_TFO;
error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags);
}
}
+ cfil_sock_id = cfil_sock_id_from_socket(so);
+ if (cfil_sock_id != CFIL_SOCK_ID_NONE) {
+ cfil_id = &cfil_sock_id;
+ cfil_id_size = sizeof(cfil_sock_id);
+ } else if (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) {
+ cfil_id = &inp->necp_client_uuid;
+ cfil_id_size = sizeof(inp->necp_client_uuid);
+ }
+
+ if (cfil_id != NULL && cfil_id_size > 0 && cfil_id_size <= sizeof(uuid_t)) {
+ error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CFIL_ID, cfil_id_size, cfil_id);
+ if (error) {
+ goto done;
+ }
+ }
+
done:
if (!error) {
*out_connect_packet = connect_packet;
flow_divert_disconnect_socket(fd_cb->so);
} else if (!(fd_cb->so->so_state & SS_CANTRCVMORE)) {
if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
- if (sbappendstream(&fd_cb->so->so_rcv, data)) {
- fd_cb->bytes_received += data_size;
- flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
- fd_cb->sb_size = fd_cb->so->so_rcv.sb_cc;
+ int appended = sbappendstream(&fd_cb->so->so_rcv, data);
+ fd_cb->bytes_received += data_size;
+ flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
+ fd_cb->sb_size += data_size;
+ if (appended) {
sorwakeup(fd_cb->so);
- data = NULL;
- } else {
- FDLOG0(LOG_ERR, fd_cb, "received data, but appendstream failed");
}
+ data = NULL;
} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
struct sockaddr *append_sa;
mbuf_t mctl;
mctl = flow_divert_get_control_mbuf(fd_cb);
int append_error = 0;
- if (sbappendaddr(&fd_cb->so->so_rcv, append_sa, data, mctl, &append_error)) {
+ if (sbappendaddr(&fd_cb->so->so_rcv, append_sa, data, mctl, &append_error) || append_error == EJUSTRETURN) {
fd_cb->bytes_received += data_size;
flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
- fd_cb->sb_size = fd_cb->so->so_rcv.sb_cc;
- sorwakeup(fd_cb->so);
+ fd_cb->sb_size += data_size;
+ if (append_error == 0) {
+ sorwakeup(fd_cb->so);
+ }
data = NULL;
- } else if (append_error != EJUSTRETURN) {
- FDLOG0(LOG_ERR, fd_cb, "received data, but sbappendaddr failed");
}
if (!error) {
FREE(append_sa, M_TEMP);
static boolean_t
flow_divert_has_pcb_local_address(const struct inpcb *inp)
{
- return inp->inp_lport != 0
- && (inp->inp_laddr.s_addr != INADDR_ANY || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr));
+ return inp->inp_lport != 0;
}
static errno_t
#define FLOW_DIVERT_TLV_TARGET_PORT 23
#define FLOW_DIVERT_TLV_CDHASH 24
#define FLOW_DIVERT_TLV_SIGNING_ID 25
-#define FLOW_DIVERT_TLV_PID 26
-#define FLOW_DIVERT_TLV_UUID 27
+
+
#define FLOW_DIVERT_TLV_PREFIX_COUNT 28
#define FLOW_DIVERT_TLV_FLAGS 29
#define FLOW_DIVERT_TLV_FLOW_TYPE 30
#define FLOW_DIVERT_TLV_APP_DATA 31
#define FLOW_DIVERT_TLV_APP_AUDIT_TOKEN 32
+#define FLOW_DIVERT_TLV_APP_REAL_SIGNING_ID 33
+#define FLOW_DIVERT_TLV_APP_REAL_CDHASH 34
+#define FLOW_DIVERT_TLV_APP_REAL_AUDIT_TOKEN 35
+#define FLOW_DIVERT_TLV_CFIL_ID 36
#define FLOW_DIVERT_FLOW_TYPE_TCP 1
#define FLOW_DIVERT_FLOW_TYPE_UDP 3
uint32_t pflags = 0;
int32_t ogencnt;
int err = 0;
+ uint8_t *lookup_uuid = NULL;
if (!net_io_policy_uuid ||
so == NULL || inp->inp_state == INPCB_STATE_DEAD) {
return 0;
}
+#if defined(XNU_TARGET_OS_OSX)
+ if (so->so_rpid > 0) {
+ lookup_uuid = so->so_ruuid;
+ }
+#endif
+ if (lookup_uuid == NULL) {
+ lookup_uuid = ((so->so_flags & SOF_DELEGATED) ? so->e_uuid : so->last_uuid);
+ }
+
ogencnt = so->so_policy_gencnt;
- err = proc_uuid_policy_lookup(((so->so_flags & SOF_DELEGATED) ?
- so->e_uuid : so->last_uuid), &pflags, &so->so_policy_gencnt);
+ err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
/*
* Discard cached generation count if the entry is gone (ENOENT),
necp_mark_packet_from_ip(m, necp_matched_policy_id);
switch (necp_result) {
case NECP_KERNEL_POLICY_RESULT_PASS:
+ if (necp_result_parameter.pass_flags & NECP_KERNEL_POLICY_PASS_NO_SKIP_IPSEC) {
+ break;
+ }
/* Check if the interface is allowed */
if (!necp_packet_is_allowed_over_interface(m, ifp)) {
error = EHOSTUNREACH;
CTLFLAG_RW | CTLFLAG_LOCKED, u_int32_t, tcp_do_autorcvbuf, 1,
"Enable automatic socket buffer tuning");
+SYSCTL_SKMEM_TCP_INT(OID_AUTO, autotunereorder,
+ CTLFLAG_RW | CTLFLAG_LOCKED, u_int32_t, tcp_autotune_reorder, 1,
+ "Enable automatic socket buffer tuning even when reordering is present");
+
SYSCTL_SKMEM_TCP_INT(OID_AUTO, autorcvbufmax,
CTLFLAG_RW | CTLFLAG_LOCKED, u_int32_t, tcp_autorcvbuf_max, 512 * 1024,
"Maximum receive socket buffer size");
* - the high water mark already reached the maximum
* - the stream is in background and receive side is being
* throttled
- * - if there are segments in reassembly queue indicating loss,
- * do not need to increase recv window during recovery as more
- * data is not going to be sent. A duplicate ack sent during
- * recovery should not change the receive window
*/
if (tcp_do_autorcvbuf == 0 ||
(sbrcv->sb_flags & SB_AUTOSIZE) == 0 ||
sbrcv->sb_hiwat >= rcvbuf_max ||
(tp->t_flagsext & TF_RECV_THROTTLE) ||
(so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ||
- !LIST_EMPTY(&tp->t_segq)) {
+ (!tcp_autotune_reorder && !LIST_EMPTY(&tp->t_segq))) {
/* Can not resize the socket buffer, just return */
goto out;
}
if (TSTMP_GEQ(to->to_tsecr, tp->rfbuf_ts)) {
if (tp->rfbuf_cnt + pktlen > (sbrcv->sb_hiwat -
(sbrcv->sb_hiwat >> 1))) {
- tp->rfbuf_cnt += pktlen;
int32_t rcvbuf_inc, min_incr;
+
+ tp->rfbuf_cnt += pktlen;
/*
* Increment the receive window by a
* multiple of maximum sized segments.
memcpy(&saved_hdr, ip, ip->ip_hl << 2);
ip = (struct ip *)&saved_hdr[0];
}
+
+ if (tcp_autotune_reorder) {
+ tcp_sbrcv_grow(tp, &so->so_rcv, &to, tlen, TCP_AUTORCVBUF_MAX(ifp));
+ }
+
memcpy(&saved_tcphdr, th, sizeof(struct tcphdr));
thflags = tcp_reass(tp, th, &tlen, m, ifp, &read_wakeup);
th = &saved_tcphdr;
* know that foreign host supports TAO, suppress sending segment.
*/
if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
- if (tp->t_state != TCPS_SYN_RECEIVED || tfo_enabled(tp))
- flags &= ~TH_SYN;
- off--;
- len++;
- if (len > 0 && tp->t_state == TCPS_SYN_SENT) {
- while (inp->inp_sndinprog_cnt == 0 &&
- tp->t_pktlist_head != NULL) {
- packetlist = tp->t_pktlist_head;
- packchain_listadd = tp->t_lastchain;
- packchain_sent++;
- TCP_PKTLIST_CLEAR(tp);
-
- error = tcp_ip_output(so, tp, packetlist,
- packchain_listadd, tp_inp_options,
- (so_options & SO_DONTROUTE),
- (sack_rxmit || (sack_bytes_rxmt != 0)),
- isipv6);
+ if (tp->t_state == TCPS_SYN_RECEIVED && tfo_enabled(tp) && tp->snd_nxt == tp->snd_una + 1) {
+ /* We are sending the SYN again! */
+ off--;
+ len++;
+ } else {
+ if (tp->t_state != TCPS_SYN_RECEIVED || tfo_enabled(tp)) {
+ flags &= ~TH_SYN;
}
- /*
- * tcp was closed while we were in ip,
- * resume close
- */
- if (inp->inp_sndinprog_cnt == 0 &&
- (tp->t_flags & TF_CLOSING)) {
- tp->t_flags &= ~TF_CLOSING;
- (void) tcp_close(tp);
- } else {
- tcp_check_timer_state(tp);
+ off--;
+ len++;
+ if (len > 0 && tp->t_state == TCPS_SYN_SENT) {
+ while (inp->inp_sndinprog_cnt == 0 &&
+ tp->t_pktlist_head != NULL) {
+ packetlist = tp->t_pktlist_head;
+ packchain_listadd = tp->t_lastchain;
+ packchain_sent++;
+ TCP_PKTLIST_CLEAR(tp);
+
+ error = tcp_ip_output(so, tp, packetlist,
+ packchain_listadd, tp_inp_options,
+ (so_options & SO_DONTROUTE),
+ (sack_rxmit || (sack_bytes_rxmt != 0)),
+ isipv6);
+ }
+
+ /*
+ * tcp was closed while we were in ip,
+ * resume close
+ */
+ if (inp->inp_sndinprog_cnt == 0 &&
+ (tp->t_flags & TF_CLOSING)) {
+ tp->t_flags &= ~TF_CLOSING;
+ (void) tcp_close(tp);
+ } else {
+ tcp_check_timer_state(tp);
+ }
+ KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END,
+ 0,0,0,0,0);
+ return 0;
}
- KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END,
- 0,0,0,0,0);
- return 0;
}
}
return error;
}
- tt = s * TCP_RETRANSHZ / 1000;
+ tt = temp * TCP_RETRANSHZ / 1000;
if (tt < 1 || tt > INT_MAX) {
return EINVAL;
}
switch (necp_result) {
case NECP_KERNEL_POLICY_RESULT_PASS:
+ if (necp_result_parameter.pass_flags & NECP_KERNEL_POLICY_PASS_NO_SKIP_IPSEC) {
+ break;
+ }
goto skip_ipsec;
case NECP_KERNEL_POLICY_RESULT_DROP:
error = EHOSTUNREACH;
* Apply routing function on the affected upstream and downstream prefixes,
* i.e. either set or clear RTF_PROXY on the cloning prefix route; all route
* entries that were cloned off these prefixes will be blown away. Caller
- * must have acquried proxy6_lock and must not be holding nd6_mutex.
+ * must have acquired proxy6_lock and must not be holding nd6_mutex.
*/
static void
nd6_prproxy_prelist_setroute(boolean_t enable,
static LIST_HEAD(_sptree, secpolicy) sptree[IPSEC_DIR_MAX]; /* SPD */
static LIST_HEAD(_sahtree, secashead) sahtree; /* SAD */
static LIST_HEAD(_regtree, secreg) regtree[SADB_SATYPE_MAX + 1];
+static LIST_HEAD(_custom_sahtree, secashead) custom_sahtree;
/* registered list */
#define SPIHASHSIZE 128
u_int8_t, u_int32_t, u_int32_t);
static u_int key_getspreqmsglen(struct secpolicy *);
static int key_spdexpire(struct secpolicy *);
-static struct secashead *key_newsah(struct secasindex *, ifnet_t, u_int, u_int8_t);
+static struct secashead *key_newsah(struct secasindex *, ifnet_t, u_int, u_int8_t, u_int16_t);
static struct secasvar *key_newsav(struct mbuf *,
const struct sadb_msghdr *, struct secashead *, int *,
struct socket *);
-static struct secashead *key_getsah(struct secasindex *);
+static struct secashead *key_getsah(struct secasindex *, u_int16_t);
static struct secasvar *key_checkspidup(struct secasindex *, u_int32_t);
static void key_setspi __P((struct secasvar *, u_int32_t));
static struct secasvar *key_getsavbyspi(struct secashead *, u_int32_t);
ipsec_policy_count = 0;
LIST_INIT(&sahtree);
+ LIST_INIT(&custom_sahtree);
for (i = 0; i <= SADB_SATYPE_MAX; i++) {
LIST_INIT(&regtree[i]);
key_newsah(struct secasindex *saidx,
ifnet_t ipsec_if,
u_int outgoing_if,
- u_int8_t dir)
+ u_int8_t dir,
+ u_int16_t flags)
{
struct secashead *newsah;
panic("key_newsaidx: NULL pointer is passed.\n");
}
+ VERIFY(flags == SECURITY_ASSOCIATION_PFKEY || flags == SECURITY_ASSOCIATION_CUSTOM_IPSEC);
+
newsah = keydb_newsecashead();
if (newsah == NULL) {
return NULL;
newsah->dir = dir;
/* add to saidxtree */
newsah->state = SADB_SASTATE_MATURE;
- LIST_INSERT_HEAD(&sahtree, newsah, chain);
+ newsah->flags = flags;
+
+ if (flags == SECURITY_ASSOCIATION_PFKEY) {
+ LIST_INSERT_HEAD(&sahtree, newsah, chain);
+ } else {
+ LIST_INSERT_HEAD(&custom_sahtree, newsah, chain);
+ }
key_start_timehandler();
return newsah;
* others : found, pointer to a SA.
*/
static struct secashead *
-key_getsah(struct secasindex *saidx)
+key_getsah(struct secasindex *saidx, u_int16_t flags)
{
struct secashead *sah;
LCK_MTX_ASSERT(sadb_mutex, LCK_MTX_ASSERT_OWNED);
- LIST_FOREACH(sah, &sahtree, chain) {
- if (sah->state == SADB_SASTATE_DEAD) {
- continue;
+ if ((flags & SECURITY_ASSOCIATION_ANY) == SECURITY_ASSOCIATION_ANY ||
+ (flags & SECURITY_ASSOCIATION_PFKEY) == SECURITY_ASSOCIATION_PFKEY) {
+ LIST_FOREACH(sah, &sahtree, chain) {
+ if (sah->state == SADB_SASTATE_DEAD) {
+ continue;
+ }
+ if (key_cmpsaidx(&sah->saidx, saidx, CMP_REQID)) {
+ return sah;
+ }
}
- if (key_cmpsaidx(&sah->saidx, saidx, CMP_REQID)) {
- return sah;
+ }
+
+ if ((flags & SECURITY_ASSOCIATION_ANY) == SECURITY_ASSOCIATION_ANY ||
+ (flags & SECURITY_ASSOCIATION_CUSTOM_IPSEC) == SECURITY_ASSOCIATION_CUSTOM_IPSEC) {
+ LIST_FOREACH(sah, &custom_sahtree, chain) {
+ if (sah->state == SADB_SASTATE_DEAD) {
+ continue;
+ }
+ if (key_cmpsaidx(&sah->saidx, saidx, 0)) {
+ return sah;
+ }
}
}
LCK_MTX_ASSERT(sadb_mutex, LCK_MTX_ASSERT_OWNED);
- sah = key_getsah(saidx);
+ sah = key_getsah(saidx, SECURITY_ASSOCIATION_ANY);
if (!sah) {
- return key_newsah(saidx, NULL, 0, dir);
+ return key_newsah(saidx, NULL, 0, dir, SECURITY_ASSOCIATION_PFKEY);
}
return sah;
}
}
/* get a SA index */
- if ((newsah = key_getsah(&saidx)) == NULL) {
+ if ((newsah = key_getsah(&saidx, SECURITY_ASSOCIATION_ANY)) == NULL) {
/* create a new SA index: key_addspi is always used for inbound spi */
- if ((newsah = key_newsah(&saidx, ipsec_if, key_get_outgoing_ifindex_from_message(mhp, SADB_X_EXT_IPSECIF), IPSEC_DIR_INBOUND)) == NULL) {
+ if ((newsah = key_newsah(&saidx, ipsec_if, key_get_outgoing_ifindex_from_message(mhp, SADB_X_EXT_IPSECIF), IPSEC_DIR_INBOUND, SECURITY_ASSOCIATION_PFKEY)) == NULL) {
lck_mtx_unlock(sadb_mutex);
ipseclog((LOG_DEBUG, "key_getspi: No more memory.\n"));
return key_senderror(so, m, ENOBUFS);
}
}
+ if ((newsah->flags & SECURITY_ASSOCIATION_CUSTOM_IPSEC) == SECURITY_ASSOCIATION_CUSTOM_IPSEC) {
+ lck_mtx_unlock(sadb_mutex);
+ ipseclog((LOG_ERR, "key_getspi: custom ipsec exists\n"));
+ return key_senderror(so, m, EEXIST);
+ }
+
/* get a new SA */
/* XXX rewrite */
newsav = key_newsav(m, mhp, newsah, &error, so);
lck_mtx_lock(sadb_mutex);
/* get a SA header */
- if ((sah = key_getsah(&saidx)) == NULL) {
+ if ((sah = key_getsah(&saidx, SECURITY_ASSOCIATION_PFKEY)) == NULL) {
lck_mtx_unlock(sadb_mutex);
ipseclog((LOG_DEBUG, "key_update: no SA index found.\n"));
return key_senderror(so, m, ENOENT);
/* Find or create new SAH */
KEY_SETSECASIDX(proto, sah->saidx.mode, sah->saidx.reqid, src1 + 1, dst1 + 1, ipsec_if1 ? ipsec_if1->if_index : 0, &saidx1);
- if ((newsah = key_getsah(&saidx1)) == NULL) {
- if ((newsah = key_newsah(&saidx1, ipsec_if1, key_get_outgoing_ifindex_from_message(mhp, SADB_X_EXT_MIGRATE_IPSECIF), sah->dir)) == NULL) {
+ if ((newsah = key_getsah(&saidx1, SECURITY_ASSOCIATION_ANY)) == NULL) {
+ if ((newsah = key_newsah(&saidx1, ipsec_if1, key_get_outgoing_ifindex_from_message(mhp, SADB_X_EXT_MIGRATE_IPSECIF), sah->dir, SECURITY_ASSOCIATION_PFKEY)) == NULL) {
lck_mtx_unlock(sadb_mutex);
ipseclog((LOG_DEBUG, "key_migrate: No more memory.\n"));
return key_senderror(so, m, ENOBUFS);
}
}
+ if ((newsah->flags & SECURITY_ASSOCIATION_CUSTOM_IPSEC) == SECURITY_ASSOCIATION_CUSTOM_IPSEC) {
+ lck_mtx_unlock(sadb_mutex);
+ ipseclog((LOG_ERR, "key_migrate: custom ipsec exists\n"));
+ return key_senderror(so, m, EEXIST);
+ }
+
/* Migrate SAV in to new SAH */
if (key_migratesav(sav, newsah) != 0) {
lck_mtx_unlock(sadb_mutex);
lck_mtx_lock(sadb_mutex);
/* get a SA header */
- if ((newsah = key_getsah(&saidx)) == NULL) {
+ if ((newsah = key_getsah(&saidx, SECURITY_ASSOCIATION_ANY)) == NULL) {
/* create a new SA header: key_addspi is always used for outbound spi */
- if ((newsah = key_newsah(&saidx, ipsec_if, key_get_outgoing_ifindex_from_message(mhp, SADB_X_EXT_IPSECIF), IPSEC_DIR_OUTBOUND)) == NULL) {
+ if ((newsah = key_newsah(&saidx, ipsec_if, key_get_outgoing_ifindex_from_message(mhp, SADB_X_EXT_IPSECIF), IPSEC_DIR_OUTBOUND, SECURITY_ASSOCIATION_PFKEY)) == NULL) {
lck_mtx_unlock(sadb_mutex);
ipseclog((LOG_DEBUG, "key_add: No more memory.\n"));
bzero_keys(mhp);
}
}
+ if ((newsah->flags & SECURITY_ASSOCIATION_CUSTOM_IPSEC) == SECURITY_ASSOCIATION_CUSTOM_IPSEC) {
+ lck_mtx_unlock(sadb_mutex);
+ ipseclog((LOG_ERR, "key_add: custom ipsec exists\n"));
+ bzero_keys(mhp);
+ return key_senderror(so, m, EEXIST);
+ }
+
/* set spidx if there */
/* XXX rewrite */
error = key_setident(newsah, m, mhp);
return frame_index;
}
+
+#pragma mark Custom IPsec
+
+__private_extern__ bool
+key_custom_ipsec_token_is_valid(void *ipsec_token)
+{
+ if (ipsec_token == NULL) {
+ return false;
+ }
+
+ struct secashead *sah = (struct secashead *)ipsec_token;
+
+ return (sah->flags & SECURITY_ASSOCIATION_CUSTOM_IPSEC) == SECURITY_ASSOCIATION_CUSTOM_IPSEC;
+}
+
+__private_extern__ int
+key_reserve_custom_ipsec(void **ipsec_token, union sockaddr_in_4_6 *src, union sockaddr_in_4_6 *dst,
+ u_int8_t proto)
+{
+ if (src == NULL || dst == NULL) {
+ ipseclog((LOG_ERR, "register custom ipsec: invalid address\n"));
+ return EINVAL;
+ }
+
+ if (src->sa.sa_family != dst->sa.sa_family) {
+ ipseclog((LOG_ERR, "register custom ipsec: address family mismatched\n"));
+ return EINVAL;
+ }
+
+ if (src->sa.sa_len != dst->sa.sa_len) {
+ ipseclog((LOG_ERR, "register custom ipsec: address struct size mismatched\n"));
+ return EINVAL;
+ }
+
+ if (ipsec_token == NULL) {
+ ipseclog((LOG_ERR, "register custom ipsec: invalid ipsec token\n"));
+ return EINVAL;
+ }
+
+ switch (src->sa.sa_family) {
+ case AF_INET:
+ if (src->sa.sa_len != sizeof(struct sockaddr_in)) {
+ ipseclog((LOG_ERR, "register custom esp: invalid address length\n"));
+ return EINVAL;
+ }
+ break;
+ case AF_INET6:
+ if (src->sa.sa_len != sizeof(struct sockaddr_in6)) {
+ ipseclog((LOG_ERR, "register custom esp: invalid address length\n"));
+ return EINVAL;
+ }
+ break;
+ default:
+ ipseclog((LOG_ERR, "register custom esp: invalid address family\n"));
+ return EAFNOSUPPORT;
+ }
+
+ if (proto != IPPROTO_ESP && proto != IPPROTO_AH) {
+ ipseclog((LOG_ERR, "register custom esp: invalid proto %u\n", proto));
+ return EINVAL;
+ }
+
+ struct secasindex saidx = {};
+ KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, &src->sa, &dst->sa, 0, &saidx);
+
+ lck_mtx_lock(sadb_mutex);
+
+ struct secashead *sah = NULL;
+ if ((sah = key_getsah(&saidx, SECURITY_ASSOCIATION_ANY)) != NULL) {
+ lck_mtx_unlock(sadb_mutex);
+ ipseclog((LOG_ERR, "register custom esp: SA exists\n"));
+ return EEXIST;
+ }
+
+ if ((sah = key_newsah(&saidx, NULL, 0, IPSEC_DIR_ANY, SECURITY_ASSOCIATION_CUSTOM_IPSEC)) == NULL) {
+ lck_mtx_unlock(sadb_mutex);
+ ipseclog((LOG_DEBUG, "register custom esp: No more memory.\n"));
+ return ENOBUFS;
+ }
+
+ *ipsec_token = (void *)sah;
+
+ lck_mtx_unlock(sadb_mutex);
+ return 0;
+}
+
+__private_extern__ void
+key_release_custom_ipsec(void **ipsec_token)
+{
+ struct secashead *sah = *ipsec_token;
+ VERIFY(sah != NULL);
+
+ lck_mtx_lock(sadb_mutex);
+
+ VERIFY((sah->flags & SECURITY_ASSOCIATION_CUSTOM_IPSEC) == SECURITY_ASSOCIATION_CUSTOM_IPSEC);
+
+ bool sa_present = true;
+ if (LIST_FIRST(&sah->savtree[SADB_SASTATE_LARVAL]) == NULL &&
+ LIST_FIRST(&sah->savtree[SADB_SASTATE_MATURE]) == NULL &&
+ LIST_FIRST(&sah->savtree[SADB_SASTATE_DYING]) == NULL &&
+ LIST_FIRST(&sah->savtree[SADB_SASTATE_DEAD]) == NULL) {
+ sa_present = false;
+ }
+ VERIFY(sa_present == false);
+
+ key_delsah(sah);
+
+ lck_mtx_unlock(sadb_mutex);
+
+ *ipsec_token = NULL;
+ return;
+}
extern u_int32_t key_fill_offload_frames_for_savs(struct ifnet *,
struct ifnet_keepalive_offload_frame *frames_array, u_int32_t, size_t);
-
+extern bool key_custom_ipsec_token_is_valid(void *);
+extern int key_reserve_custom_ipsec(void **, union sockaddr_in_4_6 *, union sockaddr_in_4_6 *, u_int8_t proto);
+extern void key_release_custom_ipsec(void **);
#endif /* BSD_KERNEL_PRIVATE */
#endif /* _NETKEY_KEY_H_ */
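As a rough illustration (not part of the patch), a kernel client such as an ipsec interface could drive the reservation KPI declared above as sketched below; the function names and token storage are hypothetical, and the union is assumed to expose sa/sin members as elsewhere in the tree:

static void *custom_ipsec_token;	/* hypothetical token storage */

static int
example_reserve_custom_esp(void)
{
	union sockaddr_in_4_6 src = {};
	union sockaddr_in_4_6 dst = {};

	src.sin.sin_family = AF_INET;
	src.sin.sin_len = sizeof(struct sockaddr_in);
	src.sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	dst.sin = src.sin;

	/* Fails with EEXIST when any SA already covers this src/dst pair. */
	int error = key_reserve_custom_ipsec(&custom_ipsec_token, &src, &dst, IPPROTO_ESP);
	if (error == 0) {
		VERIFY(key_custom_ipsec_token_is_valid(custom_ipsec_token));
	}
	return error;
}

static void
example_release_custom_esp(void)
{
	/* Only legal once no SAs remain on the reserved SAH. */
	key_release_custom_ipsec(&custom_ipsec_token);
}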
u_int ipsec_ifindex;
};
+#define SECURITY_ASSOCIATION_ANY 0x0000
+#define SECURITY_ASSOCIATION_PFKEY 0x0001
+#define SECURITY_ASSOCIATION_CUSTOM_IPSEC 0x0010
+
/* Security Association Data Base */
struct secashead {
LIST_ENTRY(secashead) chain;
/* The first of this list is newer SA */
struct route_in6 sa_route; /* route cache */
+
+ uint16_t flags;
};
#define MAX_REPLAY_WINDOWS 4
*
*/
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_CLIENT
+
#include <sys/param.h>
#include <sys/conf.h>
#include <sys/ioctl.h>
}
return error;
}
+
+#endif /* CONFIG_NFS_CLIENT */
struct nfs_socket;
struct nfs_socket_search;
struct nfsrv_uc_arg;
+struct direntry;
/*
* The set of signals the interrupt an I/O in progress for NFSMNT_INT mounts.
void nfs3_pathconf_cache(struct nfsmount *, struct nfs_fsattr *);
int nfs3_mount_rpc(struct nfsmount *, struct sockaddr *, int, int, char *, vfs_context_t, int, fhandle_t *, struct nfs_sec *);
void nfs3_umount_rpc(struct nfsmount *, vfs_context_t, int);
+void nfs_rdirplus_update_node_attrs(nfsnode_t, struct direntry *, fhandle_t *, struct nfs_vattr *, uint64_t *);
int nfs_node_access_slot(nfsnode_t, uid_t, int);
void nfs_vnode_notify(nfsnode_t, uint32_t);
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_CLIENT
+
/*
* miscellaneous support functions for NFSv4
*/
vfs_statfs(nmp->nm_mountp)->f_mntfromname, nmp->nm_stategenid, error);
}
}
+
+#endif /* CONFIG_NFS_CLIENT */
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_CLIENT
+
/*
* vnode op calls for NFS version 4
*/
if (rdirplus) {
microuptime(&now);
+ if (lastcookie == 0) {
+ dnp->n_rdirplusstamp_sof = now.tv_sec;
+ dnp->n_rdirplusstamp_eof = 0;
+ }
}
/* loop through the entries packing them into the buffer */
}
*(time_t*)(&dp->d_name[dp->d_namlen + 1 + fhlen]) = now.tv_sec;
dp->d_reclen = reclen;
+ nfs_rdirplus_update_node_attrs(dnp, dp, &fh, nvattrp, &savedxid);
}
padstart = dp->d_name + dp->d_namlen + 1 + xlen;
ndbhp->ndbh_count++;
ndbhp->ndbh_flags |= (NDB_FULL | NDB_EOF);
nfs_node_lock_force(dnp);
dnp->n_eofcookie = lastcookie;
+ if (rdirplus) {
+ dnp->n_rdirplusstamp_eof = now.tv_sec;
+ }
nfs_node_unlock(dnp);
} else {
more_entries = 1;
#endif
#endif /* CONFIG_NFS4 */
+
+#endif /* CONFIG_NFS_CLIENT */
* @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95
* FreeBSD-Id: nfs_bio.c,v 1.44 1997/09/10 19:52:25 phk Exp $
*/
+
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_CLIENT
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
}
return error;
}
+
+#endif /* CONFIG_NFS_CLIENT */
* - replaced big automatic arrays with MALLOC'd data
*/
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_CLIENT
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
}
#endif /* NETHER */
+
+#endif /* CONFIG_NFS_CLIENT */
--- /dev/null
+/*
+ * Copyright (c) 2019 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _BSD_NFS_NFS_CONF_H_
+#define _BSD_NFS_NFS_CONF_H_
+
+#if NFSCLIENT
+#define CONFIG_NFS_CLIENT 1
+#endif /* NFSCLIENT */
+
+#if NFSSERVER
+#define CONFIG_NFS_SERVER 1
+#endif /* NFSSERVER */
+
+
+#if CONFIG_NFS_CLIENT || CONFIG_NFS_SERVER
+#define CONFIG_NFS 1
+#endif /* CONFIG_NFS_CLIENT || CONFIG_NFS_SERVER */
+
+#endif /* _BSD_NFS_NFS_CONF_H_ */
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS
+
/*************
* These functions implement RPCSEC_GSS security for the NFS client and server.
* The code is specific to the use of Kerberos v5 and the use of DES MAC MD5
#define NFS_GSS_ISDBG (NFS_DEBUG_FACILITY & NFS_FAC_GSS)
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
u_long nfs_gss_svc_ctx_hash;
struct nfs_gss_svc_ctx_hashhead *nfs_gss_svc_ctx_hashtbl;
lck_mtx_t *nfs_gss_svc_ctx_mutex;
lck_grp_t *nfs_gss_svc_grp;
uint32_t nfsrv_gss_context_ttl = GSS_CTX_EXPIRE;
#define GSS_SVC_CTX_TTL ((uint64_t)max(2*GSS_CTX_PEND, nfsrv_gss_context_ttl) * NSEC_PER_SEC)
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
lck_grp_t *nfs_gss_clnt_grp;
-#endif /* NFSCLIENT */
+#endif /* CONFIG_NFS_CLIENT */
#define KRB5_MAX_MIC_SIZE 128
uint8_t krb5_mech_oid[11] = { 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x12, 0x01, 0x02, 0x02 };
static uint8_t xdrpad[] = { 0x00, 0x00, 0x00, 0x00};
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
static int nfs_gss_clnt_ctx_find(struct nfsreq *);
static int nfs_gss_clnt_ctx_init(struct nfsreq *, struct nfs_gss_clnt_ctx *);
static int nfs_gss_clnt_ctx_init_retry(struct nfsreq *, struct nfs_gss_clnt_ctx *);
static int nfs_gss_clnt_ctx_copy(struct nfs_gss_clnt_ctx *, struct nfs_gss_clnt_ctx **);
static void nfs_gss_clnt_ctx_destroy(struct nfs_gss_clnt_ctx *);
static void nfs_gss_clnt_log_error(struct nfsreq *, struct nfs_gss_clnt_ctx *, uint32_t, uint32_t);
-#endif /* NFSCLIENT */
+#endif /* CONFIG_NFS_CLIENT */
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
static struct nfs_gss_svc_ctx *nfs_gss_svc_ctx_find(uint32_t);
static void nfs_gss_svc_ctx_insert(struct nfs_gss_svc_ctx *);
static void nfs_gss_svc_ctx_timer(void *, void *);
/* This is only used by server code */
static void nfs_gss_nfsm_chain(struct nfsm_chain *, mbuf_t);
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
static void host_release_special_port(mach_port_t);
static mach_port_t host_copy_special_port(mach_port_t);
static int nfs_gss_mchain_length(mbuf_t);
static int nfs_gss_append_chain(struct nfsm_chain *, mbuf_t);
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
thread_call_t nfs_gss_svc_ctx_timer_call;
int nfs_gss_timer_on = 0;
uint32_t nfs_gss_ctx_count = 0;
const uint32_t nfs_gss_ctx_max = GSS_SVC_MAXCONTEXTS;
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
/*
* Initialization when NFS starts
void
nfs_gss_init(void)
{
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
nfs_gss_clnt_grp = lck_grp_alloc_init("rpcsec_gss_clnt", LCK_GRP_ATTR_NULL);
-#endif /* NFSCLIENT */
+#endif /* CONFIG_NFS_CLIENT */
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
nfs_gss_svc_grp = lck_grp_alloc_init("rpcsec_gss_svc", LCK_GRP_ATTR_NULL);
nfs_gss_svc_ctx_hashtbl = hashinit(SVC_CTX_HASHSZ, M_TEMP, &nfs_gss_svc_ctx_hash);
nfs_gss_svc_ctx_mutex = lck_mtx_alloc_init(nfs_gss_svc_grp, LCK_ATTR_NULL);
nfs_gss_svc_ctx_timer_call = thread_call_allocate(nfs_gss_svc_ctx_timer, NULL);
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
}
/*
return error;
}
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
/*
* Restore the argument or result from an rpc_gss_integ_data mbuf chain
nfs_gss_clnt_ctx_unref(&req);
return error;
}
-#endif /* NFSCLIENT */
+#endif /* CONFIG_NFS_CLIENT */
/*************
*
* Server functions
*/
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
/*
* Find a server context based on a handle value received
lck_mtx_unlock(nfs_gss_svc_ctx_mutex);
}
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
/*************
return 0;
}
-#if NFSSERVER /* Only used by NFSSERVER */
+#if CONFIG_NFS_SERVER /* Only used by CONFIG_NFS_SERVER */
/*
* Convert an mbuf chain to an NFS mbuf chain
*/
nmc->nmc_left = mbuf_trailingspace(tail);
nmc->nmc_flags = 0;
}
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
#if 0
}
}
#endif
+
+#endif /* CONFIG_NFS */
* from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
*/
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_CLIENT
+
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
return error;
}
+
+#endif /* CONFIG_NFS_CLIENT */
* FreeBSD-Id: nfs_node.c,v 1.22 1997/10/28 14:06:20 bde Exp $
*/
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_CLIENT
#include <sys/param.h>
#include <sys/kernel.h>
return i <= nfsnodehash;
}
+
+#endif /* CONFIG_NFS_CLIENT */
* FreeBSD-Id: nfs_serv.c,v 1.52 1997/10/28 15:59:05 bde Exp $
*/
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_SERVER
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <security/mac_framework.h>
#endif
-#if NFSSERVER
-
/*
* NFS server globals
*/
VFSATTR_INIT(&va);
VFSATTR_WANTED(&va, f_blocks);
+ VFSATTR_WANTED(&va, f_bfree);
VFSATTR_WANTED(&va, f_bavail);
VFSATTR_WANTED(&va, f_files);
VFSATTR_WANTED(&va, f_ffree);
return error;
}
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
* FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
*/
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS
+
/*
* Socket operations for use by nfs
*/
kern_return_t thread_terminate(thread_t);
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
int nfsrv_sock_max_rec_queue_length = 128; /* max # RPC records queued on (UDP) socket */
int nfsrv_getstream(struct nfsrv_sock *, int);
int nfsrv_getreq(struct nfsrv_descript *);
extern int nfsv3_procid[NFS_NPROCS];
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
/*
* compare two sockaddr structures
return -1;
}
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
int nfs_connect_search_new_socket(struct nfsmount *, struct nfs_socket_search *, struct timeval *);
int nfs_connect_search_socket_connect(struct nfsmount *, struct nfs_socket *, int);
}
-#endif /* NFSCLIENT */
+#endif /* CONFIG_NFS_CLIENT */
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
/*
* Generate the rpc reply header
wakeup(nd);
}
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
+
+#endif /* CONFIG_NFS */
* FreeBSD-Id: nfs_srvcache.c,v 1.15 1997/10/12 20:25:46 phk Exp $
*/
-#if NFSSERVER
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_SERVER
+
/*
* Reference: Chet Juszczak, "Improving the Performance and Correctness
* of an NFS Server", in Proc. Winter 1989 USENIX Conference,
lck_mtx_unlock(nfsrv_reqcache_mutex);
}
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
* FreeBSD-Id: nfs_subs.c,v 1.47 1997/11/07 08:53:24 phk Exp $
*/
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS
+
/*
* These functions support the macros and help fiddle mbuf chains for
* the nfs op functions. They do things like create the rpc header and
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsnode.h>
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
#define _NFS_XDR_SUBS_FUNCS_ /* define this to get xdrbuf function definitions */
#endif
#include <nfs/xdr_subs.h>
}
}
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
/*
* Mapping of old NFS Version 2 RPC numbers to generic numbers.
NFSPROC_NOOP
};
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
/*
* and the reverse mapping from generic to Version 2 procedure numbers
nfs_mbuf_minclsize = ms.minclsize;
}
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
/*
* allocate a list of mbufs to hold the given amount of data
return error;
}
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
/*
* nfsm_chain_new_mbuf()
return error;
}
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
int
nfsm_chain_add_string_nfc(struct nfsm_chain *nmc, const uint8_t *s, uint32_t slen)
return c > buflen ? ENOMEM : 0;
}
-#endif /* NFSCLIENT */
+#endif /* CONFIG_NFS_CLIENT */
/*
* Schedule a callout thread to run an NFS timer function
}
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
int nfsrv_cmp_secflavs(struct nfs_sec *, struct nfs_sec *);
int nfsrv_hang_addrlist(struct nfs_export *, struct user_nfs_export_args *);
vnode_t mvp = NULL, xvp = NULL;
mount_t mp = NULL;
char path[MAXPATHLEN];
+ char fl_pathbuff[MAXPATHLEN];
+ int fl_pathbuff_len = MAXPATHLEN;
int expisroot;
if (unxa->nxa_flags == NXA_CHECK) {
goto unlock_out;
}
if ((unxa->nxa_flags & (NXA_ADD | NXA_OFFLINE)) == NXA_ADD) {
- /* if adding, verify that the mount is still what we expect */
- mp = vfs_getvfs_by_mntonname(nxfs->nxfs_path);
- if (mp) {
- mount_ref(mp, 0);
- mount_iterdrop(mp);
- }
/* find exported FS root vnode */
NDINIT(&mnd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
UIO_SYSSPACE, CAST_USER_ADDR_T(nxfs->nxfs_path), ctx);
error = EINVAL;
goto out;
}
+ /* if adding, verify that the mount is still what we expect */
+ mp = vfs_getvfs_by_mntonname(nxfs->nxfs_path);
+ if (!mp) {
+ /* check for firmlink-free path */
+ if (vn_getpath_no_firmlink(mvp, fl_pathbuff, &fl_pathbuff_len) == 0 &&
+ fl_pathbuff_len > 0 &&
+ !strncmp(nxfs->nxfs_path, fl_pathbuff, MAXPATHLEN)) {
+ mp = vfs_getvfs_by_mntonname(vnode_mount(mvp)->mnt_vfsstat.f_mntonname);
+ }
+ }
+ if (mp) {
+ mount_ref(mp, 0);
+ mount_iterdrop(mp);
+ }
/* sanity check: this should be same mount */
if (mp != vnode_mount(mvp)) {
error = EINVAL;
return (int)*defaulterrp;
}
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
+
+#endif /* CONFIG_NFS */
* @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95
* FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
*/
+
+#include <nfs/nfs_conf.h>
+
/*
* NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
* support for mandatory and extensible security protections. This notice
kern_return_t thread_terminate(thread_t); /* XXX */
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
extern const nfsrv_proc_t nfsrv_procs[NFS_NPROCS];
void nfsrv_slpderef(struct nfsrv_sock *);
void nfsrv_slpfree(struct nfsrv_sock *);
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
+#if CONFIG_NFS
/*
* sysctl stuff
*/
SYSCTL_DECL(_vfs_generic);
SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs hinge");
+#endif /* CONFIG_NFS */
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs client hinge");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, "");
#if CONFIG_NFS4
SYSCTL_STRING(_vfs_generic_nfs_client, OID_AUTO, default_nfs4domain, CTLFLAG_RW | CTLFLAG_LOCKED, nfs4_default_domain, sizeof(nfs4_default_domain), "");
#endif
-#endif /* NFSCLIENT */
+#endif /* CONFIG_NFS_CLIENT */
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "nfs server hinge");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, "");
#endif
-#endif /* NFSSERVER */
-
+#endif /* CONFIG_NFS_SERVER */
-#if NFSCLIENT
-
-#if CONFIG_NFS4
+#if CONFIG_NFS_CLIENT && CONFIG_NFS4
static int
mapname2id(struct nfs_testmapid *map)
{
return error ? error : coerror;
}
+#endif /* CONFIG_NFS_CLIENT && CONFIG_NFS4 */
+
+#if !CONFIG_NFS_CLIENT
+#define __no_nfs_client_unused __unused
+#else
+#define __no_nfs_client_unused /* nothing */
#endif
int
-nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
+nfsclnt(
+ proc_t p __no_nfs_client_unused,
+ struct nfsclnt_args *uap __no_nfs_client_unused,
+ __unused int *retval)
{
+#if CONFIG_NFS_CLIENT
struct lockd_ans la;
int error;
error = EINVAL;
}
return error;
+#else
+ return ENOSYS;
+#endif /* CONFIG_NFS_CLIENT */
}
+#if CONFIG_NFS_CLIENT
/*
* Asynchronous I/O threads for client NFS.
return 0;
}
-#endif /* NFSCLIENT */
-
+#endif /* CONFIG_NFS_CLIENT */
-#if NFSSERVER
+#if !CONFIG_NFS_SERVER
+#define __no_nfs_server_unused __unused
+#else
+#define __no_nfs_server_unused /* nothing */
+#endif
/*
* NFS server system calls
* getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
*/
+#if CONFIG_NFS_SERVER
static struct nfs_exportfs *
nfsrv_find_exportfs(const char *ptr)
{
* Get file handle system call
*/
int
-getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
+getfh(
+ proc_t p __no_nfs_server_unused,
+ struct getfh_args *uap __no_nfs_server_unused,
+ __unused int *retval)
{
vnode_t vp;
struct nfs_filehandle nfh;
error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
return error;
}
+#endif /* CONFIG_NFS_SERVER */
+#if CONFIG_NFS_SERVER
extern const struct fileops vnops;
/*
* security hole.
*/
int
-fhopen( proc_t p,
- struct fhopen_args *uap,
- int32_t *retval)
+fhopen(proc_t p __no_nfs_server_unused,
+ struct fhopen_args *uap __no_nfs_server_unused,
+ int32_t *retval __no_nfs_server_unused)
{
vnode_t vp;
struct nfs_filehandle nfh;
vnode_put(vp);
return error;
}
+#endif /* CONFIG_NFS_SERVER */
+#if CONFIG_NFS_SERVER
/*
* NFS server pseudo system call
*/
int
-nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval)
+nfssvc(proc_t p __no_nfs_server_unused,
+ struct nfssvc_args *uap __no_nfs_server_unused,
+ __unused int *retval)
{
mbuf_t nam;
struct user_nfsd_args user_nfsdarg;
}
return error;
}
+#endif /* CONFIG_NFS_SERVER */
+
+#if CONFIG_NFS_SERVER
/*
* Adds a socket to the list for servicing by nfsds.
nfsrv_udp6sock = NULL;
}
-#endif /* NFS_NOSERVER */
+#endif /* CONFIG_NFS_SERVER */
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
+
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_SERVER
+
#include <stdint.h>
#include <sys/param.h>
#include <sys/mount_internal.h>
return;
}
+
+#endif /* CONFIG_NFS_SERVER */
* @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95
* FreeBSD-Id: nfs_vfsops.c,v 1.52 1997/11/12 05:42:21 julian Exp $
*/
+
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_CLIENT
+
/*
* NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
* support for mandatory and extensible security protections. This notice
nmp->nm_iodlink.tqe_next = NFSNOLIST;
nmp->nm_deadtimeout = 0;
nmp->nm_curdeadtimeout = 0;
+ NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_RDIRPLUS); /* enable RDIRPLUS by default; it is cleared later if NFSv2 is used */
NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_NOACL);
nmp->nm_realm = NULL;
nmp->nm_principal = NULL;
struct netfs_status *nsp = NULL;
int timeoutmask;
uint totlen, count, numThreads;
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
uint pos;
struct nfs_exportfs *nxfs;
struct nfs_export *nx;
struct nfs_user_stat_path_rec upath_rec;
uint bytes_avail, bytes_total, recs_copied;
uint numExports, numRecs;
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
/*
* All names at this level are terminal.
*oldlenp = xb.xb_u.xb_buffer.xbb_len;
xb_cleanup(&xb);
break;
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
case NFS_EXPORTSTATS:
/* setup export stat descriptor */
stat_desc.rec_vers = NFS_EXPORT_STAT_REC_VERSION;
error = copyout(&nfsrv_user_stat_node_count, oldp, sizeof(nfsrv_user_stat_node_count));
break;
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
case VFS_CTL_NOLOCKS:
if (req->oldptr != USER_ADDR_NULL) {
lck_mtx_lock(&nmp->nm_lock);
}
return error;
}
+
+#endif /* CONFIG_NFS_CLIENT */
* FreeBSD-Id: nfs_vnops.c,v 1.72 1997/11/07 09:20:48 phk Exp $
*/
+#include <nfs/nfs_conf.h>
+#if CONFIG_NFS_CLIENT
/*
* vnode op calls for Sun NFS version 2 and 3
int nfs_refresh_fh(nfsnode_t, vfs_context_t);
+/*
+ * Update nfsnode attributes to avoid extra getattr calls for each direntry.
+ * This function should be called only if RDIRPLUS flag is enabled.
+ */
+void
+nfs_rdirplus_update_node_attrs(nfsnode_t dnp, struct direntry *dp, fhandle_t *fhp, struct nfs_vattr *nvattrp, uint64_t *savedxidp)
+{
+ nfsnode_t np;
+ struct componentname cn;
+ int isdot = (dp->d_namlen == 1) && (dp->d_name[0] == '.');
+ int isdotdot = (dp->d_namlen == 2) && (dp->d_name[0] == '.') && (dp->d_name[1] == '.');
+
+ if (isdot || isdotdot) {
+ return;
+ }
+
+ np = NULL;
+ bzero(&cn, sizeof(cn));
+ cn.cn_nameptr = dp->d_name;
+ cn.cn_namelen = dp->d_namlen;
+ cn.cn_nameiop = LOOKUP;
+
+ nfs_nget(NFSTOMP(dnp), dnp, &cn, fhp->fh_data, fhp->fh_len, nvattrp, savedxidp, RPCAUTH_UNKNOWN, NG_NOCREATE, &np);
+ if (np) {
+ nfs_node_unlock(np);
+ vnode_put(NFSTOV(np));
+ }
+}
+
/*
* Find the slot in the access cache for this UID.
* If adding and no existing slot is found, reuse slots in FIFO order.
* } */*ap)
{
int error;
+ nfsnode_t np;
+ uint64_t supported_attrs;
struct nfs_vattr nva;
struct vnode_attr *vap = ap->a_vap;
struct nfsmount *nmp;
/* Return the io size no matter what, since we don't go over the wire for this */
VATTR_RETURN(vap, va_iosize, nfs_iosize);
- if ((vap->va_active & NFS3_SUPPORTED_VATTRS) == 0) {
+ supported_attrs = NFS3_SUPPORTED_VATTRS;
+
+ if ((vap->va_active & supported_attrs) == 0) {
return 0;
}
(uint64_t)VM_KERNEL_ADDRPERM(ap->a_vp),
ap->a_vp->v_name ? ap->a_vp->v_name : "empty");
}
+
+ /*
+ * We should not go over the wire if only fileid was requested and has ever been populated.
+ */
+ if ((vap->va_active & supported_attrs) == VNODE_ATTR_va_fileid) {
+ np = VTONFS(ap->a_vp);
+ if (np->n_attrstamp) {
+ VATTR_RETURN(vap, va_fileid, np->n_vattr.nva_fileid);
+ return 0;
+ }
+ }
+
error = nfs_getattr(VTONFS(ap->a_vp), &nva, ap->a_context, NGA_CACHED);
if (error) {
return error;
out:
nfs_node_lock_force(np);
np->n_wrbusy--;
+ if ((ioflag & IO_SYNC) && !np->n_wrbusy && !np->n_numoutput) {
+ np->n_flag &= ~NMODIFIED;
+ }
nfs_node_unlock(np);
nfs_data_unlock(np);
FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), error);
nfsnode_t dnp = VTONFS(dvp);
struct nfsmount *nmp;
uio_t uio = ap->a_uio;
- int error, nfsvers, extended, numdirent, bigcookies, ptc, done;
+ int error, nfsvers, extended, numdirent, bigcookies, ptc, done, attrcachetimeout;
uint16_t i, iptc, rlen, nlen;
uint64_t cookie, nextcookie, lbn = 0;
struct nfsbuf *bp = NULL;
struct direntry *dp, *dpptc;
struct dirent dent;
char *cp = NULL;
+ struct timeval now;
thread_t thd;
nmp = VTONMP(dvp);
}
}
+ if (dnp->n_rdirplusstamp_eof && dnp->n_rdirplusstamp_sof) {
+ attrcachetimeout = nfs_attrcachetimeout(dnp);
+ microuptime(&now);
+ if (attrcachetimeout && (now.tv_sec - dnp->n_rdirplusstamp_sof > attrcachetimeout - 1)) {
+ dnp->n_rdirplusstamp_eof = dnp->n_rdirplusstamp_sof = 0;
+ nfs_invaldir(dnp);
+ nfs_node_unlock(dnp);
+ error = nfs_vinvalbuf(dvp, 0, ctx, 1);
+ if (!error) {
+ error = nfs_node_lock(dnp);
+ }
+ if (error) {
+ goto out;
+ }
+ }
+ }
+
/*
* check for need to invalidate when (re)starting at beginning
*/
struct nfsbuflists blist;
daddr64_t lbn, nextlbn;
int dotunder = (cnp->cn_namelen > 2) && (cnp->cn_nameptr[0] == '.') && (cnp->cn_nameptr[1] == '_');
+ int isdot = (cnp->cn_namelen == 1) && (cnp->cn_nameptr[0] == '.');
+ int isdotdot = (cnp->cn_namelen == 2) && (cnp->cn_nameptr[0] == '.') && (cnp->cn_nameptr[1] == '.');
nmp = NFSTONMP(dnp);
if (nfs_mount_gone(nmp)) {
*npp = NULL;
}
+ if (isdot || isdotdot) {
+ return 0;
+ }
+
/* first check most recent buffer (and next one too) */
lbn = dnp->n_lastdbl;
for (i = 0; i < 2; i++) {
if (rdirplus) {
microuptime(&now);
+ if (lastcookie == 0) {
+ dnp->n_rdirplusstamp_sof = now.tv_sec;
+ dnp->n_rdirplusstamp_eof = 0;
+ }
}
/* loop through the entries packing them into the buffer */
}
*(time_t*)(&dp->d_name[dp->d_namlen + 1 + fhlen]) = now.tv_sec;
dp->d_reclen = reclen;
+ nfs_rdirplus_update_node_attrs(dnp, dp, &fh, nvattrp, &savedxid);
}
padstart = dp->d_name + dp->d_namlen + 1 + xlen;
ndbhp->ndbh_count++;
ndbhp->ndbh_flags |= (NDB_FULL | NDB_EOF);
nfs_node_lock_force(dnp);
dnp->n_eofcookie = lastcookie;
+ if (rdirplus) {
+ dnp->n_rdirplusstamp_eof = now.tv_sec;
+ }
nfs_node_unlock(dnp);
} else {
more_entries = 1;
vnode_notify(NFSTOV(np), events, vap);
}
+#endif /* CONFIG_NFS_CLIENT */
#ifdef __APPLE_API_PRIVATE
+#include <nfs/nfs_conf.h>
+
int nfsm_rpchead(struct nfsreq *, mbuf_t, u_int64_t *, mbuf_t *);
int nfsm_rpchead2(struct nfsmount *, int, int, int, int, int, kauth_cred_t, struct nfsreq *, mbuf_t, u_int64_t *, mbuf_t *);
int nfsm_chain_get_wcc_data_f(struct nfsm_chain *, nfsnode_t, struct timespec *, int *, u_int64_t *);
int nfsm_chain_get_secinfo(struct nfsm_chain *, uint32_t *, int *);
-#if NFSSERVER
+#if CONFIG_NFS_SERVER
void nfsm_adj(mbuf_t, int, int);
int nfsm_mbuf_get_list(size_t, mbuf_t *, int *);
int nfsm_chain_get_path_namei(struct nfsm_chain *, uint32_t, struct nameidata *);
int nfsm_chain_get_sattr(struct nfsrv_descript *, struct nfsm_chain *, struct vnode_attr *);
int nfsm_chain_trim_data(struct nfsm_chain *, int, int *);
-#endif /* NFSSERVER */
+#endif /* CONFIG_NFS_SERVER */
/* check name length */
#define nfsm_name_len_check(E, ND, LEN) \
u_int8_t n_access[NFS_ACCESS_CACHE_SIZE + 1]; /* ACCESS cache */
uid_t n_accessuid[NFS_ACCESS_CACHE_SIZE]; /* credentials having access */
time_t n_accessstamp[NFS_ACCESS_CACHE_SIZE]; /* access cache timestamp */
+ time_t n_rdirplusstamp_sof; /* Readdirplus sof timestamp */
+ time_t n_rdirplusstamp_eof; /* Readdirplus eof timestamp */
union {
struct {
struct timespec n3_mtime; /* Prev modify time. */
#ifndef _FD_SET
#define _FD_SET
-#include <machine/types.h> /* __int32_t */
+#include <machine/types.h> /* __int32_t and uintptr_t */
+#if !KERNEL
+#include <Availability.h>
+#endif
/*
* Select uses bit masks of file descriptors in longs. These macros
typedef struct fd_set {
__int32_t fds_bits[__DARWIN_howmany(__DARWIN_FD_SETSIZE, __DARWIN_NFDBITS)];
} fd_set;
+
+#if !KERNEL
+int __darwin_check_fd_set_overflow(int, const void *, int) __attribute__((__weak_import__));
+#endif
__END_DECLS
+#if !KERNEL
+__header_always_inline int
+__darwin_check_fd_set(int _a, const void *_b)
+{
+ if ((uintptr_t)&__darwin_check_fd_set_overflow != (uintptr_t) 0) {
+#if defined(_DARWIN_UNLIMITED_SELECT) || defined(_DARWIN_C_SOURCE)
+ return __darwin_check_fd_set_overflow(_a, _b, 1);
+#else
+ return __darwin_check_fd_set_overflow(_a, _b, 0);
+#endif
+ } else {
+ return 1;
+ }
+}
+
/* This inline avoids argument side-effect issues with FD_ISSET() */
-static __inline int
-__darwin_fd_isset(int _n, const struct fd_set *_p)
+__header_always_inline int
+__darwin_fd_isset(int _fd, const struct fd_set *_p)
+{
+ if (__darwin_check_fd_set(_fd, (const void *) _p)) {
+ return _p->fds_bits[(unsigned long)_fd / __DARWIN_NFDBITS] & ((__int32_t)(((unsigned long)1) << ((unsigned long)_fd % __DARWIN_NFDBITS)));
+ }
+
+ return 0;
+}
+
+__header_always_inline void
+__darwin_fd_set(int _fd, struct fd_set *const _p)
+{
+ if (__darwin_check_fd_set(_fd, (const void *) _p)) {
+ (_p->fds_bits[(unsigned long)_fd / __DARWIN_NFDBITS] |= ((__int32_t)(((unsigned long)1) << ((unsigned long)_fd % __DARWIN_NFDBITS))));
+ }
+}
+
+__header_always_inline void
+__darwin_fd_clr(int _fd, struct fd_set *const _p)
+{
+ if (__darwin_check_fd_set(_fd, (const void *) _p)) {
+ (_p->fds_bits[(unsigned long)_fd / __DARWIN_NFDBITS] &= ~((__int32_t)(((unsigned long)1) << ((unsigned long)_fd % __DARWIN_NFDBITS))));
+ }
+}
+
+#else /* KERNEL */
+
+__header_always_inline int
+__darwin_fd_isset(int _fd, const struct fd_set *_p)
+{
+ return _p->fds_bits[(unsigned long)_fd / __DARWIN_NFDBITS] & ((__int32_t)(((unsigned long)1) << ((unsigned long)_fd % __DARWIN_NFDBITS)));
+}
+
+__header_always_inline void
+__darwin_fd_set(int _fd, struct fd_set *const _p)
+{
+ (_p->fds_bits[(unsigned long)_fd / __DARWIN_NFDBITS] |= ((__int32_t)(((unsigned long)1) << ((unsigned long)_fd % __DARWIN_NFDBITS))));
+}
+
+__header_always_inline void
+__darwin_fd_clr(int _fd, struct fd_set *const _p)
{
- return _p->fds_bits[(unsigned long)_n / __DARWIN_NFDBITS] & ((__int32_t)(((unsigned long)1) << ((unsigned long)_n % __DARWIN_NFDBITS)));
+ (_p->fds_bits[(unsigned long)_fd / __DARWIN_NFDBITS] &= ~((__int32_t)(((unsigned long)1) << ((unsigned long)_fd % __DARWIN_NFDBITS))));
}
+#endif /* KERNEL */
-#define __DARWIN_FD_SET(n, p) do { int __fd = (n); ((p)->fds_bits[(unsigned long)__fd/__DARWIN_NFDBITS] |= ((__int32_t)(((unsigned long)1)<<((unsigned long)__fd % __DARWIN_NFDBITS)))); } while(0)
-#define __DARWIN_FD_CLR(n, p) do { int __fd = (n); ((p)->fds_bits[(unsigned long)__fd/__DARWIN_NFDBITS] &= ~((__int32_t)(((unsigned long)1)<<((unsigned long)__fd % __DARWIN_NFDBITS)))); } while(0)
+#define __DARWIN_FD_SET(n, p) __darwin_fd_set((n), (p))
+#define __DARWIN_FD_CLR(n, p) __darwin_fd_clr((n), (p))
#define __DARWIN_FD_ISSET(n, p) __darwin_fd_isset((n), (p))
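A short userspace sketch (illustrative, not part of the patch) of the path the reworked macros now take: FD_SET()/FD_ISSET() funnel through __darwin_check_fd_set() before touching fds_bits:

#include <string.h>
#include <sys/select.h>

/* Illustrative only: plain select() usage; on a libsystem that exports
 * __darwin_check_fd_set_overflow(), the FD_SET/FD_ISSET below are bounds-checked. */
int
wait_readable(int fd)
{
	fd_set readfds;
	struct timeval tv = { .tv_sec = 1, .tv_usec = 0 };

	FD_ZERO(&readfds);
	FD_SET(fd, &readfds);
	if (select(fd + 1, &readfds, NULL, NULL, &tv) > 0) {
		return FD_ISSET(fd, &readfds) != 0;
	}
	return 0;
}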
#if __GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 3
#define ATTR_CMNEXT_NOFIRMLINKPATH 0x00000020
#define ATTR_CMNEXT_REALDEVID 0x00000040
#define ATTR_CMNEXT_REALFSID 0x00000080
+#define ATTR_CMNEXT_CLONEID 0x00000100
+#define ATTR_CMNEXT_EXT_FLAGS 0x00000200
-#define ATTR_CMNEXT_VALIDMASK 0x000000fc
+#define ATTR_CMNEXT_VALIDMASK 0x000003fc
#define ATTR_CMNEXT_SETMASK 0x00000000
/* Deprecated fork attributes */
} dtrace_module_symbols_t;
#define DTRACE_MODULE_SYMBOLS_SIZE(count) (sizeof(dtrace_module_symbols_t) + ((count - 1) * sizeof(dtrace_symbol_t)))
+#define DTRACE_MODULE_SYMBOLS_COUNT(size) ((size - sizeof(dtrace_module_symbols_t)) / sizeof(dtrace_symbol_t) + 1)
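A quick sanity sketch (not part of the header) showing that the new count macro inverts the existing size macro for any count >= 1, assuming the dtrace type definitions above are in scope:

#include <assert.h>
#include <stddef.h>

/* Illustrative only: DTRACE_MODULE_SYMBOLS_COUNT() undoes DTRACE_MODULE_SYMBOLS_SIZE(). */
static void
module_symbols_macros_roundtrip(void)
{
	size_t count = 64;
	size_t size = DTRACE_MODULE_SYMBOLS_SIZE(count);	/* struct header plus 63 trailing dtrace_symbol_t */
	assert(DTRACE_MODULE_SYMBOLS_COUNT(size) == count);	/* recovers 64 */
}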
typedef struct dtrace_module_uuids_list {
uint64_t dtmul_count;
uint64_t ip_dyld_fsid;
uint64_t ip_dyld_fsobjid;
unsigned int ip_simulator_binary; /* simulator binary flags */
+
+ ipc_port_t ip_sc_port; /* SUID port. */
};
/*
/* The Kernel Debug Sub Classes for DBG_MISC */
#define DBG_MISC_COREBRIGHTNESS 0x01
+#define DBG_MISC_VIDEOENG 0x02
#define DBG_EVENT 0x10
#define DBG_MISC_INSTRUMENTS 0x11
#define DBG_MISC_INSTRUMENTSBT 0x12
#ifdef XNU_KERNEL_PRIVATE
extern unsigned long freeze_threshold_percentage;
-extern unsigned int memorystatus_frozen_count;
+extern unsigned int memorystatus_frozen_count; /* # of processes that are currently frozen. */
extern unsigned int memorystatus_frozen_processes_max;
extern unsigned int memorystatus_frozen_shared_mb;
extern unsigned int memorystatus_frozen_shared_mb_max;
extern unsigned int memorystatus_freeze_shared_mb_per_process_max; /* Max. MB allowed per process to be freezer-eligible. */
extern unsigned int memorystatus_freeze_private_shared_pages_ratio; /* Ratio of private:shared pages for a process to be freezer-eligible. */
extern unsigned int memorystatus_suspended_count;
-extern unsigned int memorystatus_thaw_count;
+extern unsigned int memorystatus_thaw_count; /* # of processes that have been thawed in the current interval. */
extern unsigned int memorystatus_refreeze_eligible_count; /* # of processes currently thawed i.e. have state on disk & in-memory */
void memorystatus_freeze_init(void);
#define __KPI_MBUF__
#include <sys/kernel_types.h>
#include <mach/vm_types.h>
+
+#ifndef PRIVATE
+#include <Availability.h>
+#define __NKE_API_DEPRECATED __API_DEPRECATED("Network Kernel Extension KPI is deprecated", macos(10.4, 10.15.4))
+#else
+#define __NKE_API_DEPRECATED
+#endif /* PRIVATE */
+
#ifdef KERNEL_PRIVATE
#include <mach/kern_return.h>
#endif /* KERNEL_PRIVATE */
* @param mbuf The mbuf.
* @result A pointer to the data in the mbuf.
*/
-extern void *mbuf_data(mbuf_t mbuf);
+extern void *mbuf_data(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_datastart
* @param mbuf The mbuf.
* @result A pointer to smallest possible value for data.
*/
-extern void *mbuf_datastart(mbuf_t mbuf);
+extern void *mbuf_datastart(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_setdata
* @param len The new length of data in the mbuf.
* @result 0 on success, errno error on failure.
*/
-extern errno_t mbuf_setdata(mbuf_t mbuf, void *data, size_t len);
+extern errno_t mbuf_setdata(mbuf_t mbuf, void *data, size_t len)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_align_32
* data location.
* @result 0 on success, errno error on failure.
*/
-extern errno_t mbuf_align_32(mbuf_t mbuf, size_t len);
+extern errno_t mbuf_align_32(mbuf_t mbuf, size_t len)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_data_to_physical
* @result The 64 bit physical address of the mbuf data or NULL if ptr
* does not point to data stored in an mbuf.
*/
-extern addr64_t mbuf_data_to_physical(void *ptr);
+extern addr64_t mbuf_data_to_physical(void *ptr)
+__NKE_API_DEPRECATED;
/* Allocation */
* @param mbuf The mbuf.
* @result 0 on success, errno error on failure.
*/
-extern errno_t mbuf_get(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf);
+extern errno_t mbuf_get(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_gethdr
* @param mbuf The mbuf.
* @result 0 on success, errno error on failure.
*/
-extern errno_t mbuf_gethdr(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf);
+extern errno_t mbuf_gethdr(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_attachcluster
*/
extern errno_t mbuf_attachcluster(mbuf_how_t how, mbuf_type_t type,
mbuf_t *mbuf, caddr_t extbuf, void (*extfree)(caddr_t, u_int, caddr_t),
- size_t extsize, caddr_t extarg);
+ size_t extsize, caddr_t extarg)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_alloccluster
* In this case, the caller is advised to use 4096 bytes or
 * smaller during subsequent requests.
*/
-extern errno_t mbuf_alloccluster(mbuf_how_t how, size_t *size, caddr_t *addr);
+extern errno_t mbuf_alloccluster(mbuf_how_t how, size_t *size, caddr_t *addr)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_freecluster
* @param addr The address of the cluster.
* @param size The actual size of the cluster.
*/
-extern void mbuf_freecluster(caddr_t addr, size_t size);
+extern void mbuf_freecluster(caddr_t addr, size_t size)
+__NKE_API_DEPRECATED;
#ifdef BSD_KERNEL_PRIVATE
/*
*/
extern errno_t mbuf_getcluster(mbuf_how_t how, mbuf_type_t type, size_t size,
- mbuf_t *mbuf);
+ mbuf_t *mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_mclget
* will be freed. If you specify an mbuf value in *mbuf,
* mbuf_mclget will not free it.
*/
-extern errno_t mbuf_mclget(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf);
+extern errno_t mbuf_mclget(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_allocpacket
* chunks requested
*/
extern errno_t mbuf_allocpacket(mbuf_how_t how, size_t packetlen,
- unsigned int * maxchunks, mbuf_t *mbuf);
+ unsigned int * maxchunks, mbuf_t *mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_allocpacket_list
* chunks requested
*/
extern errno_t mbuf_allocpacket_list(unsigned int numpkts, mbuf_how_t how,
- size_t packetlen, unsigned int * maxchunks, mbuf_t *mbuf);
-
+ size_t packetlen, unsigned int * maxchunks, mbuf_t *mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_getpacket
* @param mbuf Upon success, *mbuf will be a reference to the new mbuf.
* @result 0 on success, errno error on failure.
*/
-extern errno_t mbuf_getpacket(mbuf_how_t how, mbuf_t *mbuf);
+extern errno_t mbuf_getpacket(mbuf_how_t how, mbuf_t *mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_free
* @param mbuf The mbuf to free.
* @result The next mbuf in the chain.
*/
-extern mbuf_t mbuf_free(mbuf_t mbuf);
+extern mbuf_t mbuf_free(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_freem
 * @discussion Frees a chain of mbufs linked through mnext.
* @param mbuf The first mbuf in the chain to free.
*/
-extern void mbuf_freem(mbuf_t mbuf);
+extern void mbuf_freem(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_freem_list
* @param mbuf The first mbuf in the linked list to free.
* @result The number of mbufs freed.
*/
-extern int mbuf_freem_list(mbuf_t mbuf);
+extern int mbuf_freem_list(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_leadingspace
* @param mbuf The mbuf.
* @result The number of unused bytes at the start of the mbuf.
*/
-extern size_t mbuf_leadingspace(const mbuf_t mbuf);
+extern size_t mbuf_leadingspace(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_trailingspace
* @param mbuf The mbuf.
* @result The number of unused bytes following the current data.
*/
-extern size_t mbuf_trailingspace(const mbuf_t mbuf);
+extern size_t mbuf_trailingspace(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/* Manipulation */
* @result 0 upon success otherwise the errno error.
*/
extern errno_t mbuf_copym(const mbuf_t src, size_t offset, size_t len,
- mbuf_how_t how, mbuf_t *new_mbuf);
+ mbuf_how_t how, mbuf_t *new_mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_dup
* @param new_mbuf Upon success, the newly allocated mbuf.
* @result 0 upon success otherwise the errno error.
*/
-extern errno_t mbuf_dup(const mbuf_t src, mbuf_how_t how, mbuf_t *new_mbuf);
+extern errno_t mbuf_dup(const mbuf_t src, mbuf_how_t how, mbuf_t *new_mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_prepend
* @param how Blocking or non-blocking.
* @result 0 upon success otherwise the errno error.
*/
-extern errno_t mbuf_prepend(mbuf_t *mbuf, size_t len, mbuf_how_t how);
+extern errno_t mbuf_prepend(mbuf_t *mbuf, size_t len, mbuf_how_t how)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_split
* preserved.
*/
extern errno_t mbuf_split(mbuf_t src, size_t offset, mbuf_how_t how,
- mbuf_t *new_mbuf);
+ mbuf_t *new_mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_pullup
* @result 0 upon success otherwise the errno error. In the case of an
* error, the mbuf chain has been freed.
*/
-extern errno_t mbuf_pullup(mbuf_t *mbuf, size_t len);
+extern errno_t mbuf_pullup(mbuf_t *mbuf, size_t len)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_pulldown
* @result 0 upon success otherwise the errno error.
*/
extern errno_t mbuf_pulldown(mbuf_t src, size_t *offset, size_t length,
- mbuf_t *location);
+ mbuf_t *location)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_adj
* @param mbuf The mbuf chain to trim.
* @param len The number of bytes to trim from the mbuf chain.
*/
-extern void mbuf_adj(mbuf_t mbuf, int len);
+extern void mbuf_adj(mbuf_t mbuf, int len)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_adjustlen
* @param amount The number of bytes increment the length by.
* @result 0 upon success otherwise the errno error.
*/
-extern errno_t mbuf_adjustlen(mbuf_t mbuf, int amount);
+extern errno_t mbuf_adjustlen(mbuf_t mbuf, int amount)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_concatenate
* chain. Otherwise it returns NULL if the original dst mbuf
* chain is NULL.
*/
-extern mbuf_t mbuf_concatenate(mbuf_t dst, mbuf_t src);
+extern mbuf_t mbuf_concatenate(mbuf_t dst, mbuf_t src)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_copydata
* @result 0 upon success otherwise the errno error.
*/
extern errno_t mbuf_copydata(const mbuf_t mbuf, size_t offset, size_t length,
- void *out_data);
+ void *out_data)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_copyback
* @result 0 upon success, EINVAL or ENOBUFS upon failure.
*/
extern errno_t mbuf_copyback(mbuf_t mbuf, size_t offset, size_t length,
- const void *data, mbuf_how_t how);
+ const void *data, mbuf_how_t how)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_mclhasreference
* @param mbuf The mbuf with the cluster to test.
* @result 0 if there is no reference by another mbuf, 1 otherwise.
*/
-extern int mbuf_mclhasreference(mbuf_t mbuf);
+extern int mbuf_mclhasreference(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/* mbuf header */
* @param mbuf The mbuf.
* @result The next mbuf in the chain.
*/
-extern mbuf_t mbuf_next(const mbuf_t mbuf);
+extern mbuf_t mbuf_next(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_setnext
* @param next The new next mbuf.
* @result 0 upon success otherwise the errno error.
*/
-extern errno_t mbuf_setnext(mbuf_t mbuf, mbuf_t next);
+extern errno_t mbuf_setnext(mbuf_t mbuf, mbuf_t next)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_nextpkt
* @param mbuf The mbuf.
* @result The nextpkt.
*/
-extern mbuf_t mbuf_nextpkt(const mbuf_t mbuf);
+extern mbuf_t mbuf_nextpkt(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_setnextpkt
* @param mbuf The mbuf.
* @param nextpkt The new next packet.
*/
-extern void mbuf_setnextpkt(mbuf_t mbuf, mbuf_t nextpkt);
+extern void mbuf_setnextpkt(mbuf_t mbuf, mbuf_t nextpkt)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_len
* @param mbuf The mbuf.
* @result The length.
*/
-extern size_t mbuf_len(const mbuf_t mbuf);
+extern size_t mbuf_len(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_setlen
* @param mbuf The mbuf.
* @param len The new length.
*/
-extern void mbuf_setlen(mbuf_t mbuf, size_t len);
+extern void mbuf_setlen(mbuf_t mbuf, size_t len)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_maxlen
* @param mbuf The mbuf.
 * @result The maximum length of data for this mbuf.
*/
-extern size_t mbuf_maxlen(const mbuf_t mbuf);
+extern size_t mbuf_maxlen(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_type
* @param mbuf The mbuf.
* @result The type.
*/
-extern mbuf_type_t mbuf_type(const mbuf_t mbuf);
+extern mbuf_type_t mbuf_type(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_settype
* @param new_type The new type.
* @result 0 upon success otherwise the errno error.
*/
-extern errno_t mbuf_settype(mbuf_t mbuf, mbuf_type_t new_type);
+extern errno_t mbuf_settype(mbuf_t mbuf, mbuf_type_t new_type)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_flags
* @param mbuf The mbuf.
* @result The flags.
*/
-extern mbuf_flags_t mbuf_flags(const mbuf_t mbuf);
+extern mbuf_flags_t mbuf_flags(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_setflags
* cleared. Certain flags such as MBUF_EXT cannot be altered.
* @result 0 upon success otherwise the errno error.
*/
-extern errno_t mbuf_setflags(mbuf_t mbuf, mbuf_flags_t flags);
+extern errno_t mbuf_setflags(mbuf_t mbuf, mbuf_flags_t flags)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_setflags_mask
* @result 0 upon success otherwise the errno error.
*/
extern errno_t mbuf_setflags_mask(mbuf_t mbuf, mbuf_flags_t flags,
- mbuf_flags_t mask);
+ mbuf_flags_t mask)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_copy_pkthdr
* @param dest The mbuf to which the packet header will be copied.
* @result 0 upon success otherwise the errno error.
*/
-extern errno_t mbuf_copy_pkthdr(mbuf_t dest, const mbuf_t src);
+extern errno_t mbuf_copy_pkthdr(mbuf_t dest, const mbuf_t src)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_pkthdr_len
* @param mbuf The mbuf containing the packet header
* @result The length, in bytes, of the packet.
*/
-extern size_t mbuf_pkthdr_len(const mbuf_t mbuf);
+extern size_t mbuf_pkthdr_len(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_pkthdr_setlen
* @param mbuf The mbuf containing the packet header.
* @param len The new length of the packet.
*/
-extern void mbuf_pkthdr_setlen(mbuf_t mbuf, size_t len);
+extern void mbuf_pkthdr_setlen(mbuf_t mbuf, size_t len)
+__NKE_API_DEPRECATED;
#ifdef XNU_KERNEL_PRIVATE
/*!
* @param amount The number of bytes to adjust the packet header length
* field by.
*/
-extern void mbuf_pkthdr_adjustlen(mbuf_t mbuf, int amount);
+extern void mbuf_pkthdr_adjustlen(mbuf_t mbuf, int amount)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_pkthdr_rcvif
* @param mbuf The mbuf containing the packet header.
* @result A reference to the interface.
*/
-extern ifnet_t mbuf_pkthdr_rcvif(const mbuf_t mbuf);
+extern ifnet_t mbuf_pkthdr_rcvif(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_pkthdr_setrcvif
* @param ifp A reference to an interface.
* @result 0 upon success otherwise the errno error.
*/
-extern errno_t mbuf_pkthdr_setrcvif(mbuf_t mbuf, ifnet_t ifp);
+extern errno_t mbuf_pkthdr_setrcvif(mbuf_t mbuf, ifnet_t ifp)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_pkthdr_header
* @param mbuf The mbuf containing the packet header.
* @result A pointer to the packet header.
*/
-extern void *mbuf_pkthdr_header(const mbuf_t mbuf);
+extern void *mbuf_pkthdr_header(const mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_pkthdr_setheader
* @param mbuf The mbuf containing the packet header.
* @param header A pointer to the header.
*/
-extern void mbuf_pkthdr_setheader(mbuf_t mbuf, void *header);
+extern void mbuf_pkthdr_setheader(mbuf_t mbuf, void *header)
+__NKE_API_DEPRECATED;
/* Checksums */
* original checksum was valid.
* @param mbuf The mbuf that has been modified.
*/
-extern void mbuf_inbound_modified(mbuf_t mbuf);
+extern void mbuf_inbound_modified(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_outbound_finalize
* would be the length of an ethernet header.
*/
extern void mbuf_outbound_finalize(mbuf_t mbuf, u_int32_t protocol_family,
- size_t protocol_offset);
+ size_t protocol_offset)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_set_vlan_tag
* @param vlan The protocol family of the aux data to add.
* @result 0 upon success otherwise the errno error.
*/
-extern errno_t mbuf_set_vlan_tag(mbuf_t mbuf, u_int16_t vlan);
+extern errno_t mbuf_set_vlan_tag(mbuf_t mbuf, u_int16_t vlan)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_get_vlan_tag
* @result 0 upon success otherwise the errno error. ENXIO indicates
* that the vlan tag is not set.
*/
-extern errno_t mbuf_get_vlan_tag(mbuf_t mbuf, u_int16_t *vlan);
+extern errno_t mbuf_get_vlan_tag(mbuf_t mbuf, u_int16_t *vlan)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_clear_vlan_tag
* @param mbuf The mbuf containing the packet.
* @result 0 upon success otherwise the errno error.
*/
-extern errno_t mbuf_clear_vlan_tag(mbuf_t mbuf);
+extern errno_t mbuf_clear_vlan_tag(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
#ifdef KERNEL_PRIVATE
/*!
* @result 0 upon success otherwise the errno error.
*/
extern errno_t mbuf_get_csum_requested(mbuf_t mbuf,
- mbuf_csum_request_flags_t *request, u_int32_t *value);
+ mbuf_csum_request_flags_t *request, u_int32_t *value)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_get_tso_requested
* @result 0 upon success otherwise the errno error.
*/
extern errno_t mbuf_get_tso_requested(mbuf_t mbuf,
- mbuf_tso_request_flags_t *request, u_int32_t *value);
+ mbuf_tso_request_flags_t *request, u_int32_t *value)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_clear_csum_requested
* @param mbuf The mbuf containing the packet.
* @result 0 upon success otherwise the errno error.
*/
-extern errno_t mbuf_clear_csum_requested(mbuf_t mbuf);
+extern errno_t mbuf_clear_csum_requested(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_set_csum_performed
* @result 0 upon success otherwise the errno error.
*/
extern errno_t mbuf_set_csum_performed(mbuf_t mbuf,
- mbuf_csum_performed_flags_t flags, u_int32_t value);
+ mbuf_csum_performed_flags_t flags, u_int32_t value)
+__NKE_API_DEPRECATED;
#ifdef KERNEL_PRIVATE
/*
* legacy MLEN macro.
* @result The number of bytes of available data.
*/
-extern u_int32_t mbuf_get_mlen(void);
+extern u_int32_t mbuf_get_mlen(void)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_get_mhlen
* header mbuf. This is equivalent to the legacy MHLEN macro.
* @result The number of bytes of available data.
*/
-extern u_int32_t mbuf_get_mhlen(void);
+extern u_int32_t mbuf_get_mhlen(void)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_get_minclsize
* legacy MINCLSIZE macro.
* @result The minimum number of bytes before a cluster will be used.
*/
-extern u_int32_t mbuf_get_minclsize(void);
+extern u_int32_t mbuf_get_minclsize(void)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_clear_csum_performed
* @param mbuf The mbuf containing the packet.
* @result 0 upon success otherwise the errno error.
*/
-extern errno_t mbuf_clear_csum_performed(mbuf_t mbuf);
+extern errno_t mbuf_clear_csum_performed(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_inet_cksum
* @result 0 upon success otherwise the errno error.
*/
extern errno_t mbuf_inet_cksum(mbuf_t mbuf, int protocol, u_int32_t offset,
- u_int32_t length, u_int16_t *csum);
+ u_int32_t length, u_int16_t *csum)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_inet6_cksum
* @result 0 upon success otherwise the errno error.
*/
extern errno_t mbuf_inet6_cksum(mbuf_t mbuf, int protocol, u_int32_t offset,
- u_int32_t length, u_int16_t *csum);
+ u_int32_t length, u_int16_t *csum)
+__NKE_API_DEPRECATED;
/* mbuf tags */
* @result 0 upon success otherwise the errno error.
*/
extern errno_t mbuf_tag_id_find(const char *module_string,
- mbuf_tag_id_t *module_id);
+ mbuf_tag_id_t *module_id)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_tag_allocate
* @result 0 upon success otherwise the errno error.
*/
extern errno_t mbuf_tag_allocate(mbuf_t mbuf, mbuf_tag_id_t module_id,
- mbuf_tag_type_t type, size_t length, mbuf_how_t how, void **data_p);
+ mbuf_tag_type_t type, size_t length, mbuf_how_t how, void **data_p)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_tag_find
* @result 0 upon success otherwise the errno error.
*/
extern errno_t mbuf_tag_find(mbuf_t mbuf, mbuf_tag_id_t module_id,
- mbuf_tag_type_t type, size_t *length, void **data_p);
+ mbuf_tag_type_t type, size_t *length, void **data_p)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_tag_free
* @param type The type of the tag to free.
*/
extern void mbuf_tag_free(mbuf_t mbuf, mbuf_tag_id_t module_id,
- mbuf_tag_type_t type);
+ mbuf_tag_type_t type)
+__NKE_API_DEPRECATED;
#ifdef KERNEL_PRIVATE
/*!
* @discussion Get the mbuf statistics.
* @param stats Storage to copy the stats in to.
*/
-extern void mbuf_stats(struct mbuf_stat *stats);
+extern void mbuf_stats(struct mbuf_stat *stats)
+__NKE_API_DEPRECATED;
/*!
* @param mbuf The mbuf to get the traffic class of.
* @result The traffic class
*/
-extern mbuf_traffic_class_t mbuf_get_traffic_class(mbuf_t mbuf);
+extern mbuf_traffic_class_t mbuf_get_traffic_class(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_set_traffic_class
* @param tc The traffic class
* @result 0 on success, EINVAL if bad parameter is passed
*/
-extern errno_t mbuf_set_traffic_class(mbuf_t mbuf, mbuf_traffic_class_t tc);
+extern errno_t mbuf_set_traffic_class(mbuf_t mbuf, mbuf_traffic_class_t tc)
+__NKE_API_DEPRECATED;
/*!
* @function mbuf_is_traffic_class_privileged
* @param mbuf The mbuf to retrieve the status from.
* @result Non-zero if privileged, 0 otherwise.
*/
-extern int mbuf_is_traffic_class_privileged(mbuf_t mbuf);
+extern int mbuf_is_traffic_class_privileged(mbuf_t mbuf)
+__NKE_API_DEPRECATED;
#ifdef KERNEL_PRIVATE
#define MNTK_SWAP_MOUNT 0x00000100 /* we are swapping to this mount */
#define MNTK_DENY_READDIREXT 0x00000200 /* Deny Extended-style readdir's for this volume */
#define MNTK_PERMIT_UNMOUNT 0x00000400 /* Allow (non-forced) unmounts by UIDs other than the one that mounted the volume */
-#ifdef NFSCLIENT
#define MNTK_TYPENAME_OVERRIDE 0x00000800 /* override the fstypename for statfs() */
-#endif /* NFSCLIENT */
#define MNTK_KERNEL_MOUNT 0x00001000 /* mount came from kernel side */
#ifdef CONFIG_IMGSRC_ACCESS
#define MNTK_HAS_MOVED 0x00002000
/* this routine returns the pid of the parent of the current process */
extern int proc_selfppid(void);
/* this routine returns the csflags of the current process */
-extern int proc_selfcsflags(void);
+extern uint64_t proc_selfcsflags(void);
+/* this routine populates the given flags param with the csflags of the given process. Returns 0 on success, -1 on error. */
+extern int proc_csflags(proc_t p, uint64_t* flags);
/* this routine returns sends a signal signum to the process identified by the pid */
extern void proc_signal(int pid, int signum);
/* this routine checks whether any signal identified by the mask are pending in the process identified by the pid. The check is on all threads of the process. */
extern int proc_tbe(proc_t);
+/*!
+ * @function proc_gettty
+ * @abstract Copies the associated tty vnode for a given process if it exists. The caller needs to decrement the iocount of the vnode.
+ * @return 0 on success. ENOENT if the process has no associated TTY. EINVAL if arguments are NULL or vnode_getwithvid fails.
+ */
+extern int proc_gettty(proc_t p, vnode_t *vp);
+
+/* this routine populates the associated tty device for a given process if it exists; returns 0 on success, or EINVAL on failure */
+extern int proc_gettty_dev(proc_t p, dev_t *dev);
+
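/*
 * Illustrative sketch (not part of this change) combining the KPIs above:
 * query a process's code-signing flags and its controlling tty. proc_gettty()
 * returns the tty vnode with an iocount held, so the caller must drop it with
 * vnode_put() when done.
 */
static void
example_inspect_proc(proc_t p)
{
	uint64_t csflags = 0;
	vnode_t tty_vp = NULLVP;
	dev_t tty_dev;

	if (proc_csflags(p, &csflags) == 0) {
		/* csflags now holds the CS_* flags of p */
	}
	if (proc_gettty(p, &tty_vp) == 0) {
		/* ... use tty_vp ... */
		vnode_put(tty_vp);	/* release the iocount taken on our behalf */
	}
	if (proc_gettty_dev(p, &tty_dev) == 0) {
		/* tty_dev is the dev_t of p's controlling terminal */
	}
}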
/*!
* @function proc_selfpgrpid
* @abstract Get the process group id for the current process, as with proc_pgrpid().
pid_t e_pid; /* pid of the effective owner */
u_int64_t e_upid; /* upid of the effective owner */
+#if defined(XNU_TARGET_OS_OSX)
+ pid_t so_rpid; /* pid of the responsible process */
+#endif
uuid_t last_uuid; /* uuid of most recent accessor */
uuid_t e_uuid; /* uuid of effective owner */
uuid_t so_vuuid; /* UUID of the Voucher originator */
+#if defined(XNU_TARGET_OS_OSX)
+ uuid_t so_ruuid; /* UUID of the responsible process */
+#endif
int32_t so_policy_gencnt; /* UUID policy gencnt */
PSPA_AU_SESSION = 2,
PSPA_IMP_WATCHPORTS = 3,
PSPA_REGISTERED_PORTS = 4,
+ PSPA_SUID_CRED = 6,
} pspa_t;
/*
#endif
#endif
+/*
+ * Extended flags ("EF") returned by ATTR_CMNEXT_EXT_FLAGS from getattrlist/getattrlistbulk
+ */
+#define EF_MAY_SHARE_BLOCKS 0x00000001 /* file may share blocks with another file */
+#define EF_NO_XATTRS 0x00000002 /* file has no xattrs at all */
+#define EF_IS_SYNC_ROOT 0x00000004 /* file is a sync root for iCloud */
+#define EF_IS_PURGEABLE 0x00000008 /* file is purgeable */
+#define EF_IS_SPARSE 0x00000010 /* file has at least one sparse region */
+
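/*
 * Illustrative userspace sketch (not part of this change): requesting the new
 * extended flags. ATTR_CMNEXT_* attributes are requested through the forkattr
 * field together with FSOPT_ATTR_CMN_EXTENDED (see getattrlist(2)).
 */
#include <sys/attr.h>
#include <unistd.h>

struct extflags_reply {
	uint32_t	length;		/* total size of the returned attributes */
	uint64_t	ext_flags;	/* ATTR_CMNEXT_EXT_FLAGS value */
} __attribute__((aligned(4), packed));

static int
file_shares_blocks(const char *path)
{
	struct attrlist al = {
		.bitmapcount = ATTR_BIT_MAP_COUNT,
		.forkattr = ATTR_CMNEXT_EXT_FLAGS,
	};
	struct extflags_reply reply;

	if (getattrlist(path, &al, &reply, sizeof(reply),
	    FSOPT_ATTR_CMN_EXTENDED) != 0) {
		return -1;
	}
	return (reply.ext_flags & EF_MAY_SHARE_BLOCKS) != 0;
}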
+
+
#ifndef KERNEL
__BEGIN_DECLS
#define VNODE_ATTR_va_fsid64 (1LL<<41) /* 20000000000 */
#define VNODE_ATTR_va_write_gencount (1LL<<42) /* 40000000000 */
#define VNODE_ATTR_va_private_size (1LL<<43) /* 80000000000 */
+#define VNODE_ATTR_va_clone_id (1LL<<44) /* 100000000000 */
+#define VNODE_ATTR_va_extflags (1LL<<45) /* 200000000000 */
#define VNODE_ATTR_BIT(n) (VNODE_ATTR_ ## n)
VNODE_ATTR_BIT(va_rsrc_alloc) | \
VNODE_ATTR_BIT(va_fsid64) | \
VNODE_ATTR_BIT(va_write_gencount) | \
- VNODE_ATTR_BIT(va_private_size))
+ VNODE_ATTR_BIT(va_private_size) | \
+ VNODE_ATTR_BIT(va_clone_id) | \
+ VNODE_ATTR_BIT(va_extflags))
/*
* Read-only attributes.
VNODE_ATTR_BIT(va_rsrc_length) | \
VNODE_ATTR_BIT(va_rsrc_alloc) | \
VNODE_ATTR_BIT(va_fsid64) | \
- VNODE_ATTR_BIT(va_write_gencount) | \
- VNODE_ATTR_BIT(va_private_size))
+ VNODE_ATTR_BIT(va_write_gencount) | \
+ VNODE_ATTR_BIT(va_private_size) | \
+ VNODE_ATTR_BIT(va_clone_id) | \
+ VNODE_ATTR_BIT(va_extflags))
+
/*
* Attributes that can be applied to a new file object.
*/
uint32_t va_write_gencount; /* counter that increments each time the file changes */
uint64_t va_private_size; /* If the file were deleted, how many bytes would be freed immediately */
+ uint64_t va_clone_id; /* If a file is cloned this is a unique id shared by all "perfect" clones */
+ uint64_t va_extflags; /* extended file/directory flags */
/* add new fields here only */
};
*/
int vn_authorize_unlink(vnode_t dvp, vnode_t vp, struct componentname *cnp, vfs_context_t ctx, void *reserved);
+
+/*!
+ * @function vn_authorize_rmdir
+ * @abstract Authorize an rmdir operation given the vfs_context_t
+ * @discussion Check if the context associated with the vfs_context_t is allowed to rmdir the vnode vp in directory dvp.
+ * @param dvp Parent vnode of the directory to be rmdir'ed
+ * @param vp The vnode to be rmdir'ed
+ * @param cnp A componentname containing the name of the directory to be rmdir'ed. May be NULL.
+ * @param reserved Pass NULL
+ * @return Returns zero if the operation is allowed, non-zero if the rmdir is not authorized.
+ */
+int vn_authorize_rmdir(vnode_t dvp, vnode_t vp, struct componentname *cnp, vfs_context_t ctx, void *reserved);
+
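/*
 * Illustrative sketch (not part of this change): a caller that has resolved
 * the directory with namei() would authorize before issuing the VNOP, e.g.:
 *
 *	error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
 *	if (error == 0)
 *		error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
 */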
/*!
* @function vn_getpath_fsenter
* @abstract Attempt to get a vnode's path, willing to enter the filesystem.
#define VN_GETPATH_FSENTER 0x0001 /* Can re-enter filesystem */
#define VN_GETPATH_NO_FIRMLINK 0x0002
#define VN_GETPATH_VOLUME_RELATIVE 0x0004 /* also implies VN_GETPATH_NO_FIRMLINK */
+#define VN_GETPATH_NO_PROCROOT 0x0008 /* Give the non chrooted path for a process */
#endif /* KERNEL_PRIVATE */
#define BUILDPATH_CHECK_MOVED 0x4 /* Return EAGAIN if the parent hierarchy is modified */
#define BUILDPATH_VOLUME_RELATIVE 0x8 /* Return path relative to the nearest mount point */
#define BUILDPATH_NO_FIRMLINK 0x10 /* Return non-firmlinked path */
+#define BUILDPATH_NO_PROCROOT 0x20 /* Return path relative to system root, not the process root */
int build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs_context_t ctx);
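/*
 * Illustrative sketch (not part of this change): resolving a vnode's path
 * relative to the system root even when the calling process is chrooted,
 * using the build_path() declaration and flags above.
 */
static int
example_system_path(vnode_t vp, char *buf, int buflen, vfs_context_t ctx)
{
	int outlen = 0;

	return build_path(vp, buf, buflen, &outlen,
	    BUILDPATH_NO_FIRMLINK | BUILDPATH_NO_PROCROOT, ctx);
}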
int vn_authorize_renamex_with_paths(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp, const char *from_path,
struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp, const char *to_path,
vfs_context_t ctx, vfs_rename_flags_t flags, void *reserved);
-int vn_authorize_rmdir(vnode_t dvp, vnode_t vp, struct componentname *cnp, vfs_context_t ctx, void *reserved);
typedef int (*vn_create_authorizer_t)(vnode_t, struct componentname *, struct vnode_attr *, vfs_context_t, void*);
int vn_authorize_mkdir(vnode_t, struct componentname *, struct vnode_attr *, vfs_context_t, void*);
{.attr = ATTR_CMNEXT_NOFIRMLINKPATH, .bits = 0, .size = sizeof(struct attrreference), .action = KAUTH_VNODE_READ_ATTRIBUTES},
{.attr = ATTR_CMNEXT_REALDEVID, .bits = VATTR_BIT(va_devid), .size = sizeof(uint32_t), .action = KAUTH_VNODE_READ_ATTRIBUTES},
{.attr = ATTR_CMNEXT_REALFSID, .bits = VATTR_BIT(va_fsid64), .size = sizeof(fsid_t), .action = KAUTH_VNODE_READ_ATTRIBUTES},
+ {.attr = ATTR_CMNEXT_CLONEID, .bits = VATTR_BIT(va_clone_id), .size = sizeof(uint64_t), .action = KAUTH_VNODE_READ_ATTRIBUTES},
+ {.attr = ATTR_CMNEXT_EXT_FLAGS, .bits = VATTR_BIT(va_extflags), .size = sizeof(uint64_t), .action = KAUTH_VNODE_READ_ATTRIBUTES},
{.attr = 0, .bits = 0, .size = 0, .action = 0}
};
#define VFS_DFLT_ATTR_CMN_EXT (ATTR_CMNEXT_PRIVATESIZE | ATTR_CMNEXT_LINKID | \
ATTR_CMNEXT_NOFIRMLINKPATH | ATTR_CMNEXT_REALDEVID | \
- ATTR_CMNEXT_REALFSID)
+ ATTR_CMNEXT_REALFSID | ATTR_CMNEXT_CLONEID | \
+ ATTR_CMNEXT_EXT_FLAGS)
#define VFS_DFLT_ATTR_DIR (ATTR_DIR_LINKCOUNT | ATTR_DIR_MOUNTSTATUS)
VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: could not allocate f_vol_name buffer");
goto out;
}
+ vs.f_vol_name[0] = '\0';
}
VFS_DEBUG(ctx, vp, "ATTRLIST - calling to get %016llx with supported %016llx", vs.f_active, vs.f_supported);
}
}
+ if (alp->forkattr & ATTR_CMNEXT_CLONEID) {
+ if (VATTR_IS_SUPPORTED(vap, va_clone_id)) {
+ ATTR_PACK8((*abp), vap->va_clone_id);
+ abp->actual.forkattr |= ATTR_CMNEXT_CLONEID;
+ } else if (!return_valid || pack_invalid) {
+ uint64_t zero_val = 0;
+ ATTR_PACK8((*abp), zero_val);
+ }
+ }
+
+ if (alp->forkattr & ATTR_CMNEXT_EXT_FLAGS) {
+ if (VATTR_IS_SUPPORTED(vap, va_extflags)) {
+ ATTR_PACK8((*abp), vap->va_extflags);
+ abp->actual.forkattr |= ATTR_CMNEXT_EXT_FLAGS;
+ } else if (!return_valid || pack_invalid) {
+ uint64_t zero_val = 0;
+ ATTR_PACK8((*abp), zero_val);
+ }
+ }
+
return 0;
}
/*
* Grab the process fd so we can evaluate fd_rdir.
*/
- if (vfs_context_proc(ctx)->p_fd) {
+ if (vfs_context_proc(ctx)->p_fd && !(flags & BUILDPATH_NO_PROCROOT)) {
proc_root_dir_vp = vfs_context_proc(ctx)->p_fd->fd_rdir;
} else {
proc_root_dir_vp = NULL;
#include <sys/mount_internal.h>
#include <sys/vnode_internal.h>
+#include <nfs/nfs_conf.h>
+
/*
* These define the root filesystem, device, and root filesystem type.
*/
*/
static struct vfstable vfstbllist[] = {
/* Sun-compatible Network Filesystem */
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
{
.vfc_vfsops = &nfs_vfsops,
.vfc_name = "nfs",
.vfc_descsize = 0,
.vfc_sysctl = NULL
},
-#endif /* NFSCLIENT */
+#endif /* CONFIG_NFS_CLIENT */
/* Device Filesystem */
#if DEVFS
#if MFS
&mfs_vnodeop_opv_desc,
#endif
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
&nfsv2_vnodeop_opv_desc,
&spec_nfsv2nodeop_opv_desc,
#if CONFIG_NFS4
&fifo_nfsv4nodeop_opv_desc,
#endif /* CONFIG_NFS4 */
#endif /* FIFO */
-#endif /* NFSCLIENT */
+#endif /* CONFIG_NFS_CLIENT */
#if DEVFS
&devfs_vnodeop_opv_desc,
&devfs_spec_vnodeop_opv_desc,
// forward prototype
static void release_event_ref(kfs_event *kfse);
-static int
+static boolean_t
watcher_cares_about_dev(fs_event_watcher *watcher, dev_t dev)
{
unsigned int i;
// if devices_not_to_watch is NULL then we care about all
// events from all devices
if (watcher->devices_not_to_watch == NULL) {
- return 1;
+ return true;
}
for (i = 0; i < watcher->num_devices; i++) {
if (dev == watcher->devices_not_to_watch[i]) {
// found a match! that means we do not
// want events from this device.
- return 0;
+ return false;
}
}
// if we're here it's not in the devices_not_to_watch[]
// list so that means we do care about it
- return 1;
+ return true;
}
break;
}
- if (watcher->event_list[kfse->type] == FSE_REPORT && watcher_cares_about_dev(watcher, kfse->dev)) {
- if (!(watcher->flags & WATCHER_APPLE_SYSTEM_SERVICE) && kfse->type != FSE_DOCID_CREATED && kfse->type != FSE_DOCID_CHANGED && is_ignored_directory(kfse->str)) {
- // If this is not an Apple System Service, skip specified directories
- // radar://12034844
- error = 0;
- skipped = 1;
+ if (watcher->event_list[kfse->type] == FSE_REPORT) {
+ boolean_t watcher_cares;
+
+ if (watcher->devices_not_to_watch == NULL) {
+ watcher_cares = true;
} else {
- skipped = 0;
- if (last_event_ptr == kfse) {
- last_event_ptr = NULL;
- last_event_type = -1;
- last_coalesced_time = 0;
- }
- error = copy_out_kfse(watcher, kfse, uio);
- if (error != 0) {
- // if an event won't fit or encountered an error while
- // we were copying it out, then backup to the last full
- // event and just bail out. if the error was ENOENT
- // then we can continue regular processing, otherwise
- // we should unlock things and return.
- uio_setresid(uio, last_full_event_resid);
- if (error != ENOENT) {
- lck_rw_unlock_shared(&event_handling_lock);
- error = 0;
- goto get_out;
+ lock_watch_table();
+ watcher_cares = watcher_cares_about_dev(watcher, kfse->dev);
+ unlock_watch_table();
+ }
+
+ if (watcher_cares) {
+ if (!(watcher->flags & WATCHER_APPLE_SYSTEM_SERVICE) && kfse->type != FSE_DOCID_CREATED && kfse->type != FSE_DOCID_CHANGED && is_ignored_directory(kfse->str)) {
+ // If this is not an Apple System Service, skip specified directories
+ // radar://12034844
+ error = 0;
+ skipped = 1;
+ } else {
+ skipped = 0;
+ if (last_event_ptr == kfse) {
+ last_event_ptr = NULL;
+ last_event_type = -1;
+ last_coalesced_time = 0;
+ }
+ error = copy_out_kfse(watcher, kfse, uio);
+ if (error != 0) {
+ // if an event won't fit or encountered an error while
+ // we were copying it out, then backup to the last full
+ // event and just bail out. if the error was ENOENT
+ // then we can continue regular processing, otherwise
+ // we should unlock things and return.
+ uio_setresid(uio, last_full_event_resid);
+ if (error != ENOENT) {
+ lck_rw_unlock_shared(&event_handling_lock);
+ error = 0;
+ goto get_out;
+ }
}
- }
- last_full_event_resid = uio_resid(uio);
+ last_full_event_resid = uio_resid(uio);
+ }
}
}
tdp = dp;
dp = tdp->v_mount->mnt_vnodecovered;
- vnode_put(tdp);
-
if ((vnode_getwithref(dp))) {
+ vnode_put(tdp);
dp = NULLVP;
error = ENOENT;
goto bad;
}
+
+ vnode_put(tdp);
+
ndp->ni_dvp = dp;
dp_authorized = 0;
}
#include <sys/lockf.h>
#include <miscfs/fifofs/fifo.h>
+#include <nfs/nfs_conf.h>
+
#include <string.h>
#include <machine/machine_routines.h>
* Clean out any buffers associated with the vnode.
*/
if (flags & DOCLOSE) {
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
if (vp->v_tag == VT_NFS) {
nfs_vinvalbuf(vp, V_SAVE, ctx, 0);
} else
-#endif
+#endif /* CONFIG_NFS_CLIENT */
{
VNOP_FSYNC(vp, MNT_WAIT, ctx);
if (flags & VN_GETPATH_VOLUME_RELATIVE) {
bpflags |= (BUILDPATH_VOLUME_RELATIVE | BUILDPATH_NO_FIRMLINK);
}
+ if (flags & VN_GETPATH_NO_PROCROOT) {
+ bpflags |= BUILDPATH_NO_PROCROOT;
+ }
}
return build_path_with_parent(vp, dvp, pathbuf, *len, len, bpflags, vfs_context_current());
sfs.f_ffree = (user64_long_t)sp->f_ffree;
sfs.f_fsid = sp->f_fsid;
sfs.f_owner = sp->f_owner;
-#ifdef NFSCLIENT
+#ifdef CONFIG_NFS_CLIENT
if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
} else
-#endif
+#endif /* CONFIG_NFS_CLIENT */
{
strlcpy(sfs.f_fstypename, sp->f_fstypename, MFSNAMELEN);
}
sfs.f_fsid = sp->f_fsid;
sfs.f_owner = sp->f_owner;
-#ifdef NFSCLIENT
+#ifdef CONFIG_NFS_CLIENT
if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
} else
-#endif
+#endif /* CONFIG_NFS_CLIENT */
{
strlcpy(sfs.f_fstypename, sp->f_fstypename, MFSNAMELEN);
}
vp = *vpp;
old_error = error;
-#if CONFIG_MACF
- if (!(flags & VN_CREATE_NOLABEL)) {
- error = vnode_label(vnode_mount(vp), dvp, vp, cnp, VNODE_LABEL_CREATE, ctx);
- if (error) {
- goto error;
- }
- }
-#endif
-
/*
* If some of the requested attributes weren't handled by the VNOP,
* use our fallback code.
*/
- if (!VATTR_ALL_SUPPORTED(vap) && *vpp) {
+ if ((error == 0) && !VATTR_ALL_SUPPORTED(vap) && *vpp) {
KAUTH_DEBUG(" CREATE - doing fallback with ACL %p", vap->va_acl);
error = vnode_setattr_fallback(*vpp, vap, ctx);
}
+
#if CONFIG_MACF
-error:
+ if ((error == 0) && !(flags & VN_CREATE_NOLABEL)) {
+ error = vnode_label(vnode_mount(vp), dvp, vp, cnp, VNODE_LABEL_CREATE, ctx);
+ }
#endif
+
if ((error != 0) && (vp != (vnode_t)0)) {
/* If we've done a compound open, close */
if (batched && (old_error == 0) && (vap->va_type == VREG)) {
#include <mach/vfs_nspace.h>
#include <os/log.h>
+#include <nfs/nfs_conf.h>
+
#if ROUTEFS
#include <miscfs/routefs/routefs.h>
#endif /* ROUTEFS */
* Virtual File System System Calls
*/
-#if NFSCLIENT || DEVFS || ROUTEFS
+#if CONFIG_NFS_CLIENT || DEVFS || ROUTEFS
/*
* Private in-kernel mounting spi (NFS only, not exported)
*/
return error;
}
-#endif /* NFSCLIENT || DEVFS */
+#endif /* CONFIG_NFS_CLIENT || DEVFS */
/*
* Mount a file system.
/* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
-#if NFSCLIENT || DEVFS || ROUTEFS
+#if CONFIG_NFS_CLIENT || DEVFS || ROUTEFS
if (kernelmount) {
mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
}
if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0) {
mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
}
-#endif /* NFSCLIENT || DEVFS */
+#endif /* CONFIG_NFS_CLIENT || DEVFS */
update:
error = nameiat(&nd, fd1);
if (error) {
+ if (error == EPERM) {
+ printf("XXX 54841485: nameiat() src EPERM\n");
+ }
return error;
}
vp = nd.ni_vp;
if (vp->v_type == VDIR) {
if (!ISSET(vp->v_mount->mnt_kern_flag, MNTK_DIR_HARDLINKS)) {
error = EPERM; /* POSIX */
+ printf("XXX 54841485: VDIR EPERM\n");
goto out;
}
nd.ni_dirp = link;
error = nameiat(&nd, fd2);
if (error != 0) {
+ if (error == EPERM) {
+ printf("XXX 54841485: nameiat() dst EPERM\n");
+ }
goto out;
}
dvp = nd.ni_dvp;
#if CONFIG_MACF
if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0) {
+ if (error == EPERM) {
+ printf("XXX 54841485: mac_vnode_check_link() EPERM\n");
+ }
goto out2;
}
#endif
/* or to anything that kauth doesn't want us to (eg. immutable items) */
if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0) {
+ if (error == EPERM) {
+ printf("XXX 54841485: vnode_authorize() LINKTARGET EPERM\n");
+ }
goto out2;
}
/* authorize creation of the target note */
if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) {
+ if (error == EPERM) {
+ printf("XXX 54841485: vnode_authorize() ADD_FILE EPERM\n");
+ }
goto out2;
}
/* and finally make the link */
error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
if (error) {
+ if (error == EPERM) {
+ printf("XXX 54841485: VNOP_LINK() EPERM\n");
+ }
goto out2;
}
error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
}
-#if CONFIG_MACF
+ /* do fallback attribute handling */
if (error == 0 && vp) {
- error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
+ error = vnode_setattr_fallback(vp, &va, ctx);
}
-#endif
- /* do fallback attribute handling */
+#if CONFIG_MACF
if (error == 0 && vp) {
- error = vnode_setattr_fallback(vp, &va, ctx);
+ error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
}
+#endif
if (error == 0) {
int update_flags = 0;
int fsevent;
#endif /* CONFIG_FSE */
-#if CONFIG_MACF
- (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
- VNODE_LABEL_CREATE, ctx);
-#endif
/*
* If some of the requested attributes weren't handled by the
* VNOP, use our fallback code.
(void)vnode_setattr_fallback(tvp, &nva, ctx);
}
+#if CONFIG_MACF
+ (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
+ VNODE_LABEL_CREATE, ctx);
+#endif
+
// Make sure the name & parent pointers are hooked up
if (tvp->v_name == NULL) {
update_flags |= VNODE_UPDATE_NAME;
snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
__unused user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx)
{
+ mount_t mp;
vnode_t rvp, snapdvp, snapvp, vp, pvp;
+ struct fs_snapshot_mount_args smnt_data;
int error;
struct nameidata *snapndp, *dirndp;
/* carving out a chunk for structs that are too big to be on stack. */
vp = dirndp->ni_vp;
pvp = dirndp->ni_dvp;
+ mp = vnode_mount(rvp);
if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
error = EINVAL;
- } else {
- mount_t mp = vnode_mount(rvp);
- struct fs_snapshot_mount_args smnt_data;
+ goto out2;
+ }
- smnt_data.sm_mp = mp;
- smnt_data.sm_cnp = &snapndp->ni_cnd;
- error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
- &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), flags & MNT_DONTBROWSE,
- KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
+#if CONFIG_MACF
+ error = mac_mount_check_snapshot_mount(ctx, rvp, vp, &dirndp->ni_cnd, snapndp->ni_cnd.cn_nameptr,
+ mp->mnt_vfsstat.f_fstypename);
+ if (error) {
+ goto out2;
}
+#endif
+ smnt_data.sm_mp = mp;
+ smnt_data.sm_cnp = &snapndp->ni_cnd;
+ error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
+ &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), flags & MNT_DONTBROWSE,
+ KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
+
+out2:
vnode_put(vp);
vnode_put(pvp);
nameidone(dirndp);
#include <kern/assert.h>
#include <sys/kdebug.h>
+#include <nfs/nfs_conf.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
 * of its pages
*/
for (offset = upl_offset; isize; isize -= PAGE_SIZE, offset += PAGE_SIZE) {
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
if (vp->v_tag == VT_NFS) {
/* check with nfs if page is OK to drop */
error = nfs_buf_page_inval(vp, (off_t)f_offset);
} else
-#endif
+#endif /* CONFIG_NFS_CLIENT */
{
blkno = ubc_offtoblk(vp, (off_t)f_offset);
error = buf_invalblkno(vp, blkno, 0);
* Note we must not sleep here if the buffer is busy - that is
* a lock inversion which causes deadlock.
*/
-#if NFSCLIENT
+#if CONFIG_NFS_CLIENT
if (vp->v_tag == VT_NFS) {
/* check with nfs if page is OK to drop */
error = nfs_buf_page_inval(vp, (off_t)f_offset);
} else
-#endif
+#endif /* CONFIG_NFS_CLIENT */
{
blkno = ubc_offtoblk(vp, (off_t)f_offset);
error = buf_invalblkno(vp, blkno, 0);
_vfs_typenum
_vfs_unbusy
_vfs_unmountbyfsid
+_vn_authorize_rmdir
_vn_authorize_unlink
_vn_bwrite
_vn_default_error
_gIOResourcesKey
_gIOServiceKey
_gIOServicePlane
+_gIOSupportedPropertiesKey
_gIOTerminatedNotification
_gIOUserClientClassKey
_gIOWillTerminateNotification
__ZN25IODataQueueDispatchSource19DequeueWithCoalesceEPbU13block_pointerFvPKvmE
__ZN25IODataQueueDispatchSource19EnqueueWithCoalesceEjPbU13block_pointerFvPvmE
-__ZN11IOMemoryMap17_CopyState_InvokeE5IORPCP15OSMetaClassBasePFiS2_P23IOMemoryMapPrivateStateE
+__ZN11IOMemoryMap17_CopyState_InvokeE5IORPCP15OSMetaClassBasePFiS2_P24_IOMemoryMapPrivateStateE
__ZN12IOUserClient22AsyncCompletion_InvokeE5IORPCP15OSMetaClassBasePFvS2_P8OSActioniPKyjE
__ZN12IOUserClient22_ExternalMethod_InvokeE5IORPCP15OSMetaClassBasePFiS2_yPKyjP6OSDataP18IOMemoryDescriptorPyPjyPS6_S8_P8OSActionE
__ZN12IOUserClient30CopyClientMemoryForType_InvokeE5IORPCP15OSMetaClassBasePFiS2_yPyPP18IOMemoryDescriptorE
__ZN16IODispatchSource16SetEnable_InvokeE5IORPCP15OSMetaClassBasePFiS2_bE
__ZN16IODispatchSource19CheckForWork_InvokeE5IORPCP15OSMetaClassBasePFiS2_S0_bE
__ZN16IODispatchSource30SetEnableWithCompletion_InvokeE5IORPCP15OSMetaClassBasePFiS2_bU13block_pointerFvvEE
-__ZN18IOMemoryDescriptor17_CopyState_InvokeE5IORPCP15OSMetaClassBasePFiS2_P16IOMDPrivateStateE
+__ZN18IOMemoryDescriptor17_CopyState_InvokeE5IORPCP15OSMetaClassBasePFiS2_P17_IOMDPrivateStateE
__ZN18IOMemoryDescriptor20PrepareForDMA_InvokeE5IORPCP15OSMetaClassBasePFiS2_yP9IOServiceyyPyS5_PjP16IOAddressSegmentE
__ZN24IOBufferMemoryDescriptor13Create_InvokeE5IORPCPFiyyyPPS_E
__ZN24IOBufferMemoryDescriptor16SetLength_InvokeE5IORPCP15OSMetaClassBasePFiS2_yE
_mac_audit_text
_mac_iokit_check_hid_control
+_mac_mount_check_snapshot_mount
_mac_vnode_check_trigger_resolve
_sbuf_cat
-19.3.0
+19.4.0
# The first line of this file contains the master version number for the kernel.
# All other instances of the kernel version in xnu are derived from this file.
_port_name_to_task
_port_name_to_thread
_post_sys_powersource
+_proc_csflags
_proc_get_syscall_filter_mask_size
_proc_getexecutableoffset
_proc_getexecutablevnode
_proc_pidversion
_proc_set_responsible_pid
_proc_set_syscall_filter_mask
+_proc_selfcsflags
_proc_task
_proc_uniqueid
_proc_puniqueid
+_proc_gettty
+_proc_gettty_dev
_proc_exitstatus
_priv_check_cred
_pru_abort_notsupp
* IOBufferMemoryDescriptor describes a memory buffer allocated in the callers address space.
*
* @discussion
- * To allocate memory for I/O or sharing, use IOBufferMemoryDescriptor::Create()
- * Methods in this class are used for memory that was supplied as a parameter.
+ * To allocate memory for I/O or sharing, use IOBufferMemoryDescriptor::Create().
* IOBufferMemoryDescriptor can be handed to any API that expects an IOMemoryDescriptor.
*/
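/*
 * Illustrative DriverKit sketch (assumed typical usage, not part of this
 * change): allocate a buffer with Create() and obtain its mapping in the
 * dext's address space. The capacity value is made up for the example.
 *
 *	IOBufferMemoryDescriptor * buffer = nullptr;
 *	IOAddressSegment range = {};
 *
 *	if (IOBufferMemoryDescriptor::Create(kIOMemoryDirectionInOut,
 *	    4096, 0, &buffer) == kIOReturnSuccess) {
 *		buffer->SetLength(4096);
 *		buffer->GetAddressRange(&range);	// range.address / range.length
 *		buffer->release();
 *	}
 */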
--- /dev/null
+/*
+ * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ *
+ * Common symbol definitions for IOKit.
+ *
+ * HISTORY
+ *
+ */
+
+
+#ifndef _IOKIT_IOKITKEYS_H
+#define _IOKIT_IOKITKEYS_H
+
+// properties found in the registry root
+#define kIOKitBuildVersionKey "IOKitBuildVersion"
+#define kIOKitDiagnosticsKey "IOKitDiagnostics"
+// a dictionary keyed by plane name
+#define kIORegistryPlanesKey "IORegistryPlanes"
+#define kIOCatalogueKey "IOCatalogue"
+
+// registry plane names
+#define kIOServicePlane "IOService"
+#define kIOPowerPlane "IOPower"
+#define kIODeviceTreePlane "IODeviceTree"
+#define kIOAudioPlane "IOAudio"
+#define kIOFireWirePlane "IOFireWire"
+#define kIOUSBPlane "IOUSB"
+
+// registry ID number
+#define kIORegistryEntryIDKey "IORegistryEntryID"
+// property name to get array of property names
+#define kIORegistryEntryPropertyKeysKey "IORegistryEntryPropertyKeys"
+
+// IOService class name
+#define kIOServiceClass "IOService"
+
+// IOResources class name
+#define kIOResourcesClass "IOResources"
+
+// IOService driver probing property names
+#define kIOClassKey "IOClass"
+#define kIOProbeScoreKey "IOProbeScore"
+#define kIOKitDebugKey "IOKitDebug"
+
+// Properties to be supported as API
+#define kIOSupportedPropertiesKey "IOSupportedProperties"
+// Properties writable by dexts
+#define kIOUserServicePropertiesKey "IOUserServiceProperties"
+
+
+// IOService matching property names
+#define kIOProviderClassKey "IOProviderClass"
+#define kIONameMatchKey "IONameMatch"
+#define kIOPropertyMatchKey "IOPropertyMatch"
+#define kIOPropertyExistsMatchKey "IOPropertyExistsMatch"
+#define kIOPathMatchKey "IOPathMatch"
+#define kIOLocationMatchKey "IOLocationMatch"
+#define kIOParentMatchKey "IOParentMatch"
+#define kIOResourceMatchKey "IOResourceMatch"
+#define kIOResourceMatchedKey "IOResourceMatched"
+#define kIOMatchedServiceCountKey "IOMatchedServiceCountMatch"
+
+#define kIONameMatchedKey "IONameMatched"
+
+#define kIOMatchCategoryKey "IOMatchCategory"
+#define kIODefaultMatchCategoryKey "IODefaultMatchCategory"
+
+#define kIOMatchedPersonalityKey "IOMatchedPersonality"
+#define kIORematchPersonalityKey "IORematchPersonality"
+#define kIORematchCountKey "IORematchCount"
+#define kIODEXTMatchCountKey "IODEXTMatchCount"
+
+// Entitlements to check against dext process
+// Property is an array, one or more of which may match, of:
+// an array of entitlement strings, all must be present
+// Any array can be a single string.
+#define kIOServiceDEXTEntitlementsKey "IOServiceDEXTEntitlements"
+
+// Entitlement required to open dext connection
+#define kIODriverKitEntitlementKey "com.apple.developer.driverkit"
+
+// Entitlements required to open dext IOUserClient
+// Property is an array of strings containing CFBundleIdentifiers of service being opened
+#define kIODriverKitUserClientEntitlementsKey "com.apple.developer.driverkit.userclient-access"
+
+// Other DriverKit entitlements
+#define kIODriverKitUSBTransportEntitlementKey "com.apple.developer.driverkit.transport.usb"
+#define kIODriverKitHIDTransportEntitlementKey "com.apple.developer.driverkit.transport.hid"
+#define kIODriverKitHIDFamilyDeviceEntitlementKey "com.apple.developer.driverkit.family.hid.device"
+#define kIODriverKitHIDFamilyEventServiceEntitlementKey "com.apple.developer.driverkit.family.hid.eventservice"
+#define kIODriverKitTransportBuiltinEntitlementKey "com.apple.developer.driverkit.builtin"
+
+// Entitlement required to read nvram root-only properties as non-root user
+#define kIONVRAMReadAccessKey "com.apple.private.iokit.nvram-read-access"
+// Entitlement required to write nvram properties as non-root user
+#define kIONVRAMWriteAccessKey "com.apple.private.iokit.nvram-write-access"
+
+// When possible, defer matching of this driver until kextd has started.
+#define kIOMatchDeferKey "IOMatchDefer"
+
+// Published after processor_start() has been called on all CPUs at boot time.
+#define kIOAllCPUInitializedKey "IOAllCPUInitialized"
+
+// IOService default user client class, for loadable user clients
+#define kIOUserClientClassKey "IOUserClientClass"
+
+// key to find IOMappers
+#define kIOMapperIDKey "IOMapperID"
+
+#define kIOUserClientCrossEndianKey "IOUserClientCrossEndian"
+#define kIOUserClientCrossEndianCompatibleKey "IOUserClientCrossEndianCompatible"
+#define kIOUserClientSharedInstanceKey "IOUserClientSharedInstance"
+#if KERNEL_PRIVATE
+#define kIOUserClientMessageAppSuspendedKey "IOUserClientMessageAppSuspended"
+#endif
+// diagnostic string describing the creating task
+#define kIOUserClientCreatorKey "IOUserClientCreator"
+// the expected cdhash value of the userspace driver executable
+#define kIOUserServerCDHashKey "IOUserServerCDHash"
+
+#define kIOUserUserClientKey "IOUserUserClient"
+
+
+// IOService notification types
+#define kIOPublishNotification "IOServicePublish"
+#define kIOFirstPublishNotification "IOServiceFirstPublish"
+#define kIOMatchedNotification "IOServiceMatched"
+#define kIOFirstMatchNotification "IOServiceFirstMatch"
+#define kIOTerminatedNotification "IOServiceTerminate"
+#define kIOWillTerminateNotification "IOServiceWillTerminate"
+
+// IOService interest notification types
+#define kIOGeneralInterest "IOGeneralInterest"
+#define kIOBusyInterest "IOBusyInterest"
+#define kIOAppPowerStateInterest "IOAppPowerStateInterest"
+#define kIOPriorityPowerStateInterest "IOPriorityPowerStateInterest"
+
+#define kIOPlatformDeviceMessageKey "IOPlatformDeviceMessage"
+
+// IOService interest notification types
+#define kIOCFPlugInTypesKey "IOCFPlugInTypes"
+
+#define kIOCompatibilityMatchKey "IOCompatibilityMatch"
+#define kIOCompatibilityPropertiesKey "IOCompatibilityProperties"
+#define kIOPathKey "IOPath"
+
+
+// properties found in services that implement command pooling
+#define kIOCommandPoolSizeKey "IOCommandPoolSize" // (OSNumber)
+
+// properties found in services that implement priority
+#define kIOMaximumPriorityCountKey "IOMaximumPriorityCount" // (OSNumber)
+
+// properties found in services that have transfer constraints
+#define kIOMaximumBlockCountReadKey "IOMaximumBlockCountRead" // (OSNumber)
+#define kIOMaximumBlockCountWriteKey "IOMaximumBlockCountWrite" // (OSNumber)
+#define kIOMaximumByteCountReadKey "IOMaximumByteCountRead" // (OSNumber)
+#define kIOMaximumByteCountWriteKey "IOMaximumByteCountWrite" // (OSNumber)
+#define kIOMaximumSegmentCountReadKey "IOMaximumSegmentCountRead" // (OSNumber)
+#define kIOMaximumSegmentCountWriteKey "IOMaximumSegmentCountWrite" // (OSNumber)
+#define kIOMaximumSegmentByteCountReadKey "IOMaximumSegmentByteCountRead" // (OSNumber)
+#define kIOMaximumSegmentByteCountWriteKey "IOMaximumSegmentByteCountWrite" // (OSNumber)
+#define kIOMinimumSegmentAlignmentByteCountKey "IOMinimumSegmentAlignmentByteCount" // (OSNumber)
+#define kIOMaximumSegmentAddressableBitCountKey "IOMaximumSegmentAddressableBitCount" // (OSNumber)
+#define kIOMinimumSaturationByteCountKey "IOMinimumSaturationByteCount" // (OSNumber)
+#define kIOMaximumSwapWriteKey "IOMaximumSwapWrite" // (OSNumber)
+
+// properties found in services that wish to describe an icon
+//
+// IOIcon =
+// {
+// CFBundleIdentifier = "com.example.driver.example";
+// IOBundleResourceFile = "example.icns";
+// };
+//
+// where IOBundleResourceFile is the filename of the resource
+
+#define kIOIconKey "IOIcon" // (OSDictionary)
+#define kIOBundleResourceFileKey "IOBundleResourceFile" // (OSString)
+
+#define kIOBusBadgeKey "IOBusBadge" // (OSDictionary)
+#define kIODeviceIconKey "IODeviceIcon" // (OSDictionary)
+
+// property of root that describes the machine's serial number as a string
+#define kIOPlatformSerialNumberKey "IOPlatformSerialNumber" // (OSString)
+
+// property of root that describes the machine's UUID as a string
+#define kIOPlatformUUIDKey "IOPlatformUUID" // (OSString)
+
+// IODTNVRAM property keys
+#define kIONVRAMBootArgsKey "boot-args"
+#define kIONVRAMDeletePropertyKey "IONVRAM-DELETE-PROPERTY"
+#define kIONVRAMSyncNowPropertyKey "IONVRAM-SYNCNOW-PROPERTY"
+#define kIONVRAMActivateCSRConfigPropertyKey "IONVRAM-ARMCSR-PROPERTY"
+#define kIODTNVRAMPanicInfoKey "aapl,panic-info"
+
+// keys for complex boot information
+#define kIOBootDeviceKey "IOBootDevice" // dict | array of dicts
+#define kIOBootDevicePathKey "IOBootDevicePath" // arch-neutral OSString
+#define kIOBootDeviceSizeKey "IOBootDeviceSize" // OSNumber of bytes
+
+// keys for OS Version information
+#define kOSBuildVersionKey "OS Build Version"
+
+#endif /* ! _IOKIT_IOKITKEYS_H */
uint64_t length;
};
-struct IOMDPrivateState {
+struct _IOMDPrivateState {
uint64_t length;
uint64_t options;
};
{
virtual kern_return_t
_CopyState(
- IOMDPrivateState * state);
+ _IOMDPrivateState * state);
};
#include <DriverKit/OSObject.iig>
-struct IOMemoryMapPrivateState {
+struct _IOMemoryMapPrivateState {
uint64_t length;
uint64_t offset;
uint64_t options;
{
virtual kern_return_t
_CopyState(
- IOMemoryMapPrivateState * state);
+ _IOMemoryMapPrivateState * state);
};
#endif /* ! _IOKIT_UIOMEMORYMAP_H */
#else /* PLATFORM_DriverKit */
-#ifndef _MACH_ERROR_H_
-#define _MACH_ERROR_H_
+#ifdef DRIVERKIT_PRIVATE
+
+#include <mach/error.h>
+
+#else /* DRIVERKIT_PRIVATE */
typedef int kern_return_t;
#define sub_emask (err_sub(0xfff))
#define code_emask (0x3fff)
-#endif /* _MACH_ERROR_H_ */
+#endif /* DRIVERKIT_PRIVATE */
#endif /* PLATFORM_DriverKit */
#define sub_iokit_smc err_sub(32)
#endif
#define sub_iokit_apfs err_sub(33)
+#define sub_iokit_acpiec err_sub(34)
+#define sub_iokit_timesync_avb err_sub(35)
+
#define sub_iokit_platform err_sub(0x2A)
#define sub_iokit_audio_video err_sub(0x45)
#define sub_iokit_cec err_sub(0x46)
class IOMemoryDescriptor;
class IOBufferMemoryDescriptor;
class IOUserClient;
+class OSAction;
typedef char IOServiceName[128];
typedef char IOPropertyName[128];
Create(
IOService * provider,
const IOPropertyName propertiesKey,
- IOService ** result);
+ IOService ** result) LOCAL;
+
+ /*!
+ * @brief Start an IOService termination.
+ * @discussion An IOService object created with Create() may be removed by calling Terminate().
+ * The termination is asynchronous and will later call Stop() on the service.
+ * @param options No options are currently defined, pass zero.
+ * @return kIOReturnSuccess on success. See IOReturn.h for error codes.
+ */
+ virtual kern_return_t
+ Terminate(
+ uint64_t options);
+
+ /*!
+ * @brief Obtain supportable properties describing the provider chain.
+ * @discussion Obtain supportable properties describing the provider chain. This will be a subset of registry
+ * properties the OS considers supportable.
+ * The array is ordered with a dictionary of properties for each entry in the provider chain from this
+ * service towards the root.
+ * @param propertyKeys If only certain property values are needed, they may be passed in this array.
+ * @param properties Returned, retained array of dictionaries of properties or NULL. The caller should release
+ * this array.
+ * @return kIOReturnSuccess on success. See IOReturn.h for error codes.
+ */
+ virtual kern_return_t
+ CopyProviderProperties(
+ OSArray * propertyKeys,
+ OSArray ** properties);
+
+
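+	/*
+	 * Illustrative sketch (not part of this interface): a dext that created a
+	 * child service with Create() can later remove it with Terminate(), and can
+	 * inspect its provider chain. Variable names are assumptions for the example,
+	 * and passing NULL propertyKeys is assumed to request the default set.
+	 *
+	 *	OSArray * providerProperties = NULL;
+	 *
+	 *	child->Terminate(0);	// asynchronous; Stop() is delivered later
+	 *	if (CopyProviderProperties(NULL, &providerProperties) == kIOReturnSuccess) {
+	 *		// the caller owns the returned array
+	 *		providerProperties->release();
+	 *	}
+	 */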
+ /*! @function IOCreatePropertyMatchingDictionary
+ * @abstract Construct a matching dictionary for property matching.
+ */
+ static OSDictionary *
+ CreatePropertyMatchingDictionary(const char * key, OSObjectPtr value, OSDictionary * matching) LOCALONLY;
+
+ /*! @function IOCreatePropertyMatchingDictionary
+ * @abstract Construct a matching dictionary for property matching.
+ */
+ static OSDictionary *
+ CreatePropertyMatchingDictionary(const char * key, const char * stringValue, OSDictionary * matching) LOCALONLY;
+
+ /*! @function IOCreateKernelClassMatchingDictionary
+ * @abstract Construct a matching dictionary for kernel class matching.
+ */
+ static OSDictionary *
+ CreateKernelClassMatchingDictionary(OSString * className, OSDictionary * matching) LOCALONLY;
+
+ /*! @function IOCreateKernelClassMatchingDictionary
+ * @abstract Construct a matching dictionary for kernel class matching.
+ */
+ static OSDictionary *
+ CreateKernelClassMatchingDictionary(const char * className, OSDictionary * matching) LOCALONLY;
+
+ /*! @function IOCreateUserClassMatchingDictionary
+ * @abstract Construct a matching dictionary for user class matching.
+ */
+ static OSDictionary *
+ CreateUserClassMatchingDictionary(OSString * className, OSDictionary * matching) LOCALONLY;
+
+ /*! @function IOCreateUserClassMatchingDictionary
+ * @abstract Construct a matching dictionary for user class matching.
+ */
+ static OSDictionary *
+ CreateUserClassMatchingDictionary(const char * className, OSDictionary * matching) LOCALONLY;
+
+ /*! @function IOCreateNameMatchingDictionary
+ * @abstract Construct a matching dictionary for IOService name matching.
+ */
+ static OSDictionary *
+ CreateNameMatchingDictionary(OSString * serviceName, OSDictionary * matching) LOCALONLY;
+
+ /*! @function IOCreateNameMatchingDictionary
+ * @abstract Construct a matching dictionary for IOService name matching.
+ */
+ static OSDictionary *
+ CreateNameMatchingDictionary(const char * serviceName, OSDictionary * matching) LOCALONLY;
};
#endif /* ! _IOKIT_UIOSERVICE_H */
--- /dev/null
+/*
+ * Copyright (c) 2019-2019 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _IOKIT_UIOSERVICEDISPATCHSOURCE_H
+#define _IOKIT_UIOSERVICEDISPATCHSOURCE_H
+
+#include <DriverKit/IODispatchQueue.iig>
+#include <DriverKit/OSAction.iig>
+#include <DriverKit/IOService.iig>
+
+
+typedef void (^IOServiceNotificationBlock)(uint64_t type, IOService * service, uint64_t options);
+
+enum {
+ kIOServiceNotificationTypeTerminated = 0x00000000,
+ kIOServiceNotificationTypeMatched = 0x00000001,
+ kIOServiceNotificationTypeLast = kIOServiceNotificationTypeMatched,
+ kIOServiceNotificationTypeNone = 0xFFFFFFFF,
+};
+
+class NATIVE KERNEL IOServiceNotificationDispatchSource : public IODispatchSource
+{
+public:
+
+ /*!
+ * @brief Create an IOServiceNotificationDispatchSource for IOService matching and termination events.
+ * @param matching An IOService matching dictionary.
+ * @param options None defined, pass zero.
+ * @param queue IODispatchQueue the source is attached to. Note that the ServiceNotificationReady
+ * handler is invoked on the queue set for the target method
+ * of the OSAction, not this queue.
+ * @param source Created source with +1 retain count to be released by the caller.
+ * @return kIOReturnSuccess on success. See IOReturn.h for error codes.
+ */
+ static kern_return_t
+ Create(
+ OSDictionary * matching,
+ uint64_t options,
+ IODispatchQueue * queue,
+ IOServiceNotificationDispatchSource ** notification) LOCAL;
+
+ virtual bool
+ init() override;
+
+ virtual void
+ free() override;
+
+ /*!
+ * @brief Control the enable state of the notification.
+ * @param enable Pass true to enable the source or false to disable.
+ * @param handler Optional block to be executed after the interrupt has been disabled and any pending
+ * interrupt handlers completed.
+ * @return kIOReturnSuccess on success. See IOReturn.h for error codes.
+ */
+ virtual kern_return_t
+ SetEnableWithCompletion(
+ bool enable,
+ IODispatchSourceCancelHandler handler) override LOCAL;
+
+ /*!
+ * @brief Cancel all callbacks from the event source.
+ * @discussion After cancellation, the source can only be freed. It cannot be reactivated.
+ * @param handler Handler block to be invoked after any callbacks have completed.
+ * @return kIOReturnSuccess on success. See IOReturn.h for error codes.
+ */
+ virtual kern_return_t
+ Cancel(IODispatchSourceCancelHandler handler) override LOCAL;
+
+ /*!
+ * @brief Set the handler block to run when the notification has become ready.
+ * @param action OSAction instance specifying the callback method. The OSAction object will be retained
+ * until SetHandler is called again or the event source is cancelled.
+ * The ServiceNotificationReady handler is invoked on the queue set for the target method of the
+ * OSAction.
+ * @return kIOReturnSuccess on success. See IOReturn.h for error codes.
+ */
+ virtual kern_return_t
+ SetHandler(
+ OSAction * action TYPE(ServiceNotificationReady));
+
+ /*!
+ * @brief Invoke a block for each notification available in response to ServiceNotificationReady.
+ * @discussion The IOService object passed to the notification is only retained for the duration of the block.
+ * It should be retained by the block code if used beyond the invocation.
+ * @param block The block to be invoked with each notification.
+ * @return kIOReturnSuccess on success. See IOReturn.h for error codes.
+ */
+ kern_return_t
+ DeliverNotifications(IOServiceNotificationBlock block) LOCALONLY;
+
+private:
+ virtual kern_return_t
+ CheckForWork(bool synchronous) override LOCAL;
+
+ virtual void
+ ServiceNotificationReady(
+ OSAction * action TARGET) LOCAL = 0;
+
+ virtual kern_return_t
+ CopyNextNotification(
+ uint64_t * type,
+ IOService ** service,
+ uint64_t * options);
+};
+
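+/*
+ * Illustrative dext-side sketch (not part of this interface): create the
+ * source on a queue, install the ServiceNotificationReady action, and drain
+ * notifications from that callback with DeliverNotifications(). The names
+ * `matching`, `queue` and `action` are assumptions for the example.
+ *
+ *	IOServiceNotificationDispatchSource * source = nullptr;
+ *
+ *	IOServiceNotificationDispatchSource::Create(matching, 0, queue, &source);
+ *	source->SetHandler(action);	// action created for ServiceNotificationReady
+ *
+ *	// Later, inside the ServiceNotificationReady callback:
+ *	source->DeliverNotifications(^(uint64_t type, IOService * service, uint64_t options) {
+ *		if (type == kIOServiceNotificationTypeMatched) {
+ *			// retain `service` here if it is used beyond the block
+ *		}
+ *	});
+ */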
+#endif /* ! _IOKIT_UIOSERVICEDISPATCHSOURCE_H */
INSTALL_MI_LIST = $(ALL_DEFS)
INSTALL_DRIVERKIT_MI_LIST = $(ALL_DEFS)
-OTHER_HEADERS = IOTypes.h IOReturn.h IORPC.h
+OTHER_HEADERS = IOTypes.h IOReturn.h IORPC.h IOKitKeys.h
EXPORT_MI_GEN_LIST = $(GENERATED_HEADERS) $(OTHER_HEADERS)
INSTALL_MI_GEN_LIST = $(GENERATED_HEADERS) $(OTHER_HEADERS)
typedef void (^OSActionCancelHandler)(void);
typedef void (^OSActionAbortedHandler)(void);
+struct OSActionWaitToken;
/*!
* @class OSAction
* The callback is specified as a method and object pair.
* State associated with the callback may be allocated and stored for the creator of the object.
* Methods to allocate an OSAction instance are generated for each method defined in a class with
- * a TYPE attribute, so there should not be any need to directly call OSAction::Create().
+ * a TYPE attribute. The generated methods are named CreateAction{name of method with type attribute}
+ * and have the following declaration:
+ *
+ * kern_return_t CreateActionNameOfMethod(size_t referenceSize, OSAction **action);
+ *
+ * referenceSize refers to the size of additional state structure available to the creator of the OSAction
+ * with GetReference. If successful, the generated method returns kIOReturnSuccess and a created OSAction
+ * through the 'action' parameter with a +1 retain count to be released by the caller. See IOReturn.h for
+ * error codes.
*/
class NATIVE KERNEL OSAction : public OSObject
{
public:
+#if DRIVERKIT_PRIVATE
/*!
* @brief Create an instance of OSAction.
* @discussion Methods to allocate an OSAction instance are generated for each method defined in a class with
uint64_t msgid,
size_t referenceSize,
OSAction ** action) LOCAL;
+#endif
virtual void
free() override;
kern_return_t
SetAbortedHandler(OSActionAbortedHandler handler) LOCALONLY;
+ /*!
+ * @brief Mark this OSAction to be waited for later with Wait().
+ * @discussion This call should be made before any possible invocation of the action.
+ * An OSAction instance only supports one waiter and WillWait() will return an error if already called.
+ * @param token Opaque value to be passed to a later call to Wait() and EndWait().
+ * @return kIOReturnSuccess on success. See IOReturn.h for error codes.
+ */
+ kern_return_t
+ WillWait(OSActionWaitToken ** token) LOCALONLY;
+
+ /*!
+ * @brief Discard the OSActionWaitToken for the action.
+ * @discussion Free any resources needed to wait for the action allocated by WillWait().
+ * There should be no outstanding invocations of the action when EndWait is called,
+ * if necessary the action should be canceled before calling EndWait().
+ * @param token Opaque value to be passed from an earlier call to WillWait().
+ * @return kIOReturnSuccess on success. kIOReturnAborted if aborted or canceled.
+ * kIOReturnTimeout if the deadline was passed. See IOReturn.h for error codes.
+ */
+ kern_return_t
+ EndWait(
+ OSActionWaitToken * token) LOCALONLY;
+
+ /*!
+ * @brief Wait for the action to be invoked.
+ * @discussion The current thread is blocked until the action invocation has completed, the action is canceled
+ * or aborted, or the deadline passes.
+ * @param token Opaque value to be passed from an earlier call to WillWait().
+ * @param options Pass one of the kIOTimerClock* options to specify the timebase for the
+ * deadline.
+ * @param deadline Pass the time the wait should timeout, or zero for no timeout.
+ * @return kIOReturnSuccess on success. kIOReturnAborted if aborted or canceled.
+ * kIOReturnTimeout if the deadline was passed. See IOReturn.h for error codes.
+ */
+ kern_return_t
+ Wait(
+ OSActionWaitToken * token,
+ uint64_t options,
+ uint64_t deadline) LOCALONLY;
+
virtual void
Aborted(void) LOCAL;
};
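/*
 * Illustrative sketch (not part of this interface): waiting synchronously for
 * a single invocation of an action. `action` is assumed to be an OSAction
 * created for a TYPE() callback method.
 *
 *	OSActionWaitToken * token = nullptr;
 *
 *	if (action->WillWait(&token) == kIOReturnSuccess) {
 *		// ... start the operation that will invoke the action ...
 *		kern_return_t ret = action->Wait(token, 0, 0);	// no deadline
 *		action->EndWait(token);	// after invocations have drained
 *	}
 */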
typedef OSObject * OSObjectPtr;
#endif
-#if __IIG && !__IIG_ATTRIBUTES_DEFINED__
+#if !__IIG_ATTRIBUTES_DEFINED__
#define __IIG_ATTRIBUTES_DEFINED__ 1
-#define KERNEL __attribute__((annotate("kernel")))
-#define NATIVE __attribute__((annotate("native")))
-#define LOCAL __attribute__((annotate("local")))
-#define LOCALONLY __attribute__((annotate("localonly")))
-#define REMOTE __attribute__((annotate("remote")))
-
-#define LOCALHOST __attribute__((annotate("localhost")))
-
-#define INVOKEREPLY __attribute__((annotate("invokereply")))
-#define REPLY __attribute__((annotate("reply")))
-
-#define PORTMAKESEND __attribute__((annotate("MACH_MSG_TYPE_MAKE_SEND")))
-#define PORTCOPYSEND __attribute__((annotate("MACH_MSG_TYPE_COPY_SEND")))
-
-#define TARGET __attribute__((annotate("target")))
-#define TYPE(p) __attribute__((annotate("type=" # p)))
-
-//#define ARRAY(maxcount) __attribute__((annotate(# maxcount), annotate("array")))
-#define EXTENDS(cls) __attribute__((annotate("extends=" # cls)))
-
-//#define INTERFACE __attribute__((annotate("interface")))
-//#define IMPLEMENTS(i) void implements(i *);
-
-#define QUEUENAME(name) __attribute__((annotate("queuename=" # name)))
-
-#define IIG_SERIALIZABLE __attribute__((annotate("serializable")))
-
-#else
+#if __IIG || __DOCUMENTATION__
+
+#define IIG_KERNEL __attribute__((annotate("kernel")))
+#define IIG_NATIVE __attribute__((annotate("native")))
+#define IIG_LOCAL __attribute__((annotate("local")))
+#define IIG_LOCALONLY __attribute__((annotate("localonly")))
+#define IIG_REMOTE __attribute__((annotate("remote")))
+#define IIG_LOCALHOST __attribute__((annotate("localhost")))
+#define IIG_INVOKEREPLY __attribute__((annotate("invokereply")))
+#define IIG_REPLY __attribute__((annotate("reply")))
+#define IIG_PORTMAKESEND __attribute__((annotate("MACH_MSG_TYPE_MAKE_SEND")))
+#define IIG_PORTCOPYSEND __attribute__((annotate("MACH_MSG_TYPE_COPY_SEND")))
+#define IIG_TARGET __attribute__((annotate("target")))
+#define IIG_TYPE(p) __attribute__((annotate("type=" # p)))
+//#define IIG_ARRAY(maxcount) __attribute__((annotate(# maxcount), annotate("array")))
+#define IIG_EXTENDS(cls) __attribute__((annotate("extends=" # cls)))
+//#define IIG_INTERFACE __attribute__((annotate("interface")))
+//#define IIG_IMPLEMENTS(i) void __implements(i *);
+#define IIG_QUEUENAME(name) __attribute__((annotate("queuename=" # name)))
+#define IIG_SERIALIZABLE __attribute__((annotate("serializable")))
+#if __IIG
+#define KERNEL IIG_KERNEL
+#endif /* __IIG */
+#define NATIVE IIG_NATIVE
+#define LOCAL IIG_LOCAL
+#define LOCALONLY IIG_LOCALONLY
+#define REMOTE IIG_REMOTE
+#define LOCALHOST IIG_LOCALHOST
+#define INVOKEREPLY IIG_INVOKEREPLY
+#define REPLY IIG_REPLY
+#define PORTMAKESEND IIG_PORTMAKESEND
+#define PORTCOPYSEND IIG_PORTCOPYSEND
+#define TARGET IIG_TARGET
+#define TYPE(p) IIG_TYPE(p)
+//#define ARRAY(maxcount) IIG_ARRAY(maxcount)
+#define EXTENDS(cls) IIG_EXTENDS(cls)
+//#define INTERFACE IIG_INTERFACE
+//#define IMPLEMENTS(i) IIG_IMPLEMENTS(i)
+#define QUEUENAME(name) IIG_QUEUENAME(name)
+
+#else /* __IIG || __DOCUMENTATION__ */
+
+#define IIG_KERNEL
+#define IIG_NATIVE
+#define IIG_LOCAL
+#define IIG_LOCALONLY
+#define IIG_REMOTE
+#define IIG_LOCALHOST
+#define IIG_INVOKEREPLY
+#define IIG_REPLY
+#define IIG_PORTMAKESEND
+#define IIG_PORTCOPYSEND
+#define IIG_TARGET
+#define IIG_TYPE(p)
+//#define IIG_ARRAY(maxcount)
+#define IIG_EXTENDS(cls)
+//#define IIG_INTERFACE
+//#define IIG_IMPLEMENTS(i)
+#define IIG_QUEUENAME(name)
#define IIG_SERIALIZABLE
-#endif /* __IIG */
+#endif /* __IIG || __DOCUMENTATION__ */
+
+#endif /* __IIG_ATTRIBUTES_DEFINED__ */
#if !__IIG
* @param unload Flag to cause the actual unloading of the module.
*/
IOReturn terminateDriversForModule( const char * moduleName, bool unload = true);
+#if XNU_KERNEL_PRIVATE
+ IOReturn terminateDrivers(OSDictionary * matching, io_name_t className);
+#endif /* XNU_KERNEL_PRIVATE */
/*!
* @function startMatching
IOReturn unloadModule( OSString * moduleName ) const;
IOReturn _removeDrivers(OSDictionary * matching);
- IOReturn _terminateDrivers(OSDictionary * matching);
};
extern const OSSymbol * gIOClassKey;
#define kIOProbeScoreKey "IOProbeScore"
#define kIOKitDebugKey "IOKitDebug"
+// Properties to be supported as API
+#define kIOSupportedPropertiesKey "IOSupportedProperties"
+// Properties writable by dexts
+#define kIOUserServicePropertiesKey "IOUserServiceProperties"
+
+
// IOService matching property names
#define kIOProviderClassKey "IOProviderClass"
#define kIONameMatchKey "IONameMatch"
io_object_t obj, ipc_kobject_type_t type );
extern mach_port_t ipc_port_make_send(mach_port_t);
+extern mach_port_t ipc_port_copy_send(mach_port_t);
extern void ipc_port_release_send(ipc_port_t port);
extern io_object_t iokit_lookup_io_object(ipc_port_t port, ipc_kobject_type_t type);
extern kern_return_t
uext_server(ipc_kmsg_t request, ipc_kmsg_t * preply);
+extern kern_return_t
+iokit_label_dext_task(task_t task);
/*
* Functions imported by iokit:IOMemoryDescriptor.cpp
void *arg0 = NULL, void *arg1 = NULL,
void *arg2 = NULL, void *arg3 = NULL);
+#ifdef __BLOCKS__
+ typedef IOReturn (^ActionBlock)(void);
+ IOReturn runPropertyActionBlock(ActionBlock block);
+#endif /* __BLOCKS__ */
+
private:
#if __LP64__
OSMetaClassDeclareReservedUnused(IORegistryEntry, 0);
#else /* PLATFORM_DriverKit */
+#ifdef DRIVERKIT_PRIVATE
+
+#include <mach/error.h>
+
+#else /* DRIVERKIT_PRIVATE */
+
typedef int kern_return_t;
#define KERN_SUCCESS 0
#define sub_emask (err_sub(0xfff))
#define code_emask (0x3fff)
+#endif /* DRIVERKIT_PRIVATE */
+
#endif /* PLATFORM_DriverKit */
typedef kern_return_t IOReturn;
extern const OSSymbol * gIOInterruptControllersKey;
extern const OSSymbol * gIOInterruptSpecifiersKey;
+extern const OSSymbol * gIOSupportedPropertiesKey;
+extern const OSSymbol * gIOUserServicePropertiesKey;
+
extern const OSSymbol * gIOBSDKey;
extern const OSSymbol * gIOBSDNameKey;
extern const OSSymbol * gIOBSDMajorKey;
kIOPMDeviceUsable = 0x00008000,
kIOPMLowPower = 0x00010000,
#if PRIVATE
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
kIOPMAOTPower = 0x00020000,
kIOPMAOTCapability = kIOPMAOTPower,
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
#endif /* PRIVATE */
kIOPMPreventIdleSleep = 0x00000040,
kIOPMSleepCapability = 0x00000004,
#define kIOPMWakeEventSource 0x00000001
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
/*****************************************************************************
*
* AOT defs
#define kIOPMAOTPowerKey "aot-power"
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
-
/*****************************************************************************
*
* System Sleep Policy
OSArray * _systemWakeEventsArray;
bool _acceptSystemWakeEvents;
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
// AOT --
IOPMCalendarStruct _aotWakeTimeCalendar;
IOTimerEventSource * _aotTimerES;
bool isAOTMode(void);
private:
// -- AOT
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
void updateTasksSuspend(void);
int findSuspendedPID(uint32_t pid, uint32_t *outRefCount);
}
IOReturn
-IOCatalogue::_terminateDrivers(OSDictionary * matching)
+IOCatalogue::terminateDrivers(OSDictionary * matching, io_name_t className)
{
OSDictionary * dict;
OSIterator * iter;
IOService * service;
IOReturn ret;
- if (!matching) {
- return kIOReturnBadArgument;
- }
-
ret = kIOReturnSuccess;
dict = NULL;
iter = IORegistryIterator::iterateOver(gIOServicePlane,
return kIOReturnNoMemory;
}
- OSKext::uniquePersonalityProperties( matching );
+ if (matching) {
+ OSKext::uniquePersonalityProperties( matching );
+ }
// terminate instances.
do {
iter->reset();
while ((service = (IOService *)iter->getNextObject())) {
- dict = service->getPropertyTable();
- if (!dict) {
+ if (className && !service->metaCast(className)) {
continue;
}
-
- /* Terminate only for personalities that match the matching dictionary.
- * This comparison must be done with only the keys in the
- * "matching" dict to enable general matching.
- */
- if (!dict->isEqualTo(matching, matching)) {
- continue;
+ if (matching) {
+ /* Terminate only for personalities that match the matching dictionary.
+ * This comparison must be done with only the keys in the
+ * "matching" dict to enable general matching.
+ */
+ dict = service->getPropertyTable();
+ if (!dict) {
+ continue;
+ }
+ if (!dict->isEqualTo(matching, matching)) {
+ continue;
+ }
}
+ OSKext * kext;
+ const char * bundleIDStr;
+ OSObject * prop;
+ bool okToTerminate;
+ for (okToTerminate = true;;) {
+ kext = service->getMetaClass()->getKext();
+ if (!kext) {
+ break;
+ }
+ bundleIDStr = kext->getIdentifierCString();
+ if (!bundleIDStr) {
+ break;
+ }
+ prop = kext->getPropertyForHostArch(kOSBundleAllowUserTerminateKey);
+ if (prop) {
+ okToTerminate = (kOSBooleanTrue == prop);
+ break;
+ }
+ if (!strcmp(kOSKextKernelIdentifier, bundleIDStr)) {
+ okToTerminate = false;
+ break;
+ }
+ if (!strncmp("com.apple.", bundleIDStr, strlen("com.apple."))) {
+ okToTerminate = false;
+ break;
+ }
+ break;
+ }
+ if (!okToTerminate) {
+#if DEVELOPMENT || DEBUG
+ okToTerminate = true;
+#endif /* DEVELOPMENT || DEBUG */
+ IOLog("%sallowing kextunload terminate for bundleID %s\n",
+ okToTerminate ? "" : "dis", bundleIDStr ? bundleIDStr : "?");
+ if (!okToTerminate) {
+ ret = kIOReturnUnsupported;
+ break;
+ }
+ }
if (!service->terminate(kIOServiceRequired | kIOServiceSynchronous)) {
ret = kIOReturnUnsupported;
break;
{
IOReturn ret;
- ret = _terminateDrivers(matching);
+ if (!matching) {
+ return kIOReturnBadArgument;
+ }
+ ret = terminateDrivers(matching, NULL);
IORWLockWrite(lock);
if (kIOReturnSuccess == ret) {
ret = _removeDrivers(matching);
dict->setObject(gIOModuleIdentifierKey, moduleName);
- ret = _terminateDrivers(dict);
+ ret = terminateDrivers(dict, NULL);
/* No goto between IOLock calls!
*/
if (!intMap && child->getProperty( gIODTInterruptParentKey)) {
intMap = true;
}
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
if (!strcmp("sep", child->getName())
|| !strcmp("aop", child->getName())
|| !strcmp("disp0", child->getName())) {
uint32_t aotFlags = 1;
child->setProperty("aot-power", &aotFlags, sizeof(aotFlags));
}
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
}
regIter->release();
}
if ((kernel_task != current_task()) && (self = proc_self())) {
bool user_64 = false;
mem->btPID = proc_pid(self);
- (void)backtrace_user(&mem->btUser[0], kIOTrackingCallSiteBTs - 1, &num,
+ num = backtrace_user(&mem->btUser[0], kIOTrackingCallSiteBTs - 1, NULL,
&user_64, NULL);
mem->user32 = !user_64;
proc_rele(self);
.outputPowerCharacter = kIOPMSleep,
.inputPowerRequirement = SLEEP_POWER },
{ .version = 1,
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
.capabilityFlags = kIOPMAOTCapability,
.outputPowerCharacter = kIOPMAOTPower,
.inputPowerRequirement = ON_POWER },
-#else /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
- .capabilityFlags = 0,
- .outputPowerCharacter = 0,
- .inputPowerRequirement = 0xFFFFFFFF },
-#endif /* (defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
{ .version = 1,
.capabilityFlags = kIOPMPowerOn,
.outputPowerCharacter = kIOPMPowerOn,
{
bool newSuspend;
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
newSuspend = (tasksSuspended || _aotTasksSuspended);
-#else /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
- newSuspend = tasksSuspended;
-#endif /* (defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
if (newSuspend == tasksSuspendState) {
return;
}
static SYSCTL_INT(_debug, OID_AUTO, swd_panic_phase, CTLFLAG_RW, &swd_panic_phase, 0, "");
#endif
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
//******************************************************************************
// AOT
NULL, 0, sysctl_aotmode, "I", "");
//******************************************************************************
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
-
static const OSSymbol * gIOPMSettingAutoWakeCalendarKey;
static const OSSymbol * gIOPMSettingAutoWakeSecondsKey;
&IOPMrootDomain::dispatchPowerEvent));
gIOPMWorkLoop->addEventSource(pmPowerStateQueue);
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
_aotMode = 0;
_aotTimerES = IOTimerEventSource::timerEventSource(this,
OSMemberFunctionCast(IOTimerEventSource::Action,
this, &IOPMrootDomain::aotEvaluate));
gIOPMWorkLoop->addEventSource(_aotTimerES);
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
// create our power parent
patriarch = new IORootParent;
sysctl_register_oid(&sysctl__kern_consoleoptions);
sysctl_register_oid(&sysctl__kern_progressoptions);
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
sysctl_register_oid(&sysctl__kern_aotmode);
sysctl_register_oid(&sysctl__kern_aotmodebits);
sysctl_register_oid(&sysctl__kern_aotmetrics);
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
#if HIBERNATION
IOHibernateSystemInit(this);
unsigned long newState;
clock_sec_t secs;
clock_usec_t microsecs;
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
clock_sec_t adjWakeTime;
IOPMCalendarStruct nowCalendar;
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
ASSERT_GATED();
newState = getPowerState();
notifierThread = current_thread();
switch (getPowerState()) {
case SLEEP_STATE: {
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
if (kPMCalendarTypeInvalid != _aotWakeTimeCalendar.selector) {
secs = 0;
microsecs = 0;
}
}
_aotPendingFlags &= ~kIOPMWakeEventAOTPerCycleFlags;
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
acceptSystemWakeEvents(true);
// re-enable this timer for next sleep
logtime(secs);
gIOLastSleepTime.tv_sec = secs;
gIOLastSleepTime.tv_usec = microsecs;
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
if (!_aotLastWakeTime) {
gIOLastUserSleepTime = gIOLastSleepTime;
}
-#else
- gIOLastUserSleepTime = gIOLastSleepTime;
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
gIOLastWakeTime.tv_sec = 0;
gIOLastWakeTime.tv_usec = 0;
gIOLastWakeTime.tv_sec = secs;
gIOLastWakeTime.tv_usec = microsecs;
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
// aot
if (_aotWakeTimeCalendar.selector != kPMCalendarTypeInvalid) {
_aotWakeTimeCalendar.selector = kPMCalendarTypeInvalid;
setWakeTime(_aotTestTime);
}
}
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
#if HIBERNATION
LOG("System %sWake\n", gIOHibernateState ? "SafeSleep " : "");
*cancel = true;
DLOG("cancel dark->sleep\n");
}
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
if (_aotMode && (kPMCalendarTypeInvalid != _aotWakeTimeCalendar.selector)) {
uint64_t now = mach_continuous_time();
if (((now + _aotWakePreWindow) >= _aotWakeTimeContinuous)
IOLog("AOT wake window cancel: %qd, %qd\n", now, _aotWakeTimeContinuous);
}
}
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
}
}
#endif /* !CONFIG_EMBEDDED */
}
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
_aotReadyToFullWake = false;
#if 0
if (_aotLingerTime) {
clock_interval_to_absolutetime_interval(2000, kMillisecondScale, &_aotWakePreWindow);
clock_interval_to_absolutetime_interval(1100, kMillisecondScale, &_aotWakePostWindow);
}
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
#if HIBERNATION
IOHibernateSystemSleep();
uint32_t changeFlags = *inOutChangeFlags;
uint32_t currentPowerState = (uint32_t) getPowerState();
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
if ((AOT_STATE == powerState) && (ON_STATE == currentPowerState)) {
// Assertion may have been taken in AOT leading to changePowerStateTo(AOT)
*inOutChangeFlags |= kIOPMNotDone;
return;
}
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
if (changeFlags & kIOPMParentInitiated) {
// Root parent is permanently pegged at max power,
_desiredCapability, _currentCapability, _pendingCapability);
}
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
if ((AOT_STATE == powerState) && (SLEEP_STATE != currentPowerState)) {
panic("illegal AOT entry from %s", getPowerStateString(currentPowerState));
}
aotShouldExit(false, true);
aotExit(false);
}
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
}
void
return secs;
}
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
-
unsigned long
IOPMrootDomain::getRUN_STATE(void)
{
}
}
-#else /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
-
-unsigned long
-IOPMrootDomain::getRUN_STATE(void)
-{
- return ON_STATE;
-}
-
-IOReturn
-IOPMrootDomain::setWakeTime(uint64_t wakeContinuousTime)
-{
- return kIOReturnUnsupported;
-}
-
-#endif /* (defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
-
//******************************************************************************
// adjustPowerState
//
ASSERT_GATED();
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
if (_aotNow) {
bool exitNow;
}
return;
}
-#endif /* (defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
if ((!idleSleepEnabled) || !checkSystemSleepEnabled()) {
changePowerStateToPriv(getRUN_STATE());
unsigned int
IOPMrootDomain::idleSleepPreventersCount()
{
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
if (_aotMode) {
unsigned int count __block;
count = 0;
});
return count;
}
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
return preventIdleSleepList->getCount();
}
}
if (changedBits & kIOPMDriverAssertionCPUBit) {
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
if (_aotNow) {
IOLog("CPU assertions %d\n", (0 != (kIOPMDriverAssertionCPUBit & newAssertions)));
}
evaluatePolicy(_aotNow ? kStimulusNoIdleSleepPreventers : kStimulusDarkWakeEvaluate);
-#else
- evaluatePolicy(kStimulusDarkWakeEvaluate);
-#endif
if (!assertOnWakeSecs && gIOLastWakeAbsTime) {
AbsoluteTime now;
clock_usec_t microsecs;
_systemWakeEventsArray = OSArray::withCapacity(4);
}
_acceptSystemWakeEvents = (_systemWakeEventsArray != NULL);
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
- if (!(_aotNow && (kIOPMWakeEventAOTExitFlags & _aotPendingFlags)))
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
- {
+ if (!(_aotNow && (kIOPMWakeEventAOTExitFlags & _aotPendingFlags))) {
gWakeReasonString[0] = '\0';
if (_systemWakeEventsArray) {
_systemWakeEventsArray->flushCollection();
return;
}
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
IOOptionBits aotFlags = 0;
bool needAOTEvaluate = FALSE;
flags |= kIOPMWakeEventAOTPossibleExit;
}
#endif /* DEVELOPMENT || DEBUG */
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
deviceName = device->copyName(gIOServicePlane);
deviceRegId = OSNumber::withNumber(device->getRegistryEntryID(), 64);
WAKEEVENT_LOCK();
addWakeReason = _acceptSystemWakeEvents;
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
if (_aotMode) {
IOLog("claimSystemWakeEvent(%s, %s, 0x%x) 0x%x %d\n", reason, deviceName->getCStringNoCopy(), (int)flags, _aotPendingFlags, _aotReadyToFullWake);
}
addWakeReason = _aotNow && _systemWakeEventsArray && ((kIOPMWakeEventAOTExitFlags & aotFlags));
needAOTEvaluate = _aotReadyToFullWake;
}
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
if (!gWakeReasonSysctlRegistered) {
// Lazy registration until the platform driver stops registering
}
WAKEEVENT_UNLOCK();
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
if (needAOTEvaluate) {
aotEvaluate(NULL);
}
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
done:
if (deviceName) {
return res;
}
+static IOReturn
+IORegistryEntryActionToBlock(OSObject *target,
+ void *arg0, void *arg1,
+ void *arg2, void *arg3)
+{
+ IORegistryEntry::ActionBlock block = (typeof(block))arg0;
+ return block();
+}
+
+IOReturn
+IORegistryEntry::runPropertyActionBlock(ActionBlock block)
+{
+ IOReturn res;
+
+ res = runPropertyAction(&IORegistryEntryActionToBlock, this, block);
+
+ return res;
+}
+
OSObject *
IORegistryEntry::getProperty( const OSString * aKey) const
{
const OSSymbol * gIORematchPersonalityKey;
const OSSymbol * gIORematchCountKey;
const OSSymbol * gIODEXTMatchCountKey;
+const OSSymbol * gIOSupportedPropertiesKey;
+const OSSymbol * gIOUserServicePropertiesKey;
#if !CONFIG_EMBEDDED
const OSSymbol * gIOServiceLegacyMatchingRegistryIDKey;
#endif
gIOInterruptSpecifiersKey
= OSSymbol::withCStringNoCopy("IOInterruptSpecifiers");
+ gIOSupportedPropertiesKey = OSSymbol::withCStringNoCopy(kIOSupportedPropertiesKey);
+ gIOUserServicePropertiesKey = OSSymbol::withCStringNoCopy(kIOUserServicePropertiesKey);
+
gIOMapperIDKey = OSSymbol::withCStringNoCopy(kIOMapperIDKey);
gIOKitDebugKey = OSSymbol::withCStringNoCopy( kIOKitDebugKey );
lowestPowerState = fPowerStates[0].stateOrderToIndex;
fHighestPowerState = fPowerStates[numberOfStates - 1].stateOrderToIndex;
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
{
uint32_t aotFlags;
IOService * service;
}
}
}
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
// OR'in all the output power flags
fMergedOutputPowerFlags = 0;
myChangeFlags = kIOPMParentInitiated | kIOPMDomainDidChange |
(parentChangeFlags & kIOPMRootBroadcastFlags);
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
if (kIOPMAOTPower & fPowerStates[maxPowerState].inputPowerFlags) {
IOLog("aotPS %s0x%qx[%ld]\n", getName(), getRegistryEntryID(), maxPowerState);
}
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
result = startPowerChange(
/* flags */ myChangeFlags,
newPowerState = fHighestPowerState;
}
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
if (getPMRootDomain()->isAOTMode()) {
if ((kIOPMPreventIdleSleep & fPowerStates[newPowerState].capabilityFlags)
&& !(kIOPMPreventIdleSleep & fPowerStates[fDesiredPowerState].capabilityFlags)) {
getPMRootDomain()->claimSystemWakeEvent(this, kIOPMWakeEventAOTExit, getName(), NULL);
}
}
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
fDesiredPowerState = newPowerState;
proc_suspended = get_task_pidsuspended((task_t) proc->task);
if (proc_suspended) {
logClientIDForNotification(object, context, "PMTellAppWithResponse - Suspended");
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
} else if (getPMRootDomain()->isAOTMode() && get_task_suspended((task_t) proc->task)) {
proc_suspended = true;
context->skippedInDark++;
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
}
proc_rele(proc);
if (proc_suspended) {
}
if (context->us == getPMRootDomain() &&
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
getPMRootDomain()->isAOTMode()
-#else /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
- false
-#endif /* (defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
) {
OSNumber *clientID = NULL;
boolean_t proc_suspended = FALSE;
proc_suspended = get_task_pidsuspended((task_t) proc->task);
if (proc_suspended) {
logClientIDForNotification(object, context, "tellAppClientApplier - Suspended");
-#if !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146))
} else if (IOService::getPMRootDomain()->isAOTMode() && get_task_suspended((task_t) proc->task)) {
proc_suspended = true;
context->skippedInDark++;
-#endif /* !(defined(RC_HIDE_N144) || defined(RC_HIDE_N146)) */
}
proc_rele(proc);
if (proc_suspended) {
// functions called from osfmk/device/iokit_rpc.c
void
-iokit_add_reference( io_object_t obj, ipc_kobject_type_t type )
+iokit_port_object_description(io_object_t obj, kobject_description_t desc)
+{
+ IORegistryEntry * regEntry;
+ IOUserNotification * __unused noti;
+ _IOServiceNotifier * __unused serviceNoti;
+ OSSerialize * __unused s;
+
+ if ((regEntry = OSDynamicCast(IORegistryEntry, obj))) {
+ snprintf(desc, KOBJECT_DESCRIPTION_LENGTH, "%s(0x%qx)", obj->getMetaClass()->getClassName(), regEntry->getRegistryEntryID());
+#if DEVELOPMENT || DEBUG
+ } else if ((noti = OSDynamicCast(IOUserNotification, obj))
+ && ((serviceNoti = OSDynamicCast(_IOServiceNotifier, noti->holdNotify)))) {
+ s = OSSerialize::withCapacity(page_size);
+ if (s && serviceNoti->matching->serialize(s)) {
+ snprintf(desc, KOBJECT_DESCRIPTION_LENGTH, "%s(%s)", obj->getMetaClass()->getClassName(), s->text());
+ }
+ OSSafeReleaseNULL(s);
+#endif /* DEVELOPMENT || DEBUG */
+ } else {
+ snprintf(desc, KOBJECT_DESCRIPTION_LENGTH, "%s", obj->getMetaClass()->getClassName());
+ }
+}
+
+void
+iokit_add_reference( io_object_t obj, natural_t type )
{
IOUserClient * uc;
switch (flag) {
#if !defined(SECURE_KERNEL)
case kIOCatalogServiceTerminate:
- OSIterator * iter;
- IOService * service;
-
- iter = IORegistryIterator::iterateOver(gIOServicePlane,
- kIORegistryIterateRecursively);
- if (!iter) {
- return kIOReturnNoMemory;
- }
-
- do {
- iter->reset();
- while ((service = (IOService *)iter->getNextObject())) {
- if (service->metaCast(name)) {
- if (!service->terminate( kIOServiceRequired
- | kIOServiceSynchronous)) {
- kr = kIOReturnUnsupported;
- break;
- }
- }
- }
- } while (!service && !iter->isValid());
- iter->release();
+ kr = gIOCatalogue->terminateDrivers(NULL, name);
break;
case kIOCatalogModuleUnload:
#include <DriverKit/IOBufferMemoryDescriptor.h>
#include <DriverKit/IOMemoryMap.h>
#include <DriverKit/IODataQueueDispatchSource.h>
+#include <DriverKit/IOServiceNotificationDispatchSource.h>
#include <DriverKit/IOUserServer.h>
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
virtual bool
start(IOService * provider) APPLE_KEXT_OVERRIDE;
- virtual IOReturn
- setProperties(OSObject * props) APPLE_KEXT_OVERRIDE;
};
OSDefineMetaClassAndStructors(IOUserService, IOService)
return ok;
}
-IOReturn
-IOUserService::setProperties(OSObject * properties)
-{
- setProperty("USER", properties);
- return kIOReturnSuccess;
-}
-
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#undef super
kern_return_t
IMPL(IOService, SetProperties)
{
- IOReturn ret = kIOReturnUnsupported;
+ IOUserServer * us;
+ OSDictionary * dict;
+ IOReturn ret;
ret = setProperties(properties);
+ if (kIOReturnUnsupported == ret) {
+ dict = OSDynamicCast(OSDictionary, properties);
+ us = (typeof(us))thread_iokit_tls_get(0);
+ if (dict && reserved->uvars && (reserved->uvars->userServer == us)) {
+ ret = runPropertyActionBlock(^IOReturn (void) {
+ OSDictionary * userProps;
+ IOReturn ret;
+
+ userProps = OSDynamicCast(OSDictionary, getProperty(gIOUserServicePropertiesKey));
+ if (userProps) {
+ userProps = (typeof(userProps))userProps->copyCollection();
+ } else {
+ userProps = OSDictionary::withCapacity(4);
+ }
+ if (!userProps) {
+ ret = kIOReturnNoMemory;
+ } else {
+ bool ok = userProps->merge(dict);
+ if (ok) {
+ ok = setProperty(gIOUserServicePropertiesKey, userProps);
+ }
+ OSSafeReleaseNULL(userProps);
+ ret = ok ? kIOReturnSuccess : kIOReturnNotWritable;
+ }
+ return ret;
+ });
+ }
+ }
+
return ret;
}
super::free();
}
+kern_return_t
+IMPL(IODispatchSource, SetEnable)
+{
+ return SetEnableWithCompletion(enable, NULL);
+}
+
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
struct IOInterruptDispatchSource_IVars {
assert(kIOReturnSuccess == ret);
}
+ if (ivars && ivars->lock) {
+ IOSimpleLockFree(ivars->lock);
+ }
+
IOSafeDeleteNULL(ivars, IOInterruptDispatchSource_IVars, 1);
super::free();
}
kern_return_t
-IMPL(IODispatchSource, SetEnable)
+IMPL(IOInterruptDispatchSource, Cancel)
{
- return SetEnableWithCompletion(enable, NULL);
+ return kIOReturnUnsupported;
}
kern_return_t
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+enum {
+ kIOServiceNotificationTypeCount = kIOServiceNotificationTypeLast + 1,
+};
+
+struct IOServiceNotificationDispatchSource_IVars {
+ OSObject * serverName;
+ OSAction * action;
+ IOLock * lock;
+ IONotifier * notifier;
+ OSDictionary * interestNotifiers;
+ OSArray * pending[kIOServiceNotificationTypeCount];
+ bool enable;
+};
+
+kern_return_t
+IMPL(IOServiceNotificationDispatchSource, Create)
+{
+ IOUserServer * us;
+ IOReturn ret;
+ IOServiceNotificationDispatchSource * inst;
+
+ inst = OSTypeAlloc(IOServiceNotificationDispatchSource);
+ if (!inst->init()) {
+ OSSafeReleaseNULL(inst);
+ return kIOReturnNoMemory;
+ }
+
+ us = (typeof(us))thread_iokit_tls_get(0);
+ assert(OSDynamicCast(IOUserServer, us));
+ if (!us) {
+ OSSafeReleaseNULL(inst);
+ return kIOReturnError;
+ }
+ inst->ivars->serverName = us->copyProperty(gIOUserServerNameKey);
+ if (!inst->ivars->serverName) {
+ OSSafeReleaseNULL(inst);
+ return kIOReturnNoMemory;
+ }
+
+ inst->ivars->lock = IOLockAlloc();
+ if (!inst->ivars->lock) {
+ OSSafeReleaseNULL(inst);
+ return kIOReturnNoMemory;
+ }
+ for (uint32_t idx = 0; idx < kIOServiceNotificationTypeCount; idx++) {
+ inst->ivars->pending[idx] = OSArray::withCapacity(4);
+ if (!inst->ivars->pending[idx]) {
+ OSSafeReleaseNULL(inst);
+ return kIOReturnNoMemory;
+ }
+ }
+ inst->ivars->interestNotifiers = OSDictionary::withCapacity(4);
+ if (!inst->ivars->interestNotifiers) {
+ OSSafeReleaseNULL(inst);
+ return kIOReturnNoMemory;
+ }
+
+ inst->ivars->notifier = IOService::addMatchingNotification(gIOMatchedNotification, matching, 0 /*priority*/,
+ ^bool (IOService * newService, IONotifier * notifier) {
+ bool notifyReady = false;
+ IONotifier * interest;
+ OSObject * serverName;
+ bool okToUse;
+
+ serverName = newService->copyProperty(gIOUserServerNameKey);
+ okToUse = (serverName && inst->ivars->serverName->isEqualTo(serverName));
+ OSSafeReleaseNULL(serverName);
+ if (!okToUse) {
+ return false;
+ }
+
+ IOLockLock(inst->ivars->lock);
+ notifyReady = (0 == inst->ivars->pending[kIOServiceNotificationTypeMatched]->getCount());
+ inst->ivars->pending[kIOServiceNotificationTypeMatched]->setObject(newService);
+ IOLockUnlock(inst->ivars->lock);
+
+ interest = newService->registerInterest(gIOGeneralInterest,
+ ^IOReturn (uint32_t messageType, IOService * provider,
+ void * messageArgument, size_t argSize) {
+ IONotifier * interest;
+ bool notifyReady = false;
+
+ switch (messageType) {
+ case kIOMessageServiceIsTerminated:
+ IOLockLock(inst->ivars->lock);
+ notifyReady = (0 == inst->ivars->pending[kIOServiceNotificationTypeTerminated]->getCount());
+ inst->ivars->pending[kIOServiceNotificationTypeTerminated]->setObject(provider);
+ interest = (typeof(interest))inst->ivars->interestNotifiers->getObject((const OSSymbol *) newService);
+ assert(interest);
+ interest->remove();
+ inst->ivars->interestNotifiers->removeObject((const OSSymbol *) newService);
+ IOLockUnlock(inst->ivars->lock);
+ break;
+ default:
+ break;
+ }
+ if (notifyReady && inst->ivars->action) {
+ inst->ServiceNotificationReady(inst->ivars->action);
+ }
+ return kIOReturnSuccess;
+ });
+ if (interest) {
+ IOLockLock(inst->ivars->lock);
+ inst->ivars->interestNotifiers->setObject((const OSSymbol *) newService, interest);
+ IOLockUnlock(inst->ivars->lock);
+ }
+ if (notifyReady) {
+ if (inst->ivars->action) {
+ inst->ServiceNotificationReady(inst->ivars->action);
+ }
+ }
+ return false;
+ });
+
+ if (!inst->ivars->notifier) {
+ OSSafeReleaseNULL(inst);
+ /* do not fall through to the success path with a released instance */
+ return kIOReturnError;
+ }
+
+ *notification = inst;
+ ret = kIOReturnSuccess;
+
+ return ret;
+}
+
+kern_return_t
+IMPL(IOServiceNotificationDispatchSource, CopyNextNotification)
+{
+ IOService * next;
+ uint32_t idx;
+
+ IOLockLock(ivars->lock);
+ for (idx = 0; idx < kIOServiceNotificationTypeCount; idx++) {
+ next = (IOService *) ivars->pending[idx]->getObject(0);
+ if (next) {
+ next->retain();
+ ivars->pending[idx]->removeObject(0);
+ break;
+ }
+ }
+ IOLockUnlock(ivars->lock);
+
+ if (idx == kIOServiceNotificationTypeCount) {
+ idx = kIOServiceNotificationTypeNone;
+ }
+ *type = idx;
+ *service = next;
+ *options = 0;
+
+ return kIOReturnSuccess;
+}
+
+bool
+IOServiceNotificationDispatchSource::init()
+{
+ if (!super::init()) {
+ return false;
+ }
+ ivars = IONewZero(IOServiceNotificationDispatchSource_IVars, 1);
+ if (!ivars) {
+ return false;
+ }
+
+ return true;
+}
+
+void
+IOServiceNotificationDispatchSource::free()
+{
+ if (ivars) {
+ OSSafeReleaseNULL(ivars->serverName);
+ if (ivars->interestNotifiers) {
+ ivars->interestNotifiers->iterateObjects(^bool (const OSSymbol * key, OSObject * object) {
+ IONotifier * interest = (typeof(interest))object;
+ interest->remove();
+ return false;
+ });
+ OSSafeReleaseNULL(ivars->interestNotifiers);
+ }
+ for (uint32_t idx = 0; idx < kIOServiceNotificationTypeCount; idx++) {
+ OSSafeReleaseNULL(ivars->pending[idx]);
+ }
+ if (ivars->lock) {
+ IOLockFree(ivars->lock);
+ ivars->lock = NULL;
+ }
+ if (ivars->notifier) {
+ ivars->notifier->remove();
+ ivars->notifier = NULL;
+ }
+ IOSafeDeleteNULL(ivars, IOServiceNotificationDispatchSource_IVars, 1);
+ }
+
+ super::free();
+}
+
+kern_return_t
+IMPL(IOServiceNotificationDispatchSource, SetHandler)
+{
+ IOReturn ret;
+ bool notifyReady;
+
+ notifyReady = false;
+
+ IOLockLock(ivars->lock);
+ OSSafeReleaseNULL(ivars->action);
+ action->retain();
+ ivars->action = action;
+ if (action) {
+ for (uint32_t idx = 0; idx < kIOServiceNotificationTypeCount; idx++) {
+ notifyReady = (ivars->pending[idx]->getCount());
+ if (notifyReady) {
+ break;
+ }
+ }
+ }
+ IOLockUnlock(ivars->lock);
+
+ if (notifyReady) {
+ ServiceNotificationReady(action);
+ }
+ ret = kIOReturnSuccess;
+
+ return ret;
+}
+
+kern_return_t
+IMPL(IOServiceNotificationDispatchSource, SetEnableWithCompletion)
+{
+ if (enable == ivars->enable) {
+ return kIOReturnSuccess;
+ }
+
+ IOLockLock(ivars->lock);
+ ivars->enable = enable;
+ IOLockUnlock(ivars->lock);
+
+ return kIOReturnSuccess;
+}
+
+kern_return_t
+IMPL(IOServiceNotificationDispatchSource, Cancel)
+{
+ return kIOReturnUnsupported;
+}
+
+kern_return_t
+IMPL(IOServiceNotificationDispatchSource, CheckForWork)
+{
+ return kIOReturnNotReady;
+}
+
+kern_return_t
+IOServiceNotificationDispatchSource::DeliverNotifications(IOServiceNotificationBlock block)
+{
+ return kIOReturnUnsupported;
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
kern_return_t
IOUserServer::waitInterruptTrap(void * p1, void * p2, void * p3, void * p4, void * p5, void * p6)
{
kern_return_t
IMPL(IODispatchQueue, SetPort)
{
+ if (MACH_PORT_NULL != ivars->serverPort) {
+ return kIOReturnNotReady;
+ }
+
ivars->serverPort = port;
return kIOReturnSuccess;
}
void
IODispatchQueue::free()
{
+ if (ivars && ivars->serverPort) {
+ ipc_port_release_send(ivars->serverPort);
+ ivars->serverPort = MACH_PORT_NULL;
+ }
IOSafeDeleteNULL(ivars, IODispatchQueue_IVars, 1);
super::free();
}
kern_return_t
OSUserMetaClass::Dispatch(IORPC rpc)
{
- return const_cast<OSMetaClass *>(meta)->Dispatch(rpc);
+ if (meta) {
+ return const_cast<OSMetaClass *>(meta)->Dispatch(rpc);
+ } else {
+ return kIOReturnUnsupported;
+ }
}
void
array->count = 0;
cstr = &array->strings[0];
end = &array->strings[array->dataSize];
- while ((len = cstr[0])) {
+ while ((len = (unsigned char)cstr[0])) {
cstr++;
if ((cstr + len) >= end) {
break;
cstr = &array->strings[0];
end = &array->strings[array->dataSize];
llen = strlen(look);
- while ((len = cstr[0])) {
+ while ((len = (unsigned char)cstr[0])) {
cstr++;
if ((cstr + len) >= end) {
break;
resultFlags |= kOSObjectRPCRemote;
}
if (service->reserved->uvars && service->reserved->uvars->userServer) {
+ IOLockLock(service->reserved->uvars->userServer->fLock);
userMeta = (typeof(userMeta))service->reserved->uvars->userServer->fClasses->getObject(str);
+ IOLockUnlock(service->reserved->uvars->userServer->fLock);
}
}
if (!str && !userMeta) {
const OSMetaClass * meta;
meta = obj->getMetaClass();
+ IOLockLock(fLock);
while (meta && !userMeta) {
str = (OSString *) meta->getClassNameSymbol();
userMeta = (typeof(userMeta))fClasses->getObject(str);
meta = meta->getSuperClass();
}
}
+ IOLockUnlock(fLock);
}
if (str) {
if (!userMeta) {
+ IOLockLock(fLock);
userMeta = (typeof(userMeta))fClasses->getObject(str);
+ IOLockUnlock(fLock);
}
if (kIODKLogSetup & gIODKDebug) {
DKLOG("userMeta %s %p\n", str->getCStringNoCopy(), userMeta);
idx = 0;
sendPort = NULL;
if (queue && (kIODispatchQueueStopped != queue)) {
- sendPort = ipc_port_make_send(queue->ivars->serverPort);
+ sendPort = ipc_port_copy_send(queue->ivars->serverPort);
}
replySize = sizeof(OSObject_Instantiate_Rpl)
+ queueCount * sizeof(machReply->objects[0])
queue = uvars->queueArray[idx];
sendPort = NULL;
if (queue) {
- sendPort = ipc_port_make_send(queue->ivars->serverPort);
+ sendPort = ipc_port_copy_send(queue->ivars->serverPort);
}
machReply->objects[idx].type = MACH_MSG_PORT_DESCRIPTOR;
machReply->objects[idx].disposition = MACH_MSG_TYPE_MOVE_SEND;
if (!message) {
return kIOReturnIPCError;
}
+ if (message->objectRefs == 0) {
+ return kIOReturnIPCError;
+ }
ret = copyInObjects(msgin, message, msgin->msgh.msgh_size, true, false);
if (kIOReturnSuccess != ret) {
if (kIODKLogIPC & gIODKDebug) {
bzero((void *)msgout, replyAlloc);
}
- IORPC rpc = { .message = msgin, .sendSize = msgin->msgh.msgh_size, .reply = msgout, .replySize = replyAlloc };
+ IORPC rpc = { .message = msgin, .reply = msgout, .sendSize = msgin->msgh.msgh_size, .replySize = replyAlloc };
if (object) {
thread_iokit_tls_set(0, this);
port = queue->ivars->serverPort;
}
if (port) {
- sendPort = ipc_port_make_send(port);
+ sendPort = ipc_port_copy_send(port);
}
IOLockUnlock(gIOUserServerLock);
if (!sendPort) {
ret = copyOutObjects(mach, message, sendSize, false);
mach->msgh.msgh_bits = MACH_MSGH_BITS_COMPLEX |
- MACH_MSGH_BITS(MACH_MSG_TYPE_MOVE_SEND, (oneway ? 0 : MACH_MSG_TYPE_MAKE_SEND_ONCE));
+ MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, (oneway ? 0 : MACH_MSG_TYPE_MAKE_SEND_ONCE));
mach->msgh.msgh_remote_port = sendPort;
mach->msgh.msgh_local_port = (oneway ? MACH_PORT_NULL : mig_get_reply_port());
mach->msgh.msgh_id = kIORPCVersionCurrent;
mach->msgh.msgh_reserved = 0;
+ boolean_t message_moved;
+
if (oneway) {
- ret = mach_msg_send_from_kernel(&mach->msgh, sendSize);
+ ret = kernel_mach_msg_send(&mach->msgh, sendSize,
+ MACH_SEND_MSG | MACH_SEND_ALWAYS | MACH_SEND_NOIMPORTANCE,
+ 0, &message_moved);
} else {
assert(replySize >= (sizeof(IORPCMessageMach) + sizeof(IORPCMessage)));
- ret = mach_msg_rpc_from_kernel(&mach->msgh, sendSize, replySize);
- if (KERN_SUCCESS == ret) {
- if (kIORPCVersionCurrentReply != mach->msgh.msgh_id) {
- ret = (MACH_NOTIFY_SEND_ONCE == mach->msgh.msgh_id) ? MIG_SERVER_DIED : MIG_REPLY_MISMATCH;
- } else if ((replySize = mach->msgh.msgh_size) < (sizeof(IORPCMessageMach) + sizeof(IORPCMessage))) {
+ ret = kernel_mach_msg_rpc(&mach->msgh, sendSize, replySize, FALSE, &message_moved);
+ }
+
+ ipc_port_release_send(sendPort);
+
+ if (MACH_MSG_SUCCESS != ret) {
+ if (kIODKLogIPC & gIODKDebug) {
+ DKLOG("mach_msg() failed 0x%x\n", ret);
+ }
+ if (!message_moved) {
+ // release ports
+ copyInObjects(mach, message, sendSize, false, true);
+ }
+ }
+
+ if ((KERN_SUCCESS == ret) && !oneway) {
+ if (kIORPCVersionCurrentReply != mach->msgh.msgh_id) {
+ ret = (MACH_NOTIFY_SEND_ONCE == mach->msgh.msgh_id) ? MIG_SERVER_DIED : MIG_REPLY_MISMATCH;
+ } else if ((replySize = mach->msgh.msgh_size) < (sizeof(IORPCMessageMach) + sizeof(IORPCMessage))) {
// printf("BAD REPLY SIZE\n");
+ ret = MIG_BAD_ARGUMENTS;
+ } else {
+ if (!(MACH_MSGH_BITS_COMPLEX & mach->msgh.msgh_bits)) {
+ mach->msgh_body.msgh_descriptor_count = 0;
+ }
+ message = IORPCMessageFromMach(mach, true);
+ if (!message) {
+ ret = kIOReturnIPCError;
+ } else if (message->msgid != msgid) {
+// printf("BAD REPLY ID\n");
ret = MIG_BAD_ARGUMENTS;
} else {
- if (!(MACH_MSGH_BITS_COMPLEX & mach->msgh.msgh_bits)) {
- mach->msgh_body.msgh_descriptor_count = 0;
- }
- message = IORPCMessageFromMach(mach, true);
- if (!message) {
- ret = kIOReturnIPCError;
- } else if (message->msgid != msgid) {
-// printf("BAD REPLY ID\n");
- ret = MIG_BAD_ARGUMENTS;
- } else {
- bool isError = (0 != (kIORPCMessageError & message->flags));
- ret = copyInObjects(mach, message, replySize, !isError, true);
- if (kIOReturnSuccess != ret) {
- if (kIODKLogIPC & gIODKDebug) {
- DKLOG("rpc copyin(0x%x) %x\n", ret, mach->msgh.msgh_id);
- }
- return KERN_NOT_SUPPORTED;
- }
- if (isError) {
- IORPCMessageErrorReturnContent * errorMsg = (typeof(errorMsg))message;
- ret = errorMsg->result;
+ bool isError = (0 != (kIORPCMessageError & message->flags));
+ ret = copyInObjects(mach, message, replySize, !isError, true);
+ if (kIOReturnSuccess != ret) {
+ if (kIODKLogIPC & gIODKDebug) {
+ DKLOG("rpc copyin(0x%x) %x\n", ret, mach->msgh.msgh_id);
}
+ return KERN_NOT_SUPPORTED;
+ }
+ if (isError) {
+ IORPCMessageErrorReturnContent * errorMsg = (typeof(errorMsg))message;
+ ret = errorMsg->result;
}
}
}
}
}
+ /* Mark the current task's space as eligible for uext object ports */
+ iokit_label_dext_task(inst->fOwningTask);
+
inst->fLock = IOLockAlloc();
inst->fServices = OSArray::withCapacity(4);
inst->fClasses = OSDictionary::withCapacity(16);
cls->name = sym;
cls->meta = OSMetaClass::copyMetaClassWithName(sym);
+ IOLockLock(fLock);
cls->superMeta = OSDynamicCast(OSUserMetaClass, fClasses->getObject(desc->superName));
- fClasses->setObject(sym, cls);
+ if (fClasses->getObject(sym) != NULL) {
+ /* class with this name exists */
+ ret = kIOReturnBadArgument;
+ } else {
+ if (fClasses->setObject(sym, cls)) {
+ *pCls = cls;
+ } else {
+ /* could not add class to fClasses */
+ ret = kIOReturnNoMemory;
+ }
+ }
+ IOLockUnlock(fLock);
cls->release();
-
- *pCls = cls;
-
return ret;
}
}
}
- ret = userUC->Start(service);
- if (kIOReturnSuccess != ret) {
- userUC->detach(this);
- userUC->release();
- return ret;
- }
-
*handler = userUC;
return ret;
service = OSDynamicCast(IOService, inst);
if (service && service->init(properties) && service->attach(this)) {
reserved->uvars->userServer->serviceAttach(service, this);
+ service->reserved->uvars->started = true;
ret = kIOReturnSuccess;
*result = service;
}
return ret;
}
+kern_return_t
+IMPL(IOService, Terminate)
+{
+ IOUserServer * us;
+
+ if (options) {
+ return kIOReturnUnsupported;
+ }
+
+ us = (typeof(us))thread_iokit_tls_get(0);
+ if (!reserved->uvars
+ || (reserved->uvars->userServer != us)) {
+ return kIOReturnNotPermitted;
+ }
+ terminate(kIOServiceTerminateNeedWillTerminate);
+
+ return kIOReturnSuccess;
+}
+
kern_return_t
IMPL(IOService, NewUserClient)
{
return object ? kIOReturnSuccess : kIOReturnNotFound;
}
+kern_return_t
+IMPL(IOService, CopyProviderProperties)
+{
+ IOReturn ret;
+ OSArray * result;
+ IOService * provider;
+
+ result = OSArray::withCapacity(8);
+ if (!result) {
+ return kIOReturnNoMemory;
+ }
+
+ ret = kIOReturnSuccess;
+ for (provider = this; provider; provider = provider->getProvider()) {
+ OSObject * obj;
+ OSDictionary * props;
+
+ obj = provider->copyProperty(gIOSupportedPropertiesKey);
+ props = OSDynamicCast(OSDictionary, obj);
+ if (!props) {
+ OSSafeReleaseNULL(obj);
+ props = provider->dictionaryWithProperties();
+ }
+ if (!props) {
+ ret = kIOReturnNoMemory;
+ break;
+ }
+ bool __block addClass = true;
+ if (propertyKeys) {
+ OSDictionary * retProps;
+ retProps = OSDictionary::withCapacity(4);
+ addClass = false;
+ if (!retProps) {
+ ret = kIOReturnNoMemory;
+ break;
+ }
+ propertyKeys->iterateObjects(^bool (OSObject * _key) {
+ OSString * key = OSDynamicCast(OSString, _key);
+ if (gIOClassKey->isEqualTo(key)) {
+ addClass = true;
+ return false;
+ }
+ retProps->setObject(key, props->getObject(key));
+ return false;
+ });
+ OSSafeReleaseNULL(props);
+ props = retProps;
+ }
+ if (addClass) {
+ OSArray * classes = OSArray::withCapacity(8);
+ if (!classes) {
+ ret = kIOReturnNoMemory;
+ break;
+ }
+ for (const OSMetaClass * meta = provider->getMetaClass(); meta; meta = meta->getSuperClass()) {
+ classes->setObject(meta->getClassNameSymbol());
+ }
+ props->setObject(gIOClassKey, classes);
+ OSSafeReleaseNULL(classes);
+ }
+ bool ok = result->setObject(props);
+ props->release();
+ if (!ok) {
+ ret = kIOReturnNoMemory;
+ break;
+ }
+ }
+ if (kIOReturnSuccess != ret) {
+ OSSafeReleaseNULL(result);
+ }
+ *properties = result;
+ return ret;
+}
+
void
IOUserServer::systemPower(bool powerOff)
{
return kIOReturnSuccess;
}
- IOMachPortDestroyUserReferences(service, IKOT_UEXT_OBJECT);
-
if (uvars->queueArray && uvars->userMeta) {
queueAlloc = 1;
if (uvars->userMeta->queueNames) {
return kIOReturnSuccess;
}
-kern_return_t
-IMPL(IOInterruptDispatchSource, Cancel)
-{
- return kIOReturnUnsupported;
-}
-
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#undef super
IOReturn
IOUserUserClient::clientClose(void)
{
- terminate();
+ terminate(kIOServiceTerminateNeedWillTerminate);
return kIOReturnSuccess;
}
./DriverKit/IODispatchQueue.iig.cpp optional iokitcpp
./DriverKit/IOInterruptDispatchSource.iig.cpp optional iokitcpp
./DriverKit/IODataQueueDispatchSource.iig.cpp optional iokitcpp
+./DriverKit/IOServiceNotificationDispatchSource.iig.cpp optional iokitcpp
./DriverKit/IOUserServer.iig.cpp optional iokitcpp
# libIOKit
}
/*********************************************************************
-* xxx - I want to rename this :-/
*********************************************************************/
const OSSymbol *
OSMetaClass::getKmodName() const
return OSSymbol::withCStringNoCopy("unknown");
}
+/*********************************************************************
+*********************************************************************/
+OSKext *
+OSMetaClass::getKext() const
+{
+ return reserved ? reserved->kext : NULL;
+}
+
/*********************************************************************
*********************************************************************/
unsigned int
*/
#define kOSBundleAllowUserLoadKey "OSBundleAllowUserLoad"
+/*!
+ * @define kOSBundleAllowUserTerminateKey
+ * @abstract A boolean value indicating whether the kextunload tool
+ * is allowed to issue IOService terminate to classes defined in this kext.
+ * @discussion When this key is absent, user-initiated termination is refused for
+ * the kernel itself and for kexts whose bundle identifier begins with com.apple.;
+ * setting the key to true or false overrides that default.
+ */
+#define kOSBundleAllowUserTerminateKey "OSBundleAllowUserTerminate"
+
/*!
* @define kOSKernelResourceKey
* @abstract A boolean value indicating whether the kext represents a built-in
#ifdef XNU_KERNEL_PRIVATE
class OSOrderedSet;
class OSCollection;
+class OSKext;
#endif /* XNU_KERNEL_PRIVATE */
struct IORPC;
class OSInterface
virtual OSObject * alloc() const = 0;
#ifdef XNU_KERNEL_PRIVATE
+ OSKext * getKext() const;
void addInstance(const OSObject * instance, bool super = false) const;
void removeInstance(const OSObject * instance, bool super = false) const;
void applyToInstances(OSMetaClassInstanceApplierFunction applier,
return rv;
}
+kern_return_t
+mach_port_kobject_description(
+ ipc_space_t task,
+ mach_port_name_t name,
+ natural_t *object_type,
+ mach_vm_address_t *object_addr,
+ kobject_description_t desc)
+{
+ kern_return_t rv;
+
+ rv = _kernelrpc_mach_port_kobject_description(task, name, object_type, object_addr, desc);
+
+ return rv;
+}
+
kern_return_t
mach_port_construct(
ipc_space_t task,
}
}
+__attribute__((visibility("hidden")))
+int
+pthread_current_stack_contains_np(const void *addr, size_t len)
+{
+ if (_libkernel_functions->version >= 4 &&
+ _libkernel_functions->pthread_current_stack_contains_np) {
+ return _libkernel_functions->pthread_current_stack_contains_np(addr, len);
+ }
+
+ return 0;
+}
+
/*
* Upcalls to optimized libplatform string functions
*/
/* The following functions are included in version 3 of this structure */
void (*pthread_clear_qos_tsd)(mach_port_t);
+ /* The following functions are included in version 4 of this structure */
+ int (*pthread_current_stack_contains_np)(const void *, size_t);
+
/* Subsequent versions must only add pointers! */
} *_libkernel_functions_t;
return posix_spawn_appendportaction_np(attr, &action);
}
+/*
+ * posix_spawnattr_setsuidcredport_np
+ *
+ * Description: Set an suid cred port to be used to execute with a different UID.
+ *
+ * Parameters: attr The spawn attributes object for the
+ * new process
+ * port The suid cred port
+ *
+ * Returns: 0 Success
+ */
+int
+posix_spawnattr_setsuidcredport_np(posix_spawnattr_t *attr, mach_port_t port)
+{
+ _ps_port_action_t action = {
+ .port_type = PSPA_SUID_CRED,
+ .new_port = port,
+ };
+ return posix_spawn_appendportaction_np(attr, &action);
+}
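
A hedged usage sketch of the new attribute (editorial example; how the suid cred port itself is obtained is outside this diff, and the spawn_private.h header name is an assumption about where the SPI is declared):

    #include <spawn.h>
    #include <spawn_private.h>   /* assumed SPI header declaring the _np call */
    #include <mach/mach.h>

    static int
    spawn_with_suid_cred(const char *path, char *const argv[], char *const envp[],
        mach_port_t suid_cred_port, pid_t *child)
    {
        posix_spawnattr_t attr;
        int err = posix_spawnattr_init(&attr);
        if (err != 0) {
            return err;
        }
        /* attach the suid cred port so the child executes with the credentialed UID */
        err = posix_spawnattr_setsuidcredport_np(&attr, suid_cred_port);
        if (err == 0) {
            err = posix_spawn(child, path, NULL, &attr, argv, envp);
        }
        posix_spawnattr_destroy(&attr);
        return err;
    }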
+
/*
* posix_spawnattr_setexceptionports_np
*
int posix_spawnattr_setspecialport_np(posix_spawnattr_t * __restrict,
mach_port_t, int) __API_AVAILABLE(macos(10.5), ios(2.0)) __SPI_AVAILABLE(watchos(2.0), tvos(9.0), bridgeos(1.0));
+int posix_spawnattr_setsuidcredport_np(posix_spawnattr_t * __restrict, mach_port_t) __SPI_AVAILABLE(ios(13.0), macos(10.15));
+
int posix_spawn_file_actions_addinherit_np(posix_spawn_file_actions_t *,
int) __API_AVAILABLE(macos(10.7), ios(4.3)) __SPI_AVAILABLE(watchos(2.0), tvos(9.0), bridgeos(1.0));
#include <sys/types.h>
#include <stdint.h>
#include <signal.h>
+#include <os/reason_private.h>
#include <unistd.h>
+/* Crash simulation */
+
+extern int pthread_current_stack_contains_np(const void *, unsigned long);
+int
+__darwin_check_fd_set_overflow(int n, const void *fd_set, int unlimited_select)
+{
+ if (n < 0) {
+ os_fault_with_payload(OS_REASON_LIBSYSTEM, OS_REASON_LIBSYSTEM_CODE_FAULT,
+ &n, sizeof(n), "FD_SET underflow", 0);
+ return 0;
+ }
+
+ if (n >= __DARWIN_FD_SETSIZE) {
+ if (pthread_current_stack_contains_np((const void *) fd_set, sizeof(struct fd_set))) {
+ if (!unlimited_select) {
+ os_fault_with_payload(OS_REASON_LIBSYSTEM, OS_REASON_LIBSYSTEM_CODE_FAULT,
+ &n, sizeof(n), "FD_SET overflow", 0);
+ return 0;
+ } else {
+ return 1;
+ }
+ } else {
+ return 1;
+ }
+ }
+
+ return 1;
+}
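
A hypothetical caller sketch (editorial example): a bounds-checked FD_SET-style helper that consults the new check before touching the bit array. The wrapper name and the actual macro wiring in <sys/select.h> are assumptions, not shown in this diff.

    #include <sys/select.h>

    extern int __darwin_check_fd_set_overflow(int, const void *, int);

    static inline void
    fd_set_checked(int fd, fd_set *set)
    {
        /* nonzero return means the access is considered safe (or unlimited select is in use) */
        if (__darwin_check_fd_set_overflow(fd, set, 0 /* unlimited_select */)) {
            FD_SET(fd, set);
        }
    }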
+
/* System call entry points */
int __terminate_with_payload(int pid, uint32_t reason_namespace, uint64_t reason_code,
void *payload, uint32_t payload_size, const char *reason_string,
ip_unlock(port);
return UND_REPLY_NULL;
}
- reply = (UNDReplyRef) port->ip_kobject;
+ reply = (UNDReplyRef) ip_get_kobject(port);
assert(reply != UND_REPLY_NULL);
ip_unlock(port);
return reply;
+ ((uintptr_t)&BootCpuData
- (uintptr_t)(args->virtBase)));
- thread_bootstrap();
- thread = current_thread();
+ thread = thread_bootstrap();
+ thread->machine.CpuDatap = &BootCpuData;
+ machine_set_current_thread(thread);
+
/*
* Preemption is enabled for this thread so that it can lock mutexes without
* tripping the preemption check. In reality scheduling is not enabled until
* preemption level is not really meaningful for the bootstrap thread.
*/
thread->machine.preemption_count = 0;
- thread->machine.CpuDatap = &BootCpuData;
#if __arm__ && __ARM_USER_PROTECT__
{
unsigned int ttbr0_val, ttbr1_val, ttbcr_val;
add r3, r3, SS_R4
stmia r3!, {r4-r14} // Save general registers to pcb
switch_threads:
+ ldr r3, [r2, ACT_CPUDATAP]
+ str r2, [r3, CPU_ACTIVE_THREAD]
ldr r3, [r2, TH_KSTACKPTR] // get kernel stack top
mcr p15, 0, r2, c13, c0, 4 // Write TPIDRPRW
ldr r6, [r2, TH_CTH_SELF]
#define LCK_FRAMES_MAX 8
extern uint64_t MutexSpin;
+extern uint64_t low_MutexSpin;
+extern int64_t high_MutexSpin;
typedef struct {
unsigned int type;
#include <kern/debug.h>
#include <kern/kcdata.h>
#include <string.h>
+#include <arm/cpu_internal.h>
+#include <os/hash.h>
+#include <arm/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <arm/proc_reg.h>
typedef enum {
SPINWAIT_ACQUIRED, /* Got the lock. */
SPINWAIT_INTERLOCK, /* Got the interlock, no owner, but caller must finish acquiring the lock. */
- SPINWAIT_DID_SPIN, /* Got the interlock, spun, but failed to get the lock. */
+ SPINWAIT_DID_SPIN_HIGH_THR, /* Spun, gave up after reaching the hard (high) deadline. */
+ SPINWAIT_DID_SPIN_OWNER_NOT_CORE, /* Spun, gave up because the owner was not running on a core. */
+ SPINWAIT_DID_SPIN_NO_WINDOW_CONTENTION, /* Spun, gave up because there was no contention during the last window. */
+ SPINWAIT_DID_SPIN_SLIDING_THR, /* Spun, gave up after reaching the adjusted (sliding) deadline. */
SPINWAIT_DID_NOT_SPIN, /* Got the interlock, did not spin. */
} spinwait_result_t;
return current_thread()->machine.preemption_count;
}
-#if __SMP__
-static inline boolean_t
-interlock_try_disable_interrupts(
- lck_mtx_t *mutex,
- boolean_t *istate)
-{
- *istate = ml_set_interrupts_enabled(FALSE);
-
- if (interlock_try(mutex)) {
- return 1;
- } else {
- ml_set_interrupts_enabled(*istate);
- return 0;
- }
-}
-
-static inline void
-interlock_unlock_enable_interrupts(
- lck_mtx_t *mutex,
- boolean_t istate)
-{
- interlock_unlock(mutex);
- ml_set_interrupts_enabled(istate);
-}
-#endif /* __SMP__ */
-
/*
* Routine: lck_spin_alloc_init
*/
int has_interlock = (int)interlocked;
#if __SMP__
__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
- thread_t holder;
- uint64_t overall_deadline;
- uint64_t check_owner_deadline;
- uint64_t cur_time;
- spinwait_result_t retval = SPINWAIT_DID_SPIN;
- int loopcount = 0;
- uintptr_t state;
- boolean_t istate;
+ thread_t owner, prev_owner;
+ uint64_t window_deadline, sliding_deadline, high_deadline;
+ uint64_t start_time, cur_time, avg_hold_time, bias, delta;
+ int loopcount = 0;
+ uint i, prev_owner_cpu;
+ int total_hold_time_samples, window_hold_time_samples, unfairness;
+ bool owner_on_core, adjust;
+ uintptr_t state, new_state, waiters;
+ spinwait_result_t retval = SPINWAIT_DID_SPIN_HIGH_THR;
if (__improbable(!(lck_mtx_adaptive_spin_mode & ADAPTIVE_SPIN_ENABLE))) {
if (!has_interlock) {
return SPINWAIT_DID_NOT_SPIN;
}
- state = ordered_load_mtx(lock);
-
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(LCK_MTX_STATE_TO_THREAD(state)), lock->lck_mtx_waiters, 0, 0);
- cur_time = mach_absolute_time();
- overall_deadline = cur_time + MutexSpin;
- check_owner_deadline = cur_time;
-
- if (has_interlock) {
- istate = ml_get_interrupts_enabled();
+ start_time = mach_absolute_time();
+ /*
+ * window_deadline bounds the "learning" phase.
+ * The thread collects statistics about the lock until
+ * window_deadline and then decides whether to keep spinning
+ * or to block, based on the concurrency behavior it observed.
+ *
+ * Every thread can spin at least low_MutexSpin.
+ */
+ window_deadline = start_time + low_MutexSpin;
+ /*
+ * Sliding_deadline is the adjusted spin deadline
+ * computed after the "learning" phase.
+ */
+ sliding_deadline = window_deadline;
+ /*
+ * High_deadline is a hard deadline. No thread
+ * can spin past this deadline.
+ */
+ if (high_MutexSpin >= 0) {
+ high_deadline = start_time + high_MutexSpin;
+ } else {
+ high_deadline = start_time + low_MutexSpin * real_ncpus;
}
+ /*
+ * We do not yet know which cpu owns the lock.
+ * Initialize prev_owner_cpu to the next cpu.
+ */
+ prev_owner_cpu = (cpu_number() + 1) % real_ncpus;
+ total_hold_time_samples = 0;
+ window_hold_time_samples = 0;
+ avg_hold_time = 0;
+ adjust = TRUE;
+ bias = (os_hash_kernel_pointer(lock) + cpu_number()) % real_ncpus;
+
/* Snoop the lock state */
state = ordered_load_mtx(lock);
+ owner = LCK_MTX_STATE_TO_THREAD(state);
+ prev_owner = owner;
+
+ if (has_interlock) {
+ if (owner == NULL) {
+ retval = SPINWAIT_INTERLOCK;
+ goto done_spinning;
+ } else {
+ /*
+ * We are holding the interlock, so
+ * we can safely dereference owner.
+ */
+ if (!(owner->machine.machine_thread_flags & MACHINE_THREAD_FLAGS_ON_CPU) ||
+ (owner->state & TH_IDLE)) {
+ retval = SPINWAIT_DID_NOT_SPIN;
+ goto done_spinning;
+ }
+ }
+ interlock_unlock(lock);
+ has_interlock = 0;
+ }
/*
* Spin while:
* - mutex is locked, and
* - it's locked as a spin lock, and
* - owner is running on another processor, and
- * - owner (processor) is not idling, and
* - we haven't spun for long enough.
*/
do {
- if (!(state & LCK_ILOCK) || has_interlock) {
- if (!has_interlock) {
- has_interlock = interlock_try_disable_interrupts(lock, &istate);
+ /*
+ * Try to acquire the lock.
+ */
+ owner = LCK_MTX_STATE_TO_THREAD(state);
+ if (owner == NULL) {
+ waiters = state & ARM_LCK_WAITERS;
+ if (waiters) {
+ /*
+ * preserve the waiter bit
+ * and try to acquire the interlock.
+ * Note: we will successfully acquire
+ * the interlock only if we can also
+ * acquire the lock.
+ */
+ new_state = ARM_LCK_WAITERS | LCK_ILOCK;
+ has_interlock = 1;
+ retval = SPINWAIT_INTERLOCK;
+ disable_preemption();
+ } else {
+ new_state = LCK_MTX_THREAD_TO_STATE(thread);
+ retval = SPINWAIT_ACQUIRED;
+ }
+
+ /*
+ * The cmpxchg will succeed only if the lock
+ * is not owned (doesn't have an owner set)
+ * and it is not interlocked.
+ * It will not fail if there are waiters.
+ */
+ if (os_atomic_cmpxchgv(&lock->lck_mtx_data,
+ waiters, new_state, &state, acquire)) {
+ goto done_spinning;
+ } else {
+ if (waiters) {
+ has_interlock = 0;
+ enable_preemption();
+ }
}
+ }
- if (has_interlock) {
- state = ordered_load_mtx(lock);
- holder = LCK_MTX_STATE_TO_THREAD(state);
+ cur_time = mach_absolute_time();
- if (holder == NULL) {
- retval = SPINWAIT_INTERLOCK;
+ /*
+ * Never spin past high_deadline.
+ */
+ if (cur_time >= high_deadline) {
+ retval = SPINWAIT_DID_SPIN_HIGH_THR;
+ break;
+ }
- if (istate) {
- ml_set_interrupts_enabled(istate);
- }
+ /*
+ * Check if the owner is on core. If not, block.
+ */
+ owner = LCK_MTX_STATE_TO_THREAD(state);
+ if (owner) {
+ i = prev_owner_cpu;
+ owner_on_core = FALSE;
- break;
- }
+ disable_preemption();
+ state = ordered_load_mtx(lock);
+ owner = LCK_MTX_STATE_TO_THREAD(state);
- if (!(holder->machine.machine_thread_flags & MACHINE_THREAD_FLAGS_ON_CPU) ||
- (holder->state & TH_IDLE)) {
- if (loopcount == 0) {
- retval = SPINWAIT_DID_NOT_SPIN;
+ /*
+ * For scalability we want to check if the owner is on core
+ * without locking the mutex interlock.
+ * If we do not lock the mutex interlock, the owner that we see might be
+ * invalid, so we cannot dereference it. Therefore we cannot check
+ * any field of the thread to tell us if it is on core.
+ * Instead, check whether the thread running on each of the other cpus matches the owner.
+ */
+ if (owner) {
+ do {
+ cpu_data_t *cpu_data_ptr = CpuDataEntries[i].cpu_data_vaddr;
+ if ((cpu_data_ptr != NULL) && (cpu_data_ptr->cpu_active_thread == owner)) {
+ owner_on_core = TRUE;
+ break;
}
-
- if (istate) {
- ml_set_interrupts_enabled(istate);
+ if (++i >= real_ncpus) {
+ i = 0;
}
-
- break;
+ } while (i != prev_owner_cpu);
+ enable_preemption();
+
+ if (owner_on_core) {
+ prev_owner_cpu = i;
+ } else {
+ prev_owner = owner;
+ state = ordered_load_mtx(lock);
+ owner = LCK_MTX_STATE_TO_THREAD(state);
+ if (owner == prev_owner) {
+ /*
+ * Owner is not on core.
+ * Stop spinning.
+ */
+ if (loopcount == 0) {
+ retval = SPINWAIT_DID_NOT_SPIN;
+ } else {
+ retval = SPINWAIT_DID_SPIN_OWNER_NOT_CORE;
+ }
+ break;
+ }
+ /*
+ * Fall through if the owner changed while we were scanning.
+ * The new owner could potentially be on core, so loop
+ * again.
+ */
}
-
- interlock_unlock_enable_interrupts(lock, istate);
- has_interlock = 0;
+ } else {
+ enable_preemption();
}
}
- cur_time = mach_absolute_time();
-
- if (cur_time >= overall_deadline) {
- break;
+ /*
+ * Save how many times we see the owner changing.
+ * We can roughly estimate the mutex hold
+ * time and the fairness with that.
+ */
+ if (owner != prev_owner) {
+ prev_owner = owner;
+ total_hold_time_samples++;
+ window_hold_time_samples++;
}
- check_owner_deadline = cur_time + (MutexSpin / SPINWAIT_OWNER_CHECK_COUNT);
+ /*
+ * Learning window expired.
+ * Try to adjust the sliding_deadline.
+ */
+ if (cur_time >= window_deadline) {
+ /*
+ * If there was no contention during the window,
+ * stop spinning.
+ */
+ if (window_hold_time_samples < 1) {
+ retval = SPINWAIT_DID_SPIN_NO_WINDOW_CONTENTION;
+ break;
+ }
+
+ if (adjust) {
+ /*
+ * For a fair lock, we'd wait for at most (NCPU-1) periods,
+ * but the lock is unfair, so let's try to estimate by how much.
+ */
+ unfairness = total_hold_time_samples / real_ncpus;
+
+ if (unfairness == 0) {
+ /*
+ * We observed the owner changing `total_hold_time_samples` times, which
+ * lets us estimate the average hold time of this mutex over the spin
+ * so far:
+ * avg_hold_time = (cur_time - start_time) / total_hold_time_samples;
+ *
+ * In this case spin for at most avg_hold_time * (real_ncpus - 1).
+ */
+ delta = cur_time - start_time;
+ sliding_deadline = start_time + (delta * (real_ncpus - 1)) / total_hold_time_samples;
+ } else {
+ /*
+ * In this case at least one of the other CPUs was able to get the lock twice
+ * while we were spinning.
+ * We could spin longer, but it won't necessarily help if the system is unfair.
+ * Try to randomize the wait to reduce contention.
+ *
+ * We compute how much time we could potentially spin
+ * and distribute it over the cpus.
+ *
+ * bias is an integer between 0 and real_ncpus.
+ * distributed_increment = ((high_deadline - cur_time) / real_ncpus) * bias
+ */
+ delta = high_deadline - cur_time;
+ sliding_deadline = cur_time + ((delta * bias) / real_ncpus);
+ adjust = FALSE;
+ }
+ }
- if (cur_time < check_owner_deadline) {
- machine_delay_until(check_owner_deadline - cur_time, check_owner_deadline);
+ window_deadline += low_MutexSpin;
+ window_hold_time_samples = 0;
}
- /* Snoop the lock state */
- state = ordered_load_mtx(lock);
+ /*
+ * Stop spinning if we are past
+ * the adjusted deadline.
+ */
+ if (cur_time >= sliding_deadline) {
+ retval = SPINWAIT_DID_SPIN_SLIDING_THR;
+ break;
+ }
- if (state == 0) {
- /* Try to grab the lock. */
- if (os_atomic_cmpxchg(&lock->lck_mtx_data,
- 0, LCK_MTX_THREAD_TO_STATE(thread), acquire)) {
- retval = SPINWAIT_ACQUIRED;
- break;
- }
+ /*
+ * We want to arm the monitor for wfe,
+ * so load the lock exclusively.
+ *
+ * NOTE:
+ * we rely on the fact that wfe will
+ * eventually return even if the cache line
+ * is not modified. This way we will keep
+ * looping and checking whether the deadlines have expired.
+ */
+ state = os_atomic_load_exclusive(&lock->lck_mtx_data, relaxed);
+ owner = LCK_MTX_STATE_TO_THREAD(state);
+ if (owner != NULL) {
+ wait_for_event();
+ state = ordered_load_mtx(lock);
+ } else {
+ atomic_exchange_abort();
}
loopcount++;
} while (TRUE);
+done_spinning:
#if CONFIG_DTRACE
/*
- * We've already kept a count via overall_deadline of how long we spun.
- * If dtrace is active, then we compute backwards to decide how
- * long we spun.
- *
* Note that we record a different probe id depending on whether
* this is a direct or indirect mutex. This allows us to
* penalize only lock groups that have debug/stats enabled
*/
if (__probable(lock->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)) {
LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, lock,
- mach_absolute_time() - (overall_deadline - MutexSpin));
+ mach_absolute_time() - start_time);
} else {
LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, lock,
- mach_absolute_time() - (overall_deadline - MutexSpin));
+ mach_absolute_time() - start_time);
}
/* The lockstat acquire event is recorded by the caller. */
#endif
return retval;
}
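
For reference, a minimal standalone sketch of the sliding-deadline adjustment described in the comments above; the helper name and the concrete numbers are illustrative only and are not part of the diff.

static uint64_t
sliding_deadline_sketch(uint64_t start_time, uint64_t cur_time,
    uint64_t high_deadline, uint64_t total_hold_time_samples,
    uint64_t real_ncpus, uint64_t bias)
{
	uint64_t unfairness = total_hold_time_samples / real_ncpus;

	if (unfairness == 0) {
		/* e.g. 5 samples, 8 CPUs, start 0, now 100: 0 + (100 * 7) / 5 = 140 */
		return start_time + ((cur_time - start_time) * (real_ncpus - 1)) / total_hold_time_samples;
	}
	/* e.g. 10 samples, bias 3, high_deadline 800, now 100: 100 + (700 * 3) / 8 = 362 */
	return cur_time + ((high_deadline - cur_time) * bias) / real_ncpus;
}
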
+
/*
* Common code for mutex locking as spinlock
*/
uint32_t LockTimeOutUsec;
uint64_t TLockTimeOut;
uint64_t MutexSpin;
+uint64_t low_MutexSpin;
+int64_t high_MutexSpin;
+
boolean_t is_clock_configured = FALSE;
#if CONFIG_NONFATAL_ASSERTS
nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
}
MutexSpin = abstime;
+ low_MutexSpin = MutexSpin;
+ /*
+ * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
+ * real_ncpus is not set at this time.
+ *
+ * NOTE: active spinning is disabled on ARM. It can be activated
+ * by setting high_MutexSpin through the sysctl.
+ */
+ high_MutexSpin = low_MutexSpin;
}
/*
.align 2
.globl EXT(machine_set_current_thread)
LEXT(machine_set_current_thread)
+ ldr r1, [r0, ACT_CPUDATAP]
+ str r0, [r1, CPU_ACTIVE_THREAD]
mcr p15, 0, r0, c13, c0, 4 // Write TPIDRPRW
ldr r1, [r0, TH_CTH_SELF]
mrc p15, 0, r2, c13, c0, 3 // Read TPIDRURO
"arm debug state");
}
+/*
+ * Routine: machine_thread_template_init
+ *
+ */
+void
+machine_thread_template_init(thread_t __unused thr_template)
+{
+ /* Nothing to do on this platform. */
+}
/*
* Routine: get_useraddr
case APRR_USER_RW_INDEX: return XPRR_USER_RW_PERM;
case APRR_PPL_RX_INDEX: return XPRR_PPL_RX_PERM;
case APRR_KERN_RX_INDEX: return XPRR_KERN_RX_PERM;
- case APRR_PPL_RO_INDEX: return XPRR_PPL_RO_PERM;
+ case APRR_USER_XO_INDEX: return XPRR_USER_XO_PERM;
case APRR_KERN_RO_INDEX: return XPRR_KERN_RO_PERM;
case APRR_KERN0_RX_INDEX: return XPRR_KERN0_RO_PERM;
case APRR_KERN0_RO_INDEX: return XPRR_KERN0_RO_PERM;
case XPRR_USER_RW_PERM: return APRR_USER_RW_INDEX;
case XPRR_PPL_RX_PERM: return APRR_PPL_RX_INDEX;
case XPRR_KERN_RX_PERM: return APRR_KERN_RX_INDEX;
- case XPRR_PPL_RO_PERM: return APRR_PPL_RO_INDEX;
+ case XPRR_USER_XO_PERM: return APRR_USER_XO_INDEX;
case XPRR_KERN_RO_PERM: return APRR_KERN_RO_INDEX;
case XPRR_KERN0_RX_PERM: return APRR_KERN0_RO_INDEX;
case XPRR_KERN0_RO_PERM: return APRR_KERN0_RO_INDEX;
monitor_start_pa = BootArgs->topOfKernelData;
monitor_end_pa = BootArgs->topOfKernelData + BOOTSTRAP_TABLE_SIZE;
- /* The bootstrap page tables are mapped RO at boostrap. */
- pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_PPL_RO_PERM);
+ /*
+ * The bootstrap page tables are mapped RO at bootstrap.
+ *
+ * Note that this function call requests switching XPRR permissions from
+ * XPRR_KERN_RO_PERM to XPRR_KERN_RO_PERM. Whilst this may seem redundant,
+ * pa_set_range_xprr_perm() does other things too, such as calling
+ * pa_set_range_monitor() on the requested address range and performing a number
+ * of integrity checks on the PTEs. We should still
+ * call this function for all PPL-owned memory, regardless of whether
+ * permissions are required to be changed or not.
+ */
+ pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_KERN_RO_PERM);
monitor_start_pa = BootArgs->topOfKernelData + BOOTSTRAP_TABLE_SIZE;
monitor_end_pa = avail_start;
/* The other bootstrap allocations are mapped RW at bootstrap. */
pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
- /* The RO page tables are mapped RW at bootstrap. */
+ /*
+ * The RO page tables are mapped RW at bootstrap and remain RW after the call
+ * to pa_set_range_xprr_perm(). We do this, as opposed to using XPRR_PPL_RW_PERM,
+ * to work around a functional issue on H11 devices where CTRR shifts the APRR
+ * lookup table index to USER_XO before APRR is applied, hence causing the hardware
+ * to believe we are dealing with a user XO page upon performing a translation.
+ *
+ * Note that this workaround does not pose a security risk, because the RO
+ * page tables still remain read-only, due to KTRR/CTRR, and further protecting
+ * them at the APRR level would be unnecessary.
+ */
monitor_start_pa = kvtophys((vm_offset_t)&ropagetable_begin);
monitor_end_pa = monitor_start_pa + ((vm_offset_t)&ropagetable_end - (vm_offset_t)&ropagetable_begin);
- pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
+ pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_KERN_RW_PERM);
monitor_start_pa = kvtophys(segPPLDATAB);
monitor_end_pa = monitor_start_pa + segSizePPLDATA;
monitor_start_pa = kvtophys(segPPLDATACONSTB);
monitor_end_pa = monitor_start_pa + segSizePPLDATACONST;
- pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_PPL_RO_PERM);
+ pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_KERN_RO_PERM);
}
/*
* Mark the original physical aperture mapping for the PPL stack pages RO as an additional security
* precaution. The real RW mappings are at a different location with guard pages.
*/
- pa_set_range_xprr_perm(pmap_stacks_start_pa, pmap_stacks_end_pa, XPRR_PPL_RW_PERM, XPRR_PPL_RO_PERM);
+ pa_set_range_xprr_perm(pmap_stacks_start_pa, pmap_stacks_end_pa, XPRR_PPL_RW_PERM, XPRR_KERN_RO_PERM);
}
if ((p = pmap_alloc_pmap()) == PMAP_NULL) {
return PMAP_NULL;
}
+
+ if (ledger) {
+ pmap_ledger_validate(ledger);
+ pmap_ledger_retain(ledger);
+ }
#else
/*
* Allocate a pmap struct from the pmap_zone. Then allocate
}
#endif
+ p->ledger = ledger;
+
if (flags & PMAP_CREATE_64BIT) {
p->min = MACH_VM_MIN_ADDRESS;
p->max = MACH_VM_MAX_ADDRESS;
}
-#if XNU_MONITOR
- if (ledger) {
- pmap_ledger_validate(ledger);
- pmap_ledger_retain(ledger);
- }
-#endif /* XNU_MONITOR */
-
- p->ledger = ledger;
PMAP_LOCK_INIT(p);
memset((void *) &p->stats, 0, sizeof(p->stats));
pte_set_was_writeable(tmplate, false);
#if __APRR_SUPPORTED__
- if (__improbable(is_pte_xprr_protected(spte) && (pte_to_xprr_perm(spte) != XPRR_USER_JIT_PERM))) {
+ if (__improbable(is_pte_xprr_protected(spte) && (pte_to_xprr_perm(spte) != XPRR_USER_JIT_PERM)
+ && (pte_to_xprr_perm(spte) != XPRR_USER_XO_PERM))) {
/* Only test for PPL protection here, User-JIT mappings may be mutated by this function. */
panic("%s: modifying a PPL mapping pte_p=%p pmap=%p prot=%d options=%u, pte=0x%llx, tmplate=0x%llx",
__func__, pte_p, pmap, prot, options, (uint64_t)spte, (uint64_t)tmplate);
}
- if (__improbable(is_pte_xprr_protected(tmplate))) {
+ if (__improbable(is_pte_xprr_protected(tmplate) && (pte_to_xprr_perm(tmplate) != XPRR_USER_XO_PERM))) {
panic("%s: creating an xPRR mapping pte_p=%p pmap=%p prot=%d options=%u, pte=0x%llx, tmplate=0x%llx",
__func__, pte_p, pmap, prot, options, (uint64_t)spte, (uint64_t)tmplate);
}
#if XNU_MONITOR
if (!pmap_ppl_disable && (wimg_bits & PP_ATTR_MONITOR)) {
uint64_t xprr_perm = pte_to_xprr_perm(pte);
- pte &= ~ARM_PTE_XPRR_MASK;
switch (xprr_perm) {
case XPRR_KERN_RO_PERM:
- pte |= xprr_perm_to_pte(XPRR_PPL_RO_PERM);
break;
case XPRR_KERN_RW_PERM:
+ pte &= ~ARM_PTE_XPRR_MASK;
pte |= xprr_perm_to_pte(XPRR_PPL_RW_PERM);
break;
default:
*/
.macro set_thread_registers
msr TPIDR_EL1, $0 // Write new thread pointer to TPIDR_EL1
+ ldr $1, [$0, ACT_CPUDATAP]
+ str $0, [$1, CPU_ACTIVE_THREAD]
ldr $1, [$0, TH_CTH_SELF] // Get cthread pointer
mrs $2, TPIDRRO_EL0 // Extract cpu number from TPIDRRO_EL0
and $2, $2, #(MACHDEP_CPUNUM_MASK)
#define PMCR0_PMC_ENABLE_MASK(PMC) (UINT64_C(0x1) << PMCR_PMC_SHIFT(PMC))
#define PMCR0_PMC_DISABLE_MASK(PMC) (~PMCR0_PMC_ENABLE_MASK(PMC))
-/* how interrupts are generated on PMIs */
-#define PMCR0_INTGEN_SHIFT (8)
-#define PMCR0_INTGEN_MASK (UINT64_C(0x7) << PMCR0_INTGEN_SHIFT)
-#define PMCR0_INTGEN_OFF (UINT64_C(0) << PMCR0_INTGEN_SHIFT)
-#define PMCR0_INTGEN_PMI (UINT64_C(1) << PMCR0_INTGEN_SHIFT)
-#define PMCR0_INTGEN_AIC (UINT64_C(2) << PMCR0_INTGEN_SHIFT)
-#define PMCR0_INTGEN_DBG_HLT (UINT64_C(3) << PMCR0_INTGEN_SHIFT)
-#define PMCR0_INTGEN_FIQ (UINT64_C(4) << PMCR0_INTGEN_SHIFT)
-
-/* 10 unused */
-
-/* set by hardware if PMI was generated */
-#define PMCR0_PMAI_SHIFT (11)
-#define PMCR0_PMAI_MASK (UINT64_C(1) << PMCR0_PMAI_SHIFT)
-
/* overflow on a PMC generates an interrupt */
#define PMCR0_PMI_OFFSET (12)
#define PMCR0_PMI_SHIFT(PMC) (PMCR0_PMI_OFFSET + PMCR_PMC_SHIFT(PMC))
static boolean_t
enable_counter(uint32_t counter)
{
- int cpuid = cpu_number();
- uint64_t pmcr0 = 0, intgen_type;
- boolean_t counter_running, pmi_enabled, intgen_correct, enabled;
+ uint64_t pmcr0 = 0;
+ boolean_t counter_running, pmi_enabled, enabled;
pmcr0 = SREG_READ(SREG_PMCR0) | 0x3 /* leave the fixed counters enabled for monotonic */;
counter_running = (pmcr0 & PMCR0_PMC_ENABLE_MASK(counter)) != 0;
pmi_enabled = (pmcr0 & PMCR0_PMI_ENABLE_MASK(counter)) != 0;
- /* TODO this should use the PMI path rather than AIC for the interrupt
- * as it is faster
- */
- intgen_type = PMCR0_INTGEN_AIC;
- intgen_correct = (pmcr0 & PMCR0_INTGEN_MASK) == intgen_type;
-
- enabled = counter_running && pmi_enabled && intgen_correct;
+ enabled = counter_running && pmi_enabled;
if (!enabled) {
pmcr0 |= PMCR0_PMC_ENABLE_MASK(counter);
pmcr0 |= PMCR0_PMI_ENABLE_MASK(counter);
- pmcr0 &= ~PMCR0_INTGEN_MASK;
- pmcr0 |= intgen_type;
-
SREG_WRITE(SREG_PMCR0, pmcr0);
}
- saved_PMCR[cpuid][0] = pmcr0;
return enabled;
}
{
uint64_t pmcr0;
boolean_t enabled;
- int cpuid = cpu_number();
if (counter < 2) {
return true;
SREG_WRITE(SREG_PMCR0, pmcr0);
}
- saved_PMCR[cpuid][0] = pmcr0;
return enabled;
}
assert(ml_get_interrupts_enabled() == FALSE);
- /* Save current PMCR0/1 values. PMCR2-4 are in the RAWPMU set. */
- saved_PMCR[cpuid][0] = SREG_READ(SREG_PMCR0) | 0x3;
-
/* Save event selections. */
saved_PMESR[cpuid][0] = SREG_READ(SREG_PMESR0);
saved_PMESR[cpuid][1] = SREG_READ(SREG_PMESR1);
/* Restore PMCR0/1 values (with PMCR0 last to enable). */
SREG_WRITE(SREG_PMCR1, saved_PMCR[cpuid][1] | 0x30303);
- SREG_WRITE(SREG_PMCR0, saved_PMCR[cpuid][0] | 0x3);
}
static uint64_t
add sp, sp, ARM_CONTEXT_SIZE // Clean up stack
b.ne Lel1_sp1_synchronous_vector_continue
msr ELR_EL1, lr // Return to caller
- eret
+ ERET_CONTEXT_SYNCHRONIZING
#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
/* 64-bit first level exception handler dispatcher.
and x1, x4, BA_BOOT_FLAGS_DISABLE_USER_JOP
cbnz x1, Ldisable_jop // if global user JOP disabled, always turn off JOP regardless of thread flag (kernel running with JOP on)
mrs x2, TPIDR_EL1
- ldr x2, [x2, TH_DISABLE_USER_JOP]
- cbz x2, Lskip_disable_jop // if thread has JOP enabled, leave it on (kernel running with JOP on)
+ ldr w2, [x2, TH_DISABLE_USER_JOP]
+ cbz w2, Lskip_disable_jop // if thread has JOP enabled, leave it on (kernel running with JOP on)
Ldisable_jop:
MOV64 x1, SCTLR_JOP_KEYS_ENABLED
mrs x4, SCTLR_EL1
Lskip_ttbr1_switch:
#endif /* __ARM_KERNEL_PROTECT__ */
- eret
+ ERET_CONTEXT_SYNCHRONIZING
user_take_ast:
PUSH_FRAME
uint32_t LockTimeOutUsec;
uint64_t TLockTimeOut;
uint64_t MutexSpin;
+uint64_t low_MutexSpin;
+int64_t high_MutexSpin;
+
boolean_t is_clock_configured = FALSE;
uint32_t yield_delay_us = 0; /* Must be less than cpu_idle_latency to ensure ml_delay_should_spin is true */
void *lockdown_this;
lck_mtx_t lockdown_handler_lck;
lck_grp_t *lockdown_handler_grp;
-int lockdown_done;
+uint32_t lockdown_done;
void ml_lockdown_init(void);
void ml_lockdown_run_handler(void);
nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
}
MutexSpin = abstime;
+ low_MutexSpin = MutexSpin;
+ /*
+ * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
+ * real_ncpus is not set at this time.
+ *
+ * NOTE: active spinning is disabled on ARM. It can be activated
+ * by setting high_MutexSpin through the sysctl.
+ */
+ high_MutexSpin = low_MutexSpin;
}
/*
/*
- * Copyright (c) 2017-2019 Apple Inc. All rights reserved.
+ * Copyright (c) 2017-2020 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <arm64/monotonic.h>
#include <kern/assert.h>
#include <kern/debug.h> /* panic */
+#include <kern/kpc.h>
#include <kern/monotonic.h>
#include <machine/atomic.h>
#include <machine/limits.h> /* CHAR_BIT */
#define PMC5 "s3_2_c15_c5_0"
#define PMC6 "s3_2_c15_c6_0"
#define PMC7 "s3_2_c15_c7_0"
+
+#define PMC_0_7(X, A) X(0, A); X(1, A); X(2, A); X(3, A); X(4, A); X(5, A); \
+ X(6, A); X(7, A)
+
+#if CORE_NCTRS > 8
#define PMC8 "s3_2_c15_c9_0"
#define PMC9 "s3_2_c15_c10_0"
+#define PMC_8_9(X, A) X(8, A); X(9, A)
+#else // CORE_NCTRS > 8
+#define PMC_8_9(X, A)
+#endif // CORE_NCTRS > 8
+
+#define PMC_ALL(X, A) PMC_0_7(X, A); PMC_8_9(X, A)
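
A sketch of how this X-macro expands (assuming CORE_NCTRS > 8); PMC_RD is the reader macro used in mt_core_snap() later in this diff.

/*
 *   PMC_ALL(PMC_RD, 0);
 * expands to
 *   case (0): return __builtin_arm_rsr64(PMC0);
 *   ...
 *   case (7): return __builtin_arm_rsr64(PMC7);
 *   case (8): return __builtin_arm_rsr64(PMC8);
 *   case (9): return __builtin_arm_rsr64(PMC9);
 */
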
#define CTR_MAX ((UINT64_C(1) << 47) - 1)
PMCR0_INTGEN_HALT = 3,
PMCR0_INTGEN_FIQ = 4,
};
-#define PMCR0_INTGEN_SET(INT) ((uint64_t)(INT) << 8)
+#define PMCR0_INTGEN_SET(X) ((uint64_t)(X) << 8)
#if CPMU_AIC_PMI
#define PMCR0_INTGEN_INIT PMCR0_INTGEN_SET(PMCR0_INTGEN_AIC)
#define PMCR0_INTGEN_INIT PMCR0_INTGEN_SET(PMCR0_INTGEN_FIQ)
#endif /* !CPMU_AIC_PMI */
-#define PMCR0_PMI_EN(CTR) (UINT64_C(1) << (12 + CTR_POS(CTR)))
+#define PMCR0_PMI_SHIFT (12)
+#define PMCR0_CTR_GE8_PMI_SHIFT (44)
+#define PMCR0_PMI_EN(CTR) (UINT64_C(1) << (PMCR0_PMI_SHIFT + CTR_POS(CTR)))
/* fixed counters are always counting */
#define PMCR0_PMI_INIT (PMCR0_PMI_EN(CYCLES) | PMCR0_PMI_EN(INSTRS))
/* disable counting on a PMI */
#define PMCR0_L2CGLOBAL_EN (UINT64_C(1) << 23)
/* user mode access to configuration registers */
#define PMCR0_USEREN_EN (UINT64_C(1) << 30)
+#define PMCR0_CTR_GE8_EN_SHIFT (32)
-#define PMCR0_INIT (PMCR0_INTGEN_INIT | PMCR0_PMI_INIT | PMCR0_DISCNT_EN)
+#define PMCR0_INIT (PMCR0_INTGEN_INIT | PMCR0_PMI_INIT)
/*
* PMCR1 controls which execution modes count events.
#define PMSR_OVF(CTR) (1ULL << (CTR))
+#define PMESR0 "S3_1_c15_c5_0"
+#define PMESR1 "S3_1_c15_c6_0"
+
static int
core_init(__unused mt_device_t dev)
{
mt_core_snap(unsigned int ctr)
{
switch (ctr) {
- case 0:
- return __builtin_arm_rsr64(PMC0);
- case 1:
- return __builtin_arm_rsr64(PMC1);
+#define PMC_RD(CTR, UNUSED) case (CTR): return __builtin_arm_rsr64(PMC ## CTR)
+ PMC_ALL(PMC_RD, 0);
+#undef PMC_RD
default:
panic("monotonic: invalid core counter read: %u", ctr);
__builtin_unreachable();
{
uint64_t pmcr0 = __builtin_arm_rsr64(PMCR0);
pmcr0 |= PMCR0_INIT | PMCR0_FIXED_EN;
- pmcr0 &= ~PMCR0_PMAI;
+
+ if (kpc_get_running() & KPC_CLASS_CONFIGURABLE_MASK) {
+ uint64_t kpc_ctrs = kpc_get_configurable_pmc_mask(
+ KPC_CLASS_CONFIGURABLE_MASK) << MT_CORE_NFIXED;
+#if KPC_ARM64_CONFIGURABLE_COUNT > 6
+ uint64_t ctrs_ge8 = kpc_ctrs >> 8;
+ pmcr0 |= ctrs_ge8 << PMCR0_CTR_GE8_EN_SHIFT;
+ pmcr0 |= ctrs_ge8 << PMCR0_CTR_GE8_PMI_SHIFT;
+ kpc_ctrs &= (1ULL << 8) - 1;
+#endif /* KPC_ARM64_CONFIGURABLE_COUNT > 6 */
+ kpc_ctrs |= kpc_ctrs << PMCR0_PMI_SHIFT;
+ pmcr0 |= kpc_ctrs;
+ }
+
__builtin_arm_wsr64(PMCR0, pmcr0);
#if MACH_ASSERT
/*
* Only check for the values that were ORed in.
*/
uint64_t pmcr0_check = __builtin_arm_rsr64(PMCR0);
- if (!(pmcr0_check & (PMCR0_INIT | PMCR0_FIXED_EN))) {
- panic("monotonic: hardware ignored enable (read %llx)",
- pmcr0_check);
+ if ((pmcr0_check & (PMCR0_INIT | PMCR0_FIXED_EN)) != (PMCR0_INIT | PMCR0_FIXED_EN)) {
+ panic("monotonic: hardware ignored enable (read %llx, wrote %llx)",
+ pmcr0_check, pmcr0);
}
#endif /* MACH_ASSERT */
}
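
An illustrative check of the bit placement above, with a made-up counter mask, assuming MT_CORE_NFIXED is 2 (the fixed cycle and instruction counters) and KPC_ARM64_CONFIGURABLE_COUNT > 6 so the >= 8 path is compiled in.

/*
 * If kpc enables configurable counters 0 and 6, kpc_ctrs = 0x41 << 2 = 0x104
 * (PMC2 and PMC8):
 *   ctrs_ge8 = 0x104 >> 8 = 0x1  -> enable bit 32, PMI bit 44  (PMC8)
 *   kpc_ctrs & 0xff       = 0x4  -> enable bit 2,  PMI bit 14  (PMC2)
 * so pmcr0 gains 0x0000100100004004 on top of PMCR0_INIT | PMCR0_FIXED_EN.
 */
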
assert(cpu != NULL);
assert(ml_get_interrupts_enabled() == FALSE);
+ __builtin_arm_wsr64(PMCR0, PMCR0_INIT);
+ /*
+ * Ensure the CPMU has flushed any increments at this point, so PMSR is up
+ * to date.
+ */
+ __builtin_arm_isb(ISB_SY);
+
cpu->cpu_monotonic.mtc_npmis += 1;
cpu->cpu_stat.pmi_cnt_wake += 1;
uint64_t pmsr = __builtin_arm_rsr64(PMSR);
#if MONOTONIC_DEBUG
- kprintf("monotonic: cpu = %d, PMSR = 0x%llx, PMCR0 = 0x%llx",
+ printf("monotonic: cpu = %d, PMSR = 0x%llx, PMCR0 = 0x%llx\n",
cpu_number(), pmsr, pmcr0);
#endif /* MONOTONIC_DEBUG */
+#if MACH_ASSERT
+ uint64_t handled = 0;
+#endif /* MACH_ASSERT */
+
/*
* monotonic handles any fixed counter PMIs.
*/
continue;
}
+#if MACH_ASSERT
+ handled |= 1ULL << i;
+#endif /* MACH_ASSERT */
uint64_t count = mt_cpu_update_count(cpu, i);
cpu->cpu_monotonic.mtc_counts[i] += count;
mt_core_set_snap(i, mt_core_reset_values[i]);
KDBG_RELEASE(KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_DEBUG, 1),
mt_microstackshot_ctr, user_mode);
mt_microstackshot_pmi_handler(user_mode, mt_microstackshot_ctx);
+ } else if (mt_debug) {
+ KDBG_RELEASE(KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_DEBUG, 2),
+ i, count);
}
}
*/
for (unsigned int i = MT_CORE_NFIXED; i < CORE_NCTRS; i++) {
if (pmsr & PMSR_OVF(i)) {
+#if MACH_ASSERT
+ handled |= 1ULL << i;
+#endif /* MACH_ASSERT */
extern void kpc_pmi_handler(unsigned int ctr);
kpc_pmi_handler(i);
}
}
#if MACH_ASSERT
- pmsr = __builtin_arm_rsr64(PMSR);
- assert(pmsr == 0);
+ uint64_t pmsr_after_handling = __builtin_arm_rsr64(PMSR);
+ if (pmsr_after_handling != 0) {
+ unsigned int first_ctr_ovf = __builtin_ffsll(pmsr_after_handling) - 1;
+ uint64_t count = 0;
+ const char *extra = "";
+ if (first_ctr_ovf >= CORE_NCTRS) {
+ extra = " (invalid counter)";
+ } else {
+ count = mt_core_snap(first_ctr_ovf);
+ }
+
+ panic("monotonic: PMI status not cleared on exit from handler, "
+ "PMSR = 0x%llx HANDLE -> -> 0x%llx, handled 0x%llx, "
+ "PMCR0 = 0x%llx, PMC%d = 0x%llx%s", pmsr, pmsr_after_handling,
+ handled, __builtin_arm_rsr64(PMCR0), first_ctr_ovf, count, extra);
+ }
#endif /* MACH_ASSERT */
core_set_enabled();
}
+/*
+ * Routine: machine_thread_template_init
+ *
+ */
+void
+machine_thread_template_init(thread_t __unused thr_template)
+{
+ /* Nothing to do on this platform. */
+}
/*
* Routine: get_useraddr
#define XPRR_KERN0_RW_PERM (6ULL)
#define XPRR_USER_RW_PERM (7ULL)
#define XPRR_PPL_RX_PERM (8ULL)
-#define XPRR_PPL_RO_PERM (9ULL)
+#define XPRR_USER_XO_PERM (9ULL)
#define XPRR_KERN_RX_PERM (10ULL)
#define XPRR_KERN_RO_PERM (11ULL)
#define XPRR_KERN0_RX_PERM (12ULL)
#define APRR_USER_RW_INDEX (7ULL) /* AP_RWRW, PXN, XN */
#define APRR_PPL_RX_INDEX (8ULL) /* AP_RONA, PX, X */
#define APRR_KERN_RX_INDEX (9ULL) /* AP_RONA, PX, XN */
-#define APRR_PPL_RO_INDEX (10ULL) /* AP_RONA, PXN, X */
+#define APRR_USER_XO_INDEX (10ULL) /* AP_RONA, PXN, X */
#define APRR_KERN_RO_INDEX (11ULL) /* AP_RONA, PXN, XN */
#define APRR_KERN0_RX_INDEX (12ULL) /* AP_RORO, PX, X */
#define APRR_KERN0_RO_INDEX (13ULL) /* AP_RORO, PX, XN */
#define APRR_USER_RW_SHIFT (28ULL) /* AP_RWRW, PXN, XN */
#define APRR_PPL_RX_SHIFT (32ULL) /* AP_RONA, PX, X */
#define APRR_KERN_RX_SHIFT (36ULL) /* AP_RONA, PX, XN */
-#define APRR_PPL_RO_SHIFT (40ULL) /* AP_RONA, PXN, X */
+#define APRR_USER_XO_SHIFT (40ULL) /* AP_RONA, PXN, X */
#define APRR_KERN_RO_SHIFT (44ULL) /* AP_RONA, PXN, XN */
#define APRR_KERN0_RX_SHIFT (48ULL) /* AP_RORO, PX, X */
#define APRR_KERN0_RO_SHIFT (52ULL) /* AP_RORO, PX, XN */
#define APRR_EL1_RESET \
APRR_EL1_UNRESTRICTED
+/*
+ * XO mappings bypass PAN protection (rdar://58360875)
+ * Revoke ALL kernel access permissions for XO mappings.
+ */
#define APRR_EL1_BASE \
- APRR_EL1_UNRESTRICTED
+ (APRR_EL1_UNRESTRICTED & \
+ APRR_REMOVE(APRR_ATTR_R << APRR_USER_XO_SHIFT))
#if XNU_MONITOR
#define APRR_EL1_DEFAULT \
(APRR_EL1_BASE & \
(APRR_REMOVE((APRR_ATTR_WX << APRR_PPL_RW_SHIFT) | \
- (APRR_ATTR_WX << APRR_PPL_RO_SHIFT) | \
+ (APRR_ATTR_WX << APRR_USER_XO_SHIFT) | \
(APRR_ATTR_WX << APRR_PPL_RX_SHIFT))))
#define APRR_EL1_PPL \
(APRR_EL1_BASE & \
(APRR_REMOVE((APRR_ATTR_X << APRR_PPL_RW_SHIFT) | \
- (APRR_ATTR_WX << APRR_PPL_RO_SHIFT) | \
+ (APRR_ATTR_WX << APRR_USER_XO_SHIFT) | \
(APRR_ATTR_W << APRR_PPL_RX_SHIFT))))
#else
#define APRR_EL1_DEFAULT \
(APRR_EL0_UNRESTRICTED & \
(APRR_REMOVE((APRR_ATTR_RWX << APRR_PPL_RW_SHIFT) | \
(APRR_ATTR_RWX << APRR_PPL_RX_SHIFT) | \
- (APRR_ATTR_RWX << APRR_PPL_RO_SHIFT))))
+ (APRR_ATTR_RWX << APRR_USER_XO_SHIFT))))
#else
#define APRR_EL0_BASE \
APRR_EL0_UNRESTRICTED
b.mi $2 // Unsigned "strictly less than"
.endmacro
+/*
+ * Macro intended to be used as a replacement for ERET.
+ * It prevents speculation past ERET instructions by padding
+ * up to the decoder width.
+ */
+.macro ERET_CONTEXT_SYNCHRONIZING
+eret
+#if __ARM_SB_AVAILABLE__
+sb // Technically unnecessary on Apple micro-architectures, may restrict mis-speculation on other architectures
+#else /* __ARM_SB_AVAILABLE__ */
+isb // ISB technically unnecessary on Apple micro-architectures, may restrict mis-speculation on other architectures
+nop // Sequence of six NOPs to pad out and terminate instruction decode group
+nop
+nop
+nop
+nop
+nop
+#endif /* !__ARM_SB_AVAILABLE__ */
+.endmacro
+
#endif /* __ASSEMBLER__ */
#define MSR(reg, src) __asm__ volatile ("msr " reg ", %0" :: "r" (src))
/* spin until bootstrap core has completed machine lockdown */
adrp x17, EXT(lockdown_done)@page
1:
- ldr x18, [x17, EXT(lockdown_done)@pageoff]
- cbz x18, 1b
+ ldr w18, [x17, EXT(lockdown_done)@pageoff]
+ cbz w18, 1b
// load stashed rorgn_begin
adrp x17, EXT(rorgn_begin)@page
/* spin until bootstrap core has completed machine lockdown */
adrp x17, EXT(lockdown_done)@page
1:
- ldr x18, [x17, EXT(lockdown_done)@pageoff]
- cbz x18, 1b
+ ldr w18, [x17, EXT(lockdown_done)@pageoff]
+ cbz w18, 1b
// load stashed rorgn_begin
adrp x17, EXT(rorgn_begin)@page
panic("Bogus bank type: %d passed in get_value\n", bank_element->be_type);
}
- /* Change the persona-id to holder task's persona-id if the task is not spawned in system persona */
+ /* Do not replace persona id if the task is not spawned in system persona */
if (unique_persona &&
bank_merchant->bt_persona_id != persona_get_id(system_persona) &&
- bank_merchant->bt_persona_id != persona_get_id(proxy_system_persona)) {
- persona_id = bank_merchant->bt_persona_id;
+ bank_merchant->bt_persona_id != persona_get_id(proxy_system_persona) &&
+ bank_merchant->bt_persona_id != persona_id) {
+ return KERN_INVALID_ARGUMENT;
}
if (bank_holder->bt_persona_id == persona_id) {
osfmk/kern/remote_time.c standard
osfmk/kern/memset_s.c standard
osfmk/kern/copyout_shim.c optional copyout_shim
+osfmk/kern/suid_cred.c standard
./mach/clock_server.c standard
./mach/clock_priv_server.c standard
#include <mach/mach_types.h>
#include <mach/message.h>
#include <mach/port.h>
+#ifdef MACH_KERNEL_PRIVATE
+#include <mach_debug/mach_debug_types.h>
+#endif
#if PRIVATE
#define IOKIT_SERVER_VERSION 20190926
typedef io_object_t io_connect_t;
typedef io_object_t uext_object_t;
+extern void iokit_add_reference( io_object_t obj, natural_t type );
extern void iokit_remove_reference( io_object_t obj );
extern void iokit_remove_connect_reference( io_object_t obj );
+extern void iokit_port_object_description(io_object_t obj, kobject_description_t desc);
extern io_object_t iokit_lookup_object_port( ipc_port_t port );
extern io_connect_t iokit_lookup_connect_port( ipc_port_t port );
iokit_lock_port(port);
if (ip_active(port) && (ip_kotype(port) == type)) {
- obj = (io_object_t) port->ip_kobject;
+ obj = (io_object_t) ip_get_kobject(port);
iokit_add_reference( obj, type );
} else {
obj = NULL;
iokit_lock_port(port);
if (ip_kotype(port) == type) {
- obj = (io_object_t) port->ip_kobject;
+ obj = (io_object_t) ip_get_kobject(port);
iokit_add_reference(obj, type);
}
iokit_unlock_port(port);
if (type == IKOT_IOKIT_CONNECT) {
options |= IPC_KOBJECT_ALLOC_IMMOVABLE_SEND;
}
- return ipc_kobject_alloc_port((ipc_kobject_t) obj, type, options);
+ if (type == IKOT_UEXT_OBJECT) {
+ ipc_label_t label = IPC_LABEL_DEXT;
+ return ipc_kobject_alloc_labeled_port((ipc_kobject_t) obj, type, label, options);
+ } else {
+ return ipc_kobject_alloc_port((ipc_kobject_t) obj, type, options);
+ }
}
EXTERN kern_return_t
if (IP_VALID(port)) {
iokit_lock_port(port);
if (ip_active(port)) {
- obj = (io_object_t) port->ip_kobject;
+ obj = (io_object_t) ip_get_kobject(port);
type = ip_kotype( port );
if ((IKOT_IOKIT_OBJECT == type)
|| (IKOT_IOKIT_CONNECT == type)
}
}
+kern_return_t
+iokit_label_dext_task(task_t task)
+{
+ return ipc_space_add_label(task->itk_space, IPC_LABEL_DEXT);
+}
+
/* need to create a pmap function to generalize */
unsigned int
IODefaultCacheBits(addr64_t pa)
/*
- * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <libkern/crc.h>
#if DEBUG || DEVELOPMENT
-#define DPRINTF(x...) kprintf(x)
+#define DPRINTF(x ...) kprintf(x)
#else
-#define DPRINTF(x...)
+#define DPRINTF(x ...)
#endif
#ifndef ROUNDUP
}
gPEEFIRuntimeServices = runtime;
- }while (FALSE);
+ } while (FALSE);
}
static void
DPRINTF(" ResetSystem : 0x%x\n", runtime->ResetSystem);
gPEEFIRuntimeServices = runtime;
- }while (FALSE);
+ } while (FALSE);
}
kprintf("Initializing EFI runtime services\n");
- do{
+ do {
vm_offset_t vm_size, vm_addr;
vm_map_offset_t phys_addr;
EfiMemoryRange *mptr;
} else {
efi_set_tables_32((EFI_SYSTEM_TABLE_32 *) ml_static_ptovirt(args->efiSystemTable));
}
- }while (FALSE);
+ } while (FALSE);
return;
}
kprintf("Reinitializing EFI runtime services\n");
- do{
+ do {
vm_offset_t vm_size, vm_addr;
vm_map_offset_t phys_addr;
EfiMemoryRange *mptr;
} else {
efi_set_tables_32((EFI_SYSTEM_TABLE_32 *) ml_static_ptovirt(args->efiSystemTable));
}
- }while (FALSE);
+ } while (FALSE);
kprintf("Done reinitializing EFI runtime services\n");
/* Special handling of launchd died panics */
print_launchd_info();
} else {
- panic_i386_backtrace(stackptr, ((panic_double_fault_cpu == cn) ? 80: 48), debugger_msg, FALSE, NULL);
+ panic_i386_backtrace(stackptr, ((panic_double_fault_cpu == cn) ? 80 : 48), debugger_msg, FALSE, NULL);
}
if (panic_options & DEBUGGER_OPTION_COPROC_INITIATED_PANIC) {
int cn = cpu_number();
boolean_t old_doprnt_hide_pointers = doprnt_hide_pointers;
+#if DEVELOPMENT || DEBUG
+ /* Turn off I/O tracing now that we're panicking */
+ mmiotrace_enabled = 0;
+#endif
+
if (pbtcpu != cn) {
os_atomic_inc(&pbtcnt, relaxed);
/* Spin on print backtrace lock, which serializes output
struct cpu_data *cpu_this; /* pointer to myself */
thread_t cpu_active_thread;
thread_t cpu_nthread;
- volatile int cpu_preemption_level;
int cpu_number; /* Logical CPU */
void *cpu_int_state; /* interrupt state */
vm_offset_t cpu_active_stack; /* kernel stack base */
vm_offset_t cpu_kernel_stack; /* kernel stack top */
vm_offset_t cpu_int_stack_top;
- int cpu_interrupt_level;
volatile int cpu_signals; /* IPI events */
volatile int cpu_prior_signals; /* Last set of events,
* debugging
*/
ast_t cpu_pending_ast;
+ /*
+ * Note if rearranging fields:
+ * We want cpu_preemption_level on a different
+ * cache line than cpu_active_thread
+ * to optimize the mutex spin phase.
+ */
+ int cpu_interrupt_level;
+ volatile int cpu_preemption_level;
volatile int cpu_running;
#if !MONOTONIC
boolean_t cpu_fixed_pmcs_enabled;
break;
case CPUID_MODEL_SKYLAKE:
case CPUID_MODEL_SKYLAKE_DT:
-#if !defined(RC_HIDE_XNU_J137)
case CPUID_MODEL_SKYLAKE_W:
-#endif
cpufamily = CPUFAMILY_INTEL_SKYLAKE;
break;
case CPUID_MODEL_KABYLAKE:
#define CPUID_MODEL_SKYLAKE_ULT 0x4E
#define CPUID_MODEL_SKYLAKE_ULX 0x4E
#define CPUID_MODEL_SKYLAKE_DT 0x5E
-#if !defined(RC_HIDE_XNU_J137)
#define CPUID_MODEL_SKYLAKE_W 0x55
#define PLATID_XEON_SP_1 0x00
#define PLATID_XEON_SP_2 0x07
#define PLATID_MAYBE_XEON_SP 0x01
-#endif /* not RC_HIDE_XNU_J137 */
#define CPUID_MODEL_KABYLAKE 0x8E
#define CPUID_MODEL_KABYLAKE_ULT 0x8E
#define CPUID_MODEL_KABYLAKE_ULX 0x8E
/*
- * Copyright (c) 2000-2018 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
__asm__ __volatile__ ("fxsave64 %0" : "=m" (*a));
}
-#if !defined(RC_HIDE_XNU_J137)
#define IS_VALID_XSTATE(x) ((x) == FP || (x) == AVX || (x) == AVX512)
-#else
-#define IS_VALID_XSTATE(x) ((x) == FP || (x) == AVX)
-#endif
zone_t ifps_zone[] = {
[FP] = NULL,
[AVX] = NULL,
-#if !defined(RC_HIDE_XNU_J137)
[AVX512] = NULL
-#endif
};
static uint32_t fp_state_size[] = {
[FP] = sizeof(struct x86_fx_thread_state),
[AVX] = sizeof(struct x86_avx_thread_state),
-#if !defined(RC_HIDE_XNU_J137)
[AVX512] = sizeof(struct x86_avx512_thread_state)
-#endif
};
static const char *xstate_name[] = {
[UNDEFINED] = "UNDEFINED",
[FP] = "FP",
[AVX] = "AVX",
-#if !defined(RC_HIDE_XNU_J137)
[AVX512] = "AVX512"
-#endif
};
-#if !defined(RC_HIDE_XNU_J137)
#define fpu_ZMM_capable (fpu_capability == AVX512)
#define fpu_YMM_capable (fpu_capability == AVX || fpu_capability == AVX512)
/*
* Note the initial state value is an AVX512 object but that the AVX initial
* value is a subset of it.
*/
-#else
-#define fpu_YMM_capable (fpu_capability == AVX)
-#endif
static uint32_t cpuid_reevaluated = 0;
static void fpu_store_registers(void *, boolean_t);
static void fpu_load_registers(void *);
-#if !defined(RC_HIDE_XNU_J137)
static const uint32_t xstate_xmask[] = {
[FP] = FP_XMASK,
[AVX] = AVX_XMASK,
[AVX512] = AVX512_XMASK
};
-#else
-static const uint32_t xstate_xmask[] = {
- [FP] = FP_XMASK,
- [AVX] = AVX_XMASK,
-};
-#endif
static inline void
xsave(struct x86_fx_thread_state *a, uint32_t rfbm)
__asm__ __volatile__ ("xrstor64 %0" :: "m" (*a), "a"(rfbm), "d"(0));
}
-#if !defined(RC_HIDE_XNU_J137)
__unused static inline void
vzeroupper(void)
{
}
#endif /* DEBUG_AVX512 */
-#endif
-
#if DEBUG
static inline unsigned short
fnstsw(void)
/* Clear vector register store */
bzero(&fps->fx.fx_XMM_reg[0][0], sizeof(fps->fx.fx_XMM_reg));
bzero(fps->avx.x_YMM_Hi128, sizeof(fps->avx.x_YMM_Hi128));
-#if !defined(RC_HIDE_XNU_J137)
if (fpu_ZMM_capable) {
bzero(fps->avx512.x_ZMM_Hi256, sizeof(fps->avx512.x_ZMM_Hi256));
bzero(fps->avx512.x_Hi16_ZMM, sizeof(fps->avx512.x_Hi16_ZMM));
bzero(fps->avx512.x_Opmask, sizeof(fps->avx512.x_Opmask));
}
-#endif
fps->fx.fp_valid = TRUE;
fps->fx.fp_save_layout = fpu_YMM_capable ? XSAVE32: FXSAVE32;
PE_parse_boot_argn("fpsimd_fault_popc", &fpsimd_fault_popc, sizeof(fpsimd_fault_popc));
-#if !defined(RC_HIDE_XNU_J137)
static boolean_t is_avx512_enabled = TRUE;
if (cpu_number() == master_cpu) {
if (cpuid_leaf7_features() & CPUID_LEAF7_FEATURE_AVX512F) {
is_avx512_enabled ? "and enabled" : "but disabled");
}
}
-#endif
/* Configure the XSAVE context mechanism if the processor supports
* AVX/YMM registers
*/
if (cpuid_features() & CPUID_FEATURE_XSAVE) {
cpuid_xsave_leaf_t *xs0p = &cpuid_info()->cpuid_xsave_leaf[0];
-#if !defined(RC_HIDE_XNU_J137)
if (is_avx512_enabled &&
(xs0p->extended_state[eax] & XFEM_ZMM) == XFEM_ZMM) {
assert(xs0p->extended_state[eax] & XFEM_SSE);
*/
xsetbv(0, AVX_XMASK);
fpu_default = AVX;
- } else
-#endif
- if (xs0p->extended_state[eax] & XFEM_YMM) {
+ } else if (xs0p->extended_state[eax] & XFEM_YMM) {
assert(xs0p->extended_state[eax] & XFEM_SSE);
fpu_capability = AVX;
fpu_default = AVX;
}
break;
case AVX:
-#if !defined(RC_HIDE_XNU_J137)
case AVX512:
-#endif
if (is64) {
xsave64(ifps, xstate_xmask[xs]);
ifps->fp_save_layout = XSAVE64;
*/
zone_change(ifps_zone[fpu_default], Z_ALIGNMENT_REQUIRED, TRUE);
-#if !defined(RC_HIDE_XNU_J137)
/*
* If AVX512 is supported, create a separate savearea zone.
* with allocation size: 19 pages = 32 * 2668
"x86 avx512 save state");
zone_change(ifps_zone[AVX512], Z_ALIGNMENT_REQUIRED, TRUE);
}
-#endif
/* Determine MXCSR reserved bits and configure initial FPU state*/
configure_mxcsr_capability_mask(&initial_fp_state);
x86_float_state64_t *state;
pcb_t pcb;
boolean_t old_valid, fresh_state = FALSE;
+ xstate_t thr_xstate;
if (fpu_capability == UNDEFINED) {
return KERN_FAILURE;
return KERN_FAILURE;
}
-#if !defined(RC_HIDE_XNU_J137)
+ assert(thr_act != THREAD_NULL);
+
+ thr_xstate = thread_xstate(thr_act);
+
if ((f == x86_AVX512_STATE32 || f == x86_AVX512_STATE64) &&
- thread_xstate(thr_act) == AVX) {
+ thr_xstate == AVX) {
if (!fpu_thread_promote_avx512(thr_act)) {
return KERN_FAILURE;
+ } else {
+ /* Reload thr_xstate after successful promotion */
+ thr_xstate = thread_xstate(thr_act);
}
}
-#endif
state = (x86_float_state64_t *)tstate;
- assert(thr_act != THREAD_NULL);
pcb = THREAD_TO_PCB(thr_act);
if (state == NULL) {
simple_unlock(&pcb->lock);
if (ifps != 0) {
- fp_state_free(ifps, thread_xstate(thr_act));
+ fp_state_free(ifps, thr_xstate);
}
} else {
/*
if (ifps == 0) {
if (new_ifps == 0) {
simple_unlock(&pcb->lock);
- new_ifps = fp_state_alloc(thread_xstate(thr_act));
+ new_ifps = fp_state_alloc(thr_xstate);
goto Retry;
}
ifps = new_ifps;
new_ifps = 0;
pcb->ifps = ifps;
- pcb->xstate = thread_xstate(thr_act);
+ pcb->xstate = thr_xstate;
fresh_state = TRUE;
}
__nochk_bcopy((char *)&state->fpu_fcw, (char *)ifps, fp_state_size[FP]);
- switch (thread_xstate(thr_act)) {
+ switch (thr_xstate) {
case UNDEFINED_FULL:
case FP_FULL:
case AVX_FULL:
case AVX512_FULL:
- panic("fpu_set_fxstate() INVALID xstate: 0x%x", thread_xstate(thr_act));
+ panic("fpu_set_fxstate() INVALID xstate: 0x%x", thr_xstate);
break;
case UNDEFINED:
}
break;
}
-#if !defined(RC_HIDE_XNU_J137)
case AVX512: {
struct x86_avx512_thread_state *iavx = (void *) ifps;
union {
}
break;
}
-#endif
}
ifps->fp_valid = old_valid;
simple_unlock(&pcb->lock);
if (new_ifps != 0) {
- fp_state_free(new_ifps, thread_xstate(thr_act));
+ fp_state_free(new_ifps, thr_xstate);
}
}
return KERN_SUCCESS;
x86_float_state64_t *state;
kern_return_t ret = KERN_FAILURE;
pcb_t pcb;
+ xstate_t thr_xstate = thread_xstate(thr_act);
if (fpu_capability == UNDEFINED) {
return KERN_FAILURE;
return KERN_FAILURE;
}
-#if !defined(RC_HIDE_XNU_J137)
if ((f == x86_AVX512_STATE32 || f == x86_AVX512_STATE64) &&
- thread_xstate(thr_act) != AVX512) {
+ thr_xstate != AVX512) {
return KERN_FAILURE;
}
-#endif
state = (x86_float_state64_t *)tstate;
}
if (ifps->fp_valid) {
__nochk_bcopy((char *)ifps, (char *)&state->fpu_fcw, fp_state_size[FP]);
- switch (thread_xstate(thr_act)) {
+ switch (thr_xstate) {
case UNDEFINED_FULL:
case FP_FULL:
case AVX_FULL:
case AVX512_FULL:
- panic("fpu_get_fxstate() INVALID xstate: 0x%x", thread_xstate(thr_act));
+ panic("fpu_get_fxstate() INVALID xstate: 0x%x", thr_xstate);
break;
case UNDEFINED:
}
break;
}
-#if !defined(RC_HIDE_XNU_J137)
case AVX512: {
struct x86_avx512_thread_state *iavx = (void *) ifps;
union {
}
break;
}
-#endif
}
ret = KERN_SUCCESS;
}
-#if !defined(RC_HIDE_XNU_J137)
/*
* If a thread is using an AVX-sized savearea:
* - allocate a new AVX512-sized area,
* - copy the 256-bit state into the 512-bit area,
* - deallocate the smaller area
+ * ASSUMES: thread is the current thread.
*/
static void
fpu_savearea_promote_avx512(thread_t thread)
struct x86_avx512_thread_state *ifps512 = NULL;
pcb_t pcb = THREAD_TO_PCB(thread);
boolean_t do_avx512_alloc = FALSE;
+ boolean_t intr;
- DBG("fpu_upgrade_savearea(%p)\n", thread);
+ assert(thread == current_thread());
+
+ DBG("fpu_savearea_promote_avx512(%p)\n", thread);
simple_lock(&pcb->lock, LCK_GRP_NULL);
if (ifps == NULL) {
pcb->xstate = AVX512;
simple_unlock(&pcb->lock);
- if (thread != current_thread()) {
- /* nothing to be done */
+ /*
+ * Now that the PCB xstate has been promoted, set XCR0 so
+ * that we don't re-trip #UD on the next AVX-512 instruction.
+ *
+ * Since this branch is taken when the first FP instruction
+ * attempted by this thread is an AVX-512 instruction, we
+ * call fpnoextflt() to allocate an appropriately-sized
+ * AVX-512 save-area, thereby avoiding the overhead of another
+ * fault that would be triggered immediately on return.
+ */
+ intr = ml_set_interrupts_enabled(FALSE);
+ xsetbv(0, AVX512_XMASK);
+ current_cpu_datap()->cpu_xstate = AVX512;
+ (void)ml_set_interrupts_enabled(intr);
- return;
- }
fpnoextflt();
return;
}
if (pcb->xstate != AVX512) {
do_avx512_alloc = TRUE;
}
+
simple_unlock(&pcb->lock);
if (do_avx512_alloc == TRUE) {
}
simple_lock(&pcb->lock, LCK_GRP_NULL);
- if (thread == current_thread()) {
- boolean_t intr;
- intr = ml_set_interrupts_enabled(FALSE);
+ intr = ml_set_interrupts_enabled(FALSE);
- clear_ts();
- fp_save(thread);
- clear_fpu();
+ clear_ts();
+ fp_save(thread);
+ clear_fpu();
+
+ xsetbv(0, AVX512_XMASK);
+ current_cpu_datap()->cpu_xstate = AVX512;
+ (void)ml_set_interrupts_enabled(intr);
- xsetbv(0, AVX512_XMASK);
- current_cpu_datap()->cpu_xstate = AVX512;
- (void)ml_set_interrupts_enabled(intr);
- }
assert(ifps->fp.fp_valid);
/* Allocate an AVX512 savearea and copy AVX state into it */
* If the user is attempting an AVX512 instruction on a machine
* that supports this, we switch the calling thread to use
* a larger savearea, set its XCR0 bit mask to enable AVX512 and
- * return directly via thread_exception_return().
- * Otherwise simply return.
+ * return to user_trap() with a 0 return value.
+ * Otherwise, simply return a nonzero value.
*/
+
#define MAX_X86_INSN_LENGTH (15)
int
fpUDflt(user_addr_t rip)
return 0;
}
-#endif /* !defined(RC_HIDE_XNU_J137) */
void
fp_setvalid(boolean_t value)
return fpu_capability >= AVX;
}
-#if !defined(RC_HIDE_XNU_J137)
boolean_t
ml_fpu_avx512_enabled(void)
{
return fpu_capability == AVX512;
}
-#endif
static xstate_t
task_xstate(task_t task)
#endif /* MONOTONIC */
processor_bootstrap();
- thread_bootstrap();
+ thread_t thread = thread_bootstrap();
+ machine_set_current_thread(thread);
pstate_trace();
kernel_debug_string_early("machine_startup");
/* Adaptive spin before blocking */
extern uint64_t MutexSpin;
+extern uint64_t low_MutexSpin;
+extern int64_t high_MutexSpin;
typedef enum lck_mtx_spinwait_ret_type {
LCK_MTX_SPINWAIT_ACQUIRED = 0,
- LCK_MTX_SPINWAIT_SPUN = 1,
- LCK_MTX_SPINWAIT_NO_SPIN = 2,
+
+ LCK_MTX_SPINWAIT_SPUN_HIGH_THR = 1,
+ LCK_MTX_SPINWAIT_SPUN_OWNER_NOT_CORE = 2,
+ LCK_MTX_SPINWAIT_SPUN_NO_WINDOW_CONTENTION = 3,
+ LCK_MTX_SPINWAIT_SPUN_SLIDING_THR = 4,
+
+ LCK_MTX_SPINWAIT_NO_SPIN = 5,
} lck_mtx_spinwait_ret_type_t;
extern lck_mtx_spinwait_ret_type_t lck_mtx_lock_spinwait_x86(lck_mtx_t *mutex);
#include <machine/atomic.h>
#include <sys/kdebug.h>
#include <i386/locks_i386_inlines.h>
+#include <kern/cpu_number.h>
+#include <os/hash.h>
-#if CONFIG_DTRACE
-#define DTRACE_RW_SHARED 0x0 //reader
-#define DTRACE_RW_EXCL 0x1 //writer
-#define DTRACE_NO_FLAG 0x0 //not applicable
+#if CONFIG_DTRACE
+#define DTRACE_RW_SHARED 0x0 //reader
+#define DTRACE_RW_EXCL 0x1 //writer
+#define DTRACE_NO_FLAG 0x0 //not applicable
#endif /* CONFIG_DTRACE */
-#define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
-#define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
-#define LCK_RW_LCK_SHARED_CODE 0x102
-#define LCK_RW_LCK_SH_TO_EX_CODE 0x103
-#define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
-#define LCK_RW_LCK_EX_TO_SH_CODE 0x105
+#define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
+#define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
+#define LCK_RW_LCK_SHARED_CODE 0x102
+#define LCK_RW_LCK_SH_TO_EX_CODE 0x103
+#define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
+#define LCK_RW_LCK_EX_TO_SH_CODE 0x105
-#define LCK_RW_LCK_EX_WRITER_SPIN_CODE 0x106
-#define LCK_RW_LCK_EX_WRITER_WAIT_CODE 0x107
-#define LCK_RW_LCK_EX_READER_SPIN_CODE 0x108
-#define LCK_RW_LCK_EX_READER_WAIT_CODE 0x109
-#define LCK_RW_LCK_SHARED_SPIN_CODE 0x110
-#define LCK_RW_LCK_SHARED_WAIT_CODE 0x111
-#define LCK_RW_LCK_SH_TO_EX_SPIN_CODE 0x112
-#define LCK_RW_LCK_SH_TO_EX_WAIT_CODE 0x113
+#define LCK_RW_LCK_EX_WRITER_SPIN_CODE 0x106
+#define LCK_RW_LCK_EX_WRITER_WAIT_CODE 0x107
+#define LCK_RW_LCK_EX_READER_SPIN_CODE 0x108
+#define LCK_RW_LCK_EX_READER_WAIT_CODE 0x109
+#define LCK_RW_LCK_SHARED_SPIN_CODE 0x110
+#define LCK_RW_LCK_SHARED_WAIT_CODE 0x111
+#define LCK_RW_LCK_SH_TO_EX_SPIN_CODE 0x112
+#define LCK_RW_LCK_SH_TO_EX_WAIT_CODE 0x113
-#define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
+#define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
-unsigned int LcksOpts=0;
+unsigned int LcksOpts = 0;
#if DEVELOPMENT || DEBUG
unsigned int LckDisablePreemptCheck = 0;
/* Forwards */
-#if USLOCK_DEBUG
+#if USLOCK_DEBUG
/*
* Perform simple lock checks.
*/
-int uslock_check = 1;
-int max_lock_loops = 100000000;
-decl_simple_lock_data(extern , printf_lock);
-decl_simple_lock_data(extern , panic_lock);
-#endif /* USLOCK_DEBUG */
+int uslock_check = 1;
+int max_lock_loops = 100000000;
+decl_simple_lock_data(extern, printf_lock);
+decl_simple_lock_data(extern, panic_lock);
+#endif /* USLOCK_DEBUG */
extern unsigned int not_in_kdp;
* of the various lock routines. However, this information
* is only used for debugging and statistics.
*/
-typedef void *pc_t;
-#define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
-#define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
-#if ANY_LOCK_DEBUG
-#define OBTAIN_PC(pc) ((pc) = GET_RETURN_PC())
-#define DECL_PC(pc) pc_t pc;
-#else /* ANY_LOCK_DEBUG */
+typedef void *pc_t;
+#define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
+#define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
+#if ANY_LOCK_DEBUG
+#define OBTAIN_PC(pc) ((pc) = GET_RETURN_PC())
+#define DECL_PC(pc) pc_t pc;
+#else /* ANY_LOCK_DEBUG */
#define DECL_PC(pc)
-#ifdef lint
+#ifdef lint
/*
* Eliminate lint complaints about unused local pc variables.
*/
-#define OBTAIN_PC(pc) ++pc
-#else /* lint */
-#define OBTAIN_PC(pc)
-#endif /* lint */
-#endif /* USLOCK_DEBUG */
+#define OBTAIN_PC(pc) ++pc
+#else /* lint */
+#define OBTAIN_PC(pc)
+#endif /* lint */
+#endif /* USLOCK_DEBUG */
/*
* atomic exchange API is a low level abstraction of the operations
static uint32_t
atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
{
- uint32_t val;
+ uint32_t val;
- (void)ord; // Memory order not used
+ (void)ord; // Memory order not used
val = os_atomic_load(target, relaxed);
*previous = val;
return val;
}
static void
-atomic_exchange_abort(void) { }
+atomic_exchange_abort(void)
+{
+}
static boolean_t
atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
{
- uint32_t value, prev;
+ uint32_t value, prev;
- for ( ; ; ) {
+ for (;;) {
value = atomic_exchange_begin32(target, &prev, ord);
if (value & test_mask) {
- if (wait)
+ if (wait) {
cpu_pause();
- else
+ } else {
atomic_exchange_abort();
+ }
return FALSE;
}
value |= set_mask;
- if (atomic_exchange_complete32(target, prev, value, ord))
+ if (atomic_exchange_complete32(target, prev, value, ord)) {
return TRUE;
+ }
}
}
* Portable lock package implementation of usimple_locks.
*/
-#if USLOCK_DEBUG
-#define USLDBG(stmt) stmt
-void usld_lock_init(usimple_lock_t, unsigned short);
-void usld_lock_pre(usimple_lock_t, pc_t);
-void usld_lock_post(usimple_lock_t, pc_t);
-void usld_unlock(usimple_lock_t, pc_t);
-void usld_lock_try_pre(usimple_lock_t, pc_t);
-void usld_lock_try_post(usimple_lock_t, pc_t);
-int usld_lock_common_checks(usimple_lock_t, char *);
-#else /* USLOCK_DEBUG */
-#define USLDBG(stmt)
-#endif /* USLOCK_DEBUG */
+#if USLOCK_DEBUG
+#define USLDBG(stmt) stmt
+void usld_lock_init(usimple_lock_t, unsigned short);
+void usld_lock_pre(usimple_lock_t, pc_t);
+void usld_lock_post(usimple_lock_t, pc_t);
+void usld_unlock(usimple_lock_t, pc_t);
+void usld_lock_try_pre(usimple_lock_t, pc_t);
+void usld_lock_try_post(usimple_lock_t, pc_t);
+int usld_lock_common_checks(usimple_lock_t, char *);
+#else /* USLOCK_DEBUG */
+#define USLDBG(stmt)
+#endif /* USLOCK_DEBUG */
/*
* Forward definitions
static void lck_mtx_unlock_wakeup_tail(lck_mtx_t *mutex, uint32_t state, boolean_t indirect);
static void lck_mtx_interlock_lock(lck_mtx_t *mutex, uint32_t *new_state);
static void lck_mtx_interlock_lock_clear_flags(lck_mtx_t *mutex, uint32_t and_flags, uint32_t *new_state);
-static int lck_mtx_interlock_try_lock(lck_mtx_t *mutex, uint32_t *new_state);
static int lck_mtx_interlock_try_lock_set_flags(lck_mtx_t *mutex, uint32_t or_flags, uint32_t *new_state);
static boolean_t lck_mtx_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
static boolean_t lck_mtx_try_lock_wait_interlock_to_clear(lck_mtx_t *lock, uint32_t *new_state);
*/
lck_spin_t *
lck_spin_alloc_init(
- lck_grp_t *grp,
- lck_attr_t *attr)
+ lck_grp_t *grp,
+ lck_attr_t *attr)
{
- lck_spin_t *lck;
+ lck_spin_t *lck;
- if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0)
+ if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0) {
lck_spin_init(lck, grp, attr);
+ }
- return(lck);
+ return lck;
}
/*
*/
void
lck_spin_free(
- lck_spin_t *lck,
- lck_grp_t *grp)
+ lck_spin_t *lck,
+ lck_grp_t *grp)
{
lck_spin_destroy(lck, grp);
kfree(lck, sizeof(lck_spin_t));
*/
void
lck_spin_init(
- lck_spin_t *lck,
- lck_grp_t *grp,
- __unused lck_attr_t *attr)
+ lck_spin_t *lck,
+ lck_grp_t *grp,
+ __unused lck_attr_t *attr)
{
usimple_lock_init((usimple_lock_t) lck, 0);
if (grp) {
*/
void
lck_spin_destroy(
- lck_spin_t *lck,
- lck_grp_t *grp)
+ lck_spin_t *lck,
+ lck_grp_t *grp)
{
- if (lck->interlock == LCK_SPIN_TAG_DESTROYED)
+ if (lck->interlock == LCK_SPIN_TAG_DESTROYED) {
return;
+ }
lck->interlock = LCK_SPIN_TAG_DESTROYED;
if (grp) {
lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
*/
void
lck_spin_lock_grp(
- lck_spin_t *lck,
- lck_grp_t *grp)
+ lck_spin_t *lck,
+ lck_grp_t *grp)
{
#pragma unused(grp)
usimple_lock((usimple_lock_t) lck, grp);
void
lck_spin_lock(
- lck_spin_t *lck)
+ lck_spin_t *lck)
{
usimple_lock((usimple_lock_t) lck, NULL);
}
*/
void
lck_spin_unlock(
- lck_spin_t *lck)
+ lck_spin_t *lck)
{
usimple_unlock((usimple_lock_t) lck);
}
boolean_t
lck_spin_try_lock_grp(
- lck_spin_t *lck,
- lck_grp_t *grp)
+ lck_spin_t *lck,
+ lck_grp_t *grp)
{
#pragma unused(grp)
boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, grp);
-#if DEVELOPMENT || DEBUG
+#if DEVELOPMENT || DEBUG
if (lrval) {
pltrace(FALSE);
}
#endif
- return(lrval);
+ return lrval;
}
*/
boolean_t
lck_spin_try_lock(
- lck_spin_t *lck)
+ lck_spin_t *lck)
{
boolean_t lrval = (boolean_t)usimple_lock_try((usimple_lock_t) lck, LCK_GRP_NULL);
-#if DEVELOPMENT || DEBUG
+#if DEVELOPMENT || DEBUG
if (lrval) {
pltrace(FALSE);
}
#endif
- return(lrval);
+ return lrval;
}
/*
* Returns: TRUE if lock is acquired.
*/
boolean_t
-kdp_lck_spin_is_acquired(lck_spin_t *lck) {
+kdp_lck_spin_is_acquired(lck_spin_t *lck)
+{
if (not_in_kdp) {
panic("panic: spinlock acquired check done outside of kernel debugger");
}
*/
void
usimple_lock_init(
- usimple_lock_t l,
- __unused unsigned short tag)
+ usimple_lock_t l,
+ __unused unsigned short tag)
{
-#ifndef MACHINE_SIMPLE_LOCK
+#ifndef MACHINE_SIMPLE_LOCK
USLDBG(usld_lock_init(l, tag));
hw_lock_init(&l->interlock);
#else
- simple_lock_init((simple_lock_t)l,tag);
+ simple_lock_init((simple_lock_t)l, tag);
#endif
}
volatile uint32_t spinlock_owner_cpu = ~0;
volatile usimple_lock_t spinlock_timed_out;
-uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) {
+uint32_t
+spinlock_timeout_NMI(uintptr_t thread_addr)
+{
uint32_t i;
for (i = 0; i < real_ncpus; i++) {
*/
void
(usimple_lock)(
- usimple_lock_t l
+ usimple_lock_t l
LCK_GRP_ARG(lck_grp_t *grp))
{
-#ifndef MACHINE_SIMPLE_LOCK
+#ifndef MACHINE_SIMPLE_LOCK
DECL_PC(pc);
OBTAIN_PC(pc);
USLDBG(usld_lock_pre(l, pc));
- if(__improbable(hw_lock_to(&l->interlock, LockTimeOutTSC, grp) == 0)) {
+ if (__improbable(hw_lock_to(&l->interlock, LockTimeOutTSC, grp) == 0)) {
boolean_t uslock_acquired = FALSE;
while (machine_timeout_suspended()) {
enable_preemption();
- if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC, grp)))
+ if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC, grp))) {
break;
+ }
}
if (uslock_acquired == FALSE) {
spinlock_timed_out = l;
lock_cpu = spinlock_timeout_NMI(lowner);
panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx, time: %llu",
- l, lowner, current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data, mach_absolute_time());
+ l, lowner, current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data, mach_absolute_time());
}
}
#if DEVELOPMENT || DEBUG
- pltrace(FALSE);
+ pltrace(FALSE);
#endif
USLDBG(usld_lock_post(l, pc));
*/
void
usimple_unlock(
- usimple_lock_t l)
+ usimple_lock_t l)
{
-#ifndef MACHINE_SIMPLE_LOCK
+#ifndef MACHINE_SIMPLE_LOCK
DECL_PC(pc);
OBTAIN_PC(pc);
USLDBG(usld_unlock(l, pc));
#if DEVELOPMENT || DEBUG
- pltrace(TRUE);
+ pltrace(TRUE);
#endif
hw_lock_unlock(&l->interlock);
#else
*/
unsigned int
usimple_lock_try(
- usimple_lock_t l,
+ usimple_lock_t l,
lck_grp_t *grp)
{
-#ifndef MACHINE_SIMPLE_LOCK
- unsigned int success;
+#ifndef MACHINE_SIMPLE_LOCK
+ unsigned int success;
DECL_PC(pc);
OBTAIN_PC(pc);
#if DEVELOPMENT || DEBUG
pltrace(FALSE);
#endif
- USLDBG(usld_lock_try_post(l, pc));
+ USLDBG(usld_lock_try_post(l, pc));
}
return success;
#else
- return(simple_lock_try((simple_lock_t)l, grp));
+ return simple_lock_try((simple_lock_t)l, grp);
#endif
}
* and spinning on a lock.
*
*/
-unsigned int
+unsigned
+int
(usimple_lock_try_lock_mp_signal_safe_loop_deadline)(usimple_lock_t l,
- uint64_t deadline
- LCK_GRP_ARG(lck_grp_t *grp))
+ uint64_t deadline
+ LCK_GRP_ARG(lck_grp_t *grp))
{
boolean_t istate = ml_get_interrupts_enabled();
}
while (!simple_lock_try(l, grp)) {
- if (!istate)
+ if (!istate) {
cpu_signal_handler(NULL);
-
+ }
+
if (deadline < mach_absolute_time()) {
return 0;
}
void
(usimple_lock_try_lock_loop)(usimple_lock_t l
- LCK_GRP_ARG(lck_grp_t *grp))
+ LCK_GRP_ARG(lck_grp_t *grp))
{
usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, ULLONG_MAX, grp);
}
-unsigned int
+unsigned
+int
(usimple_lock_try_lock_mp_signal_safe_loop_duration)(usimple_lock_t l,
- uint64_t duration
- LCK_GRP_ARG(lck_grp_t *grp))
+ uint64_t duration
+ LCK_GRP_ARG(lck_grp_t *grp))
{
uint64_t deadline;
uint64_t base_at = mach_absolute_time();
return usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, deadline, grp);
}
-#if USLOCK_DEBUG
+#if USLOCK_DEBUG
/*
* States of a usimple_lock. The default when initializing
* a usimple_lock is setting it up for debug checking.
*/
-#define USLOCK_CHECKED 0x0001 /* lock is being checked */
-#define USLOCK_TAKEN 0x0002 /* lock has been taken */
-#define USLOCK_INIT 0xBAA0 /* lock has been initialized */
-#define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED)
-#define USLOCK_CHECKING(l) (uslock_check && \
- ((l)->debug.state & USLOCK_CHECKED))
+#define USLOCK_CHECKED 0x0001 /* lock is being checked */
+#define USLOCK_TAKEN 0x0002 /* lock has been taken */
+#define USLOCK_INIT 0xBAA0 /* lock has been initialized */
+#define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED)
+#define USLOCK_CHECKING(l) (uslock_check && \
+ ((l)->debug.state & USLOCK_CHECKED))
/*
* Initialize the debugging information contained
*/
void
usld_lock_init(
- usimple_lock_t l,
- __unused unsigned short tag)
+ usimple_lock_t l,
+ __unused unsigned short tag)
{
- if (l == USIMPLE_LOCK_NULL)
+ if (l == USIMPLE_LOCK_NULL) {
panic("lock initialization: null lock pointer");
+ }
l->lock_type = USLOCK_TAG;
l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
l->debug.lock_cpu = l->debug.unlock_cpu = 0;
*/
int
usld_lock_common_checks(
- usimple_lock_t l,
- char *caller)
+ usimple_lock_t l,
+ char *caller)
{
- if (l == USIMPLE_LOCK_NULL)
+ if (l == USIMPLE_LOCK_NULL) {
panic("%s: null lock pointer", caller);
- if (l->lock_type != USLOCK_TAG)
+ }
+ if (l->lock_type != USLOCK_TAG) {
panic("%s: %p is not a usimple lock, 0x%x", caller, l, l->lock_type);
- if (!(l->debug.state & USLOCK_INIT))
+ }
+ if (!(l->debug.state & USLOCK_INIT)) {
panic("%s: %p is not an initialized lock, 0x%x", caller, l, l->debug.state);
+ }
return USLOCK_CHECKING(l);
}
/* ARGSUSED */
void
usld_lock_pre(
- usimple_lock_t l,
- pc_t pc)
+ usimple_lock_t l,
+ pc_t pc)
{
- char caller[] = "usimple_lock";
+ char caller[] = "usimple_lock";
- if (!usld_lock_common_checks(l, caller))
+ if (!usld_lock_common_checks(l, caller)) {
return;
+ }
/*
* Note that we have a weird case where we are getting a lock when we are]
if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
l->debug.lock_thread == (void *) current_thread()) {
printf("%s: lock %p already locked (at %p) by",
- caller, l, l->debug.lock_pc);
+ caller, l, l->debug.lock_pc);
printf(" current thread %p (new attempt at pc %p)\n",
- l->debug.lock_thread, pc);
+ l->debug.lock_thread, pc);
panic("%s", caller);
}
mp_disable_preemption();
*/
void
usld_lock_post(
- usimple_lock_t l,
- pc_t pc)
+ usimple_lock_t l,
+ pc_t pc)
{
- int mycpu;
- char caller[] = "successful usimple_lock";
+ int mycpu;
+ char caller[] = "successful usimple_lock";
- if (!usld_lock_common_checks(l, caller))
+ if (!usld_lock_common_checks(l, caller)) {
return;
+ }
- if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
+ if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
panic("%s: lock %p became uninitialized",
- caller, l);
- if ((l->debug.state & USLOCK_TAKEN))
+ caller, l);
+ }
+ if ((l->debug.state & USLOCK_TAKEN)) {
panic("%s: lock 0x%p became TAKEN by someone else",
- caller, l);
+ caller, l);
+ }
mycpu = cpu_number();
l->debug.lock_thread = (void *)current_thread();
*/
void
usld_unlock(
- usimple_lock_t l,
- pc_t pc)
+ usimple_lock_t l,
+ pc_t pc)
{
- int mycpu;
- char caller[] = "usimple_unlock";
+ int mycpu;
+ char caller[] = "usimple_unlock";
- if (!usld_lock_common_checks(l, caller))
+ if (!usld_lock_common_checks(l, caller)) {
return;
+ }
mycpu = cpu_number();
- if (!(l->debug.state & USLOCK_TAKEN))
+ if (!(l->debug.state & USLOCK_TAKEN)) {
panic("%s: lock 0x%p hasn't been taken",
- caller, l);
- if (l->debug.lock_thread != (void *) current_thread())
+ caller, l);
+ }
+ if (l->debug.lock_thread != (void *) current_thread()) {
panic("%s: unlocking lock 0x%p, owned by thread %p",
- caller, l, l->debug.lock_thread);
+ caller, l, l->debug.lock_thread);
+ }
if (l->debug.lock_cpu != mycpu) {
printf("%s: unlocking lock 0x%p on cpu 0x%x",
- caller, l, mycpu);
+ caller, l, mycpu);
printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
panic("%s", caller);
}
*/
void
usld_lock_try_pre(
- usimple_lock_t l,
- __unused pc_t pc)
+ usimple_lock_t l,
+ __unused pc_t pc)
{
- char caller[] = "usimple_lock_try";
+ char caller[] = "usimple_lock_try";
- if (!usld_lock_common_checks(l, caller))
+ if (!usld_lock_common_checks(l, caller)) {
return;
+ }
}
*/
void
usld_lock_try_post(
- usimple_lock_t l,
- pc_t pc)
+ usimple_lock_t l,
+ pc_t pc)
{
- int mycpu;
- char caller[] = "successful usimple_lock_try";
+ int mycpu;
+ char caller[] = "successful usimple_lock_try";
- if (!usld_lock_common_checks(l, caller))
+ if (!usld_lock_common_checks(l, caller)) {
return;
+ }
- if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
+ if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) {
panic("%s: lock 0x%p became uninitialized",
- caller, l);
- if ((l->debug.state & USLOCK_TAKEN))
+ caller, l);
+ }
+ if ((l->debug.state & USLOCK_TAKEN)) {
panic("%s: lock 0x%p became TAKEN by someone else",
- caller, l);
+ caller, l);
+ }
mycpu = cpu_number();
l->debug.lock_thread = (void *) current_thread();
l->debug.lock_pc = pc;
l->debug.lock_cpu = mycpu;
}
-#endif /* USLOCK_DEBUG */
+#endif /* USLOCK_DEBUG */
/*
* Routine: lck_rw_alloc_init
*/
lck_rw_t *
lck_rw_alloc_init(
- lck_grp_t *grp,
- lck_attr_t *attr) {
- lck_rw_t *lck;
+ lck_grp_t *grp,
+ lck_attr_t *attr)
+{
+ lck_rw_t *lck;
if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) {
bzero(lck, sizeof(lck_rw_t));
lck_rw_init(lck, grp, attr);
}
- return(lck);
+ return lck;
}
/*
*/
void
lck_rw_free(
- lck_rw_t *lck,
- lck_grp_t *grp) {
+ lck_rw_t *lck,
+ lck_grp_t *grp)
+{
lck_rw_destroy(lck, grp);
kfree(lck, sizeof(lck_rw_t));
}
*/
void
lck_rw_init(
- lck_rw_t *lck,
- lck_grp_t *grp,
- lck_attr_t *attr)
+ lck_rw_t *lck,
+ lck_grp_t *grp,
+ lck_attr_t *attr)
{
- lck_attr_t *lck_attr = (attr != LCK_ATTR_NULL) ?
- attr : &LockDefaultLckAttr;
+ lck_attr_t *lck_attr = (attr != LCK_ATTR_NULL) ?
+ attr : &LockDefaultLckAttr;
hw_lock_byte_init(&lck->lck_rw_interlock);
lck->lck_rw_want_write = FALSE;
lck->lck_r_waiting = lck->lck_w_waiting = 0;
lck->lck_rw_tag = 0;
lck->lck_rw_priv_excl = ((lck_attr->lck_attr_val &
- LCK_ATTR_RW_SHARED_PRIORITY) == 0);
+ LCK_ATTR_RW_SHARED_PRIORITY) == 0);
lck_grp_reference(grp);
lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
*/
void
lck_rw_destroy(
- lck_rw_t *lck,
- lck_grp_t *grp)
+ lck_rw_t *lck,
+ lck_grp_t *grp)
{
- if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
+ if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) {
return;
+ }
#if MACH_LDEBUG
lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
#endif
static inline boolean_t
lck_interlock_lock(lck_rw_t *lck)
{
- boolean_t istate;
+ boolean_t istate;
istate = ml_set_interrupts_enabled(FALSE);
hw_lock_byte_lock(&lck->lck_rw_interlock);
static inline void
lck_rw_lock_pause(boolean_t interrupts_enabled)
{
- if (!interrupts_enabled)
+ if (!interrupts_enabled) {
handle_pending_TLB_flushes();
+ }
cpu_pause();
}
static inline boolean_t
lck_rw_held_read_or_upgrade(lck_rw_t *lock)
{
- if (ordered_load(&lock->data) & (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE))
+ if (ordered_load(&lock->data) & (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE)) {
return TRUE;
+ }
return FALSE;
}
if (lck->lck_r_waiting || lck->lck_w_waiting || lck->lck_rw_shared_count > machine_info.max_cpus) {
/*
* there are already threads waiting on this lock... this
- * implies that they have spun beyond their deadlines waiting for
+ * implies that they have spun beyond their deadlines waiting for
* the desired state to show up so we will not bother spinning at this time...
* or
* the current number of threads sharing this lock exceeds our capacity to run them
* to be at 0, we'll not bother spinning since the latency for this to happen is
* unpredictable...
*/
- return (mach_absolute_time());
+ return mach_absolute_time();
}
- return (mach_absolute_time() + MutexSpin);
- } else
- return (mach_absolute_time() + (100000LL * 1000000000LL));
+ return mach_absolute_time() + MutexSpin;
+ } else {
+ return mach_absolute_time() + (100000LL * 1000000000LL);
+ }
}
static boolean_t
lck_rw_grab_want(lck_rw_t *lock)
{
- uint32_t data, prev;
+ uint32_t data, prev;
- for ( ; ; ) {
+ for (;;) {
data = atomic_exchange_begin32(&lock->data, &prev, memory_order_relaxed);
- if ((data & LCK_RW_INTERLOCK) == 0)
+ if ((data & LCK_RW_INTERLOCK) == 0) {
break;
+ }
atomic_exchange_abort();
lck_rw_interlock_spin(lock);
}
static boolean_t
lck_rw_grab_shared(lck_rw_t *lock)
{
- uint32_t data, prev;
+ uint32_t data, prev;
- for ( ; ; ) {
+ for (;;) {
data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
- if ((data & LCK_RW_INTERLOCK) == 0)
+ if ((data & LCK_RW_INTERLOCK) == 0) {
break;
+ }
atomic_exchange_abort();
lck_rw_interlock_spin(lock);
}
*/
static void
lck_rw_lock_exclusive_gen(
- lck_rw_t *lck)
+ lck_rw_t *lck)
{
- __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
- uint64_t deadline = 0;
- int slept = 0;
- int gotlock = 0;
- int lockheld = 0;
- wait_result_t res = 0;
- boolean_t istate = -1;
+ __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
+ uint64_t deadline = 0;
+ int slept = 0;
+ int gotlock = 0;
+ int lockheld = 0;
+ wait_result_t res = 0;
+ boolean_t istate = -1;
-#if CONFIG_DTRACE
+#if CONFIG_DTRACE
boolean_t dtrace_ls_initialized = FALSE;
- boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled= FALSE;
+ boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
uint64_t wait_interval = 0;
int readers_at_sleep = 0;
#endif
/*
* Try to acquire the lck_rw_want_write bit.
*/
- while ( !lck_rw_grab_want(lck)) {
-
-#if CONFIG_DTRACE
+ while (!lck_rw_grab_want(lck)) {
+#if CONFIG_DTRACE
if (dtrace_ls_initialized == FALSE) {
dtrace_ls_initialized = TRUE;
dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
}
}
#endif
- if (istate == -1)
+ if (istate == -1) {
istate = ml_get_interrupts_enabled();
+ }
deadline = lck_rw_deadline_for_spin(lck);
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
- while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline)
+ while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline) {
lck_rw_lock_pause(istate);
+ }
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);
- if (gotlock)
+ if (gotlock) {
break;
+ }
/*
* if we get here, the deadline has expired w/o us
* being able to grab the lock exclusively
* check to see if we're allowed to do a thread_block
*/
if (lck->lck_rw_can_sleep) {
-
istate = lck_interlock_lock(lck);
if (lck->lck_rw_want_write) {
-
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
lck->lck_w_waiting = TRUE;
thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
res = assert_wait(RW_LOCK_WRITER_EVENT(lck),
- THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
+ THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
lck_interlock_unlock(lck, istate);
if (res == THREAD_WAITING) {
* and the interlock not held, we are safe to proceed
*/
while (lck_rw_held_read_or_upgrade(lck)) {
-
-#if CONFIG_DTRACE
+#if CONFIG_DTRACE
/*
* Either sleeping or spinning is happening, start
* a timing of our delay interval now. If we set it
}
}
#endif
- if (istate == -1)
+ if (istate == -1) {
istate = ml_get_interrupts_enabled();
+ }
deadline = lck_rw_deadline_for_spin(lck);
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
- while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline)
+ while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline) {
lck_rw_lock_pause(istate);
+ }
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, lockheld, 0);
- if ( !lockheld)
+ if (!lockheld) {
break;
+ }
/*
* if we get here, the deadline has expired w/o us
* being able to grab the lock exclusively
* check to see if we're allowed to do a thread_block
*/
if (lck->lck_rw_can_sleep) {
-
istate = lck_interlock_lock(lck);
if (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade) {
thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
res = assert_wait(RW_LOCK_WRITER_EVENT(lck),
- THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
+ THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
lck_interlock_unlock(lck, istate);
if (res == THREAD_WAITING) {
}
}
-#if CONFIG_DTRACE
+#if CONFIG_DTRACE
/*
* Decide what latencies we suffered that are Dtrace events.
* If we have set wait_interval, then we either spun or slept.
* Routine: lck_rw_done
*/
-lck_rw_type_t lck_rw_done(lck_rw_t *lock)
+lck_rw_type_t
+lck_rw_done(lck_rw_t *lock)
{
- uint32_t data, prev;
+ uint32_t data, prev;
- for ( ; ; ) {
+ for (;;) {
data = atomic_exchange_begin32(&lock->data, &prev, memory_order_release_smp);
- if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
+ if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
atomic_exchange_abort();
lck_rw_interlock_spin(lock);
continue;
}
if (data & LCK_RW_SHARED_MASK) {
data -= LCK_RW_SHARED_READER;
- if ((data & LCK_RW_SHARED_MASK) == 0) /* if reader count has now gone to 0, check for waiters */
+ if ((data & LCK_RW_SHARED_MASK) == 0) { /* if reader count has now gone to 0, check for waiters */
goto check_waiters;
- } else { /* if reader count == 0, must be exclusive lock */
+ }
+ } else { /* if reader count == 0, must be exclusive lock */
if (data & LCK_RW_WANT_UPGRADE) {
data &= ~(LCK_RW_WANT_UPGRADE);
} else {
- if (data & LCK_RW_WANT_WRITE)
+ if (data & LCK_RW_WANT_WRITE) {
data &= ~(LCK_RW_WANT_EXCL);
- else /* lock is not 'owned', panic */
+ } else { /* lock is not 'owned', panic */
panic("Releasing non-exclusive RW lock without a reader refcount!");
+ }
}
check_waiters:
if (prev & LCK_RW_W_WAITING) {
data &= ~(LCK_RW_W_WAITING);
- if ((prev & LCK_RW_PRIV_EXCL) == 0)
+ if ((prev & LCK_RW_PRIV_EXCL) == 0) {
data &= ~(LCK_RW_R_WAITING);
- } else
+ }
+ } else {
data &= ~(LCK_RW_R_WAITING);
+ }
}
- if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_release_smp))
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_release_smp)) {
break;
+ }
cpu_pause();
}
return lck_rw_done_gen(lock, prev);
*
* called from lck_rw_done()
* prior_lock_state is the value in the 1st
- * word of the lock at the time of a successful
+ * word of the lock at the time of a successful
* atomic compare and exchange with the new value...
- * it represents the state of the lock before we
+ * it represents the state of the lock before we
* decremented the rw_shared_count or cleared either
- * rw_want_upgrade or rw_want_write and
+ * rw_want_upgrade or rw_want_write and
* the lck_x_waiting bits... since the wrapper
- * routine has already changed the state atomically,
+ * routine has already changed the state atomically,
* we just need to decide if we should
* wake up anyone and what value to return... we do
* this by examining the state of the lock before
*/
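As an illustrative aside (not part of the patch): a minimal sketch of the decision the comment above describes, reusing the same fake_lck aliasing of prior_lock_state that lck_rw_lock_exclusive_to_shared_gen() uses further down. The helper name is hypothetical, and the statistics/kdebug bookkeeping of the real routine is omitted.

static lck_rw_type_t
lck_rw_done_decide_sketch(lck_rw_t *lck, uint32_t prior_lock_state)
{
	/* view the pre-release lock word through the lck_rw_t layout */
	lck_rw_t *fake_lck = (lck_rw_t *)&prior_lock_state;

	if (fake_lck->lck_rw_shared_count <= 1) {
		/* we dropped the last reference: consider waking waiters */
		if (fake_lck->lck_w_waiting) {
			thread_wakeup(RW_LOCK_WRITER_EVENT(lck));
		}
		if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) &&
		    fake_lck->lck_r_waiting) {
			thread_wakeup(RW_LOCK_READER_EVENT(lck));
		}
	}
	/* tell the caller which mode it just released */
	return fake_lck->lck_rw_shared_count ? LCK_RW_TYPE_SHARED
	    : LCK_RW_TYPE_EXCLUSIVE;
}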
void
lck_rw_unlock(
- lck_rw_t *lck,
- lck_rw_type_t lck_rw_type)
+ lck_rw_t *lck,
+ lck_rw_type_t lck_rw_type)
{
- if (lck_rw_type == LCK_RW_TYPE_SHARED)
+ if (lck_rw_type == LCK_RW_TYPE_SHARED) {
lck_rw_unlock_shared(lck);
- else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
+ } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
lck_rw_unlock_exclusive(lck);
- else
+ } else {
panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type);
+ }
}
*/
void
lck_rw_unlock_shared(
- lck_rw_t *lck)
+ lck_rw_t *lck)
{
- lck_rw_type_t ret;
+ lck_rw_type_t ret;
assertf(lck->lck_rw_shared_count > 0, "lck %p has shared_count=0x%x", lck, lck->lck_rw_shared_count);
ret = lck_rw_done(lck);
- if (ret != LCK_RW_TYPE_SHARED)
+ if (ret != LCK_RW_TYPE_SHARED) {
panic("lck_rw_unlock_shared(): lock %p held in mode: %d\n", lck, ret);
+ }
}
*/
void
lck_rw_unlock_exclusive(
- lck_rw_t *lck)
+ lck_rw_t *lck)
{
- lck_rw_type_t ret;
+ lck_rw_type_t ret;
ret = lck_rw_done(lck);
- if (ret != LCK_RW_TYPE_EXCLUSIVE)
+ if (ret != LCK_RW_TYPE_EXCLUSIVE) {
panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret);
+ }
}
*/
void
lck_rw_lock(
- lck_rw_t *lck,
- lck_rw_type_t lck_rw_type)
+ lck_rw_t *lck,
+ lck_rw_type_t lck_rw_type)
{
- if (lck_rw_type == LCK_RW_TYPE_SHARED)
+ if (lck_rw_type == LCK_RW_TYPE_SHARED) {
lck_rw_lock_shared(lck);
- else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
+ } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
lck_rw_lock_exclusive(lck);
- else
+ } else {
panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type);
+ }
}
/*
void
lck_rw_lock_shared(lck_rw_t *lock)
{
- uint32_t data, prev;
+ uint32_t data, prev;
current_thread()->rwlock_count++;
- for ( ; ; ) {
+ for (;;) {
data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
atomic_exchange_abort();
break;
}
data += LCK_RW_SHARED_READER;
- if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp)) {
break;
+ }
cpu_pause();
}
-#if CONFIG_DTRACE
+#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
-#endif /* CONFIG_DTRACE */
+#endif /* CONFIG_DTRACE */
return;
}
*/
static void
lck_rw_lock_shared_gen(
- lck_rw_t *lck)
+ lck_rw_t *lck)
{
- __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
- uint64_t deadline = 0;
- int gotlock = 0;
- int slept = 0;
- wait_result_t res = 0;
- boolean_t istate = -1;
+ __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
+ uint64_t deadline = 0;
+ int gotlock = 0;
+ int slept = 0;
+ wait_result_t res = 0;
+ boolean_t istate = -1;
-#if CONFIG_DTRACE
+#if CONFIG_DTRACE
uint64_t wait_interval = 0;
int readers_at_sleep = 0;
boolean_t dtrace_ls_initialized = FALSE;
boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
#endif
- while ( !lck_rw_grab_shared(lck)) {
-
-#if CONFIG_DTRACE
+ while (!lck_rw_grab_shared(lck)) {
+#if CONFIG_DTRACE
if (dtrace_ls_initialized == FALSE) {
dtrace_ls_initialized = TRUE;
dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
}
}
#endif
- if (istate == -1)
+ if (istate == -1) {
istate = ml_get_interrupts_enabled();
+ }
deadline = lck_rw_deadline_for_spin(lck);
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
- trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
+ trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
- while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline)
+ while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline) {
lck_rw_lock_pause(istate);
+ }
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
- trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);
+ trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0);
- if (gotlock)
+ if (gotlock) {
break;
+ }
/*
* if we get here, the deadline has expired w/o us
* being able to grab the lock for read
* check to see if we're allowed to do a thread_block
*/
if (lck->lck_rw_can_sleep) {
-
istate = lck_interlock_lock(lck);
if ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {
-
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
- trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
+ trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);
lck->lck_r_waiting = TRUE;
thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
res = assert_wait(RW_LOCK_READER_EVENT(lck),
- THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
+ THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
lck_interlock_unlock(lck, istate);
if (res == THREAD_WAITING) {
slept++;
}
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
- trace_lck, res, slept, 0, 0);
+ trace_lck, res, slept, 0, 0);
} else {
lck->lck_rw_shared_count++;
lck_interlock_unlock(lck, istate);
}
}
-#if CONFIG_DTRACE
+#if CONFIG_DTRACE
if (dtrace_ls_enabled == TRUE) {
if (slept == 0) {
LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
{
current_thread()->rwlock_count++;
if (atomic_test_and_set32(&lock->data,
- (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
- LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
-#if CONFIG_DTRACE
+ (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
+ LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
+#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
-#endif /* CONFIG_DTRACE */
- } else
+#endif /* CONFIG_DTRACE */
+ } else {
lck_rw_lock_exclusive_gen(lock);
+ }
}
boolean_t
lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
{
- uint32_t data, prev;
+ uint32_t data, prev;
- for ( ; ; ) {
+ for (;;) {
data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
if (data & LCK_RW_INTERLOCK) {
atomic_exchange_abort();
}
if (data & LCK_RW_WANT_UPGRADE) {
data -= LCK_RW_SHARED_READER;
- if ((data & LCK_RW_SHARED_MASK) == 0) /* we were the last reader */
- data &= ~(LCK_RW_W_WAITING); /* so clear the wait indicator */
- if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
+ if ((data & LCK_RW_SHARED_MASK) == 0) { /* we were the last reader */
+ data &= ~(LCK_RW_W_WAITING); /* so clear the wait indicator */
+ }
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp)) {
return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
+ }
} else {
- data |= LCK_RW_WANT_UPGRADE; /* ask for WANT_UPGRADE */
- data -= LCK_RW_SHARED_READER; /* and shed our read count */
- if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
+ data |= LCK_RW_WANT_UPGRADE; /* ask for WANT_UPGRADE */
+ data -= LCK_RW_SHARED_READER; /* and shed our read count */
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp)) {
break;
+ }
}
cpu_pause();
}
- /* we now own the WANT_UPGRADE */
- if (data & LCK_RW_SHARED_MASK) /* check to see if all of the readers are drained */
- lck_rw_lock_shared_to_exclusive_success(lock); /* if not, we need to go wait */
-#if CONFIG_DTRACE
+ /* we now own the WANT_UPGRADE */
+ if (data & LCK_RW_SHARED_MASK) { /* check to see if all of the readers are drained */
+ lck_rw_lock_shared_to_exclusive_success(lock); /* if not, we need to go wait */
+ }
+#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
#endif
return TRUE;
*/
static boolean_t
lck_rw_lock_shared_to_exclusive_failure(
- lck_rw_t *lck,
- uint32_t prior_lock_state)
+ lck_rw_t *lck,
+ uint32_t prior_lock_state)
{
- lck_rw_t *fake_lck;
- thread_t thread = current_thread();
- uint32_t rwlock_count;
+ lck_rw_t *fake_lck;
+ thread_t thread = current_thread();
+ uint32_t rwlock_count;
/* Check if dropping the lock means that we need to unpromote */
rwlock_count = thread->rwlock_count--;
}
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
- VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
+ VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
- return (FALSE);
+ return FALSE;
}
*/
static boolean_t
lck_rw_lock_shared_to_exclusive_success(
- lck_rw_t *lck)
+ lck_rw_t *lck)
{
- __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
- uint64_t deadline = 0;
- int slept = 0;
- int still_shared = 0;
- wait_result_t res;
- boolean_t istate = -1;
+ __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
+ uint64_t deadline = 0;
+ int slept = 0;
+ int still_shared = 0;
+ wait_result_t res;
+ boolean_t istate = -1;
-#if CONFIG_DTRACE
+#if CONFIG_DTRACE
uint64_t wait_interval = 0;
int readers_at_sleep = 0;
boolean_t dtrace_ls_initialized = FALSE;
#endif
while (lck->lck_rw_shared_count != 0) {
-
-#if CONFIG_DTRACE
+#if CONFIG_DTRACE
if (dtrace_ls_initialized == FALSE) {
dtrace_ls_initialized = TRUE;
dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
}
}
#endif
- if (istate == -1)
+ if (istate == -1) {
istate = ml_get_interrupts_enabled();
+ }
deadline = lck_rw_deadline_for_spin(lck);
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
- trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
+ trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
- while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline)
+ while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline) {
lck_rw_lock_pause(istate);
+ }
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
- trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
+ trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
- if ( !still_shared)
+ if (!still_shared) {
break;
+ }
/*
* if we get here, the deadline has expired w/o
* the rw_shared_count having drained to 0
* check to see if we're allowed to do a thread_block
*/
if (lck->lck_rw_can_sleep) {
-
istate = lck_interlock_lock(lck);
if (lck->lck_rw_shared_count != 0) {
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
- trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
+ trace_lck, lck->lck_rw_shared_count, 0, 0, 0);
lck->lck_w_waiting = TRUE;
thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
res = assert_wait(RW_LOCK_WRITER_EVENT(lck),
- THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
+ THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
lck_interlock_unlock(lck, istate);
if (res == THREAD_WAITING) {
slept++;
}
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
- trace_lck, res, slept, 0, 0);
+ trace_lck, res, slept, 0, 0);
} else {
lck_interlock_unlock(lck, istate);
break;
}
}
}
-#if CONFIG_DTRACE
+#if CONFIG_DTRACE
/*
* We infer whether we took the sleep/spin path above by checking readers_at_sleep.
*/
}
LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
#endif
- return (TRUE);
+ return TRUE;
}
/*
* Routine: lck_rw_lock_exclusive_to_shared
*/
-void lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
+void
+lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
{
- uint32_t data, prev;
+ uint32_t data, prev;
- for ( ; ; ) {
+ for (;;) {
data = atomic_exchange_begin32(&lock->data, &prev, memory_order_release_smp);
if (data & LCK_RW_INTERLOCK) {
atomic_exchange_abort();
- lck_rw_interlock_spin(lock); /* wait for interlock to clear */
+ lck_rw_interlock_spin(lock); /* wait for interlock to clear */
continue;
}
data += LCK_RW_SHARED_READER;
- if (data & LCK_RW_WANT_UPGRADE)
+ if (data & LCK_RW_WANT_UPGRADE) {
data &= ~(LCK_RW_WANT_UPGRADE);
- else
+ } else {
data &= ~(LCK_RW_WANT_EXCL);
- if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL)))
+ }
+ if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL))) {
data &= ~(LCK_RW_W_WAITING);
- if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_release_smp))
+ }
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_release_smp)) {
break;
+ }
cpu_pause();
}
return lck_rw_lock_exclusive_to_shared_gen(lock, prev);
/*
* Routine: lck_rw_lock_exclusive_to_shared_gen
- * Function:
+ * Function:
* assembly fast path has already dropped
* our exclusive state and bumped lck_rw_shared_count
* all we need to do here is determine if anyone
*/
static void
lck_rw_lock_exclusive_to_shared_gen(
- lck_rw_t *lck,
- uint32_t prior_lock_state)
+ lck_rw_t *lck,
+ uint32_t prior_lock_state)
{
- __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
- lck_rw_t *fake_lck;
+ __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
+ lck_rw_t *fake_lck;
fake_lck = (lck_rw_t *)&prior_lock_state;
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
- trace_lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);
+ trace_lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0);
/*
* don't wake up anyone waiting to take the lock exclusively
* wake up any waiting readers if we don't have any writers waiting,
* or the lock is NOT marked as rw_priv_excl (writers have privilege)
*/
- if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting)
+ if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting) {
thread_wakeup(RW_LOCK_READER_EVENT(lck));
+ }
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
- trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
+ trace_lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
*/
boolean_t
lck_rw_try_lock(
- lck_rw_t *lck,
- lck_rw_type_t lck_rw_type)
-{
- if (lck_rw_type == LCK_RW_TYPE_SHARED)
- return(lck_rw_try_lock_shared(lck));
- else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
- return(lck_rw_try_lock_exclusive(lck));
- else
+ lck_rw_t *lck,
+ lck_rw_type_t lck_rw_type)
+{
+ if (lck_rw_type == LCK_RW_TYPE_SHARED) {
+ return lck_rw_try_lock_shared(lck);
+ } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
+ return lck_rw_try_lock_exclusive(lck);
+ } else {
panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type);
- return(FALSE);
+ }
+ return FALSE;
}
/*
* Routine: lck_rw_try_lock_shared
*/
-boolean_t lck_rw_try_lock_shared(lck_rw_t *lock)
+boolean_t
+lck_rw_try_lock_shared(lck_rw_t *lock)
{
- uint32_t data, prev;
+ uint32_t data, prev;
- for ( ; ; ) {
+ for (;;) {
data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
if (data & LCK_RW_INTERLOCK) {
atomic_exchange_abort();
}
if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
atomic_exchange_abort();
- return FALSE; /* lock is busy */
+ return FALSE; /* lock is busy */
}
- data += LCK_RW_SHARED_READER; /* Increment reader refcount */
- if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
+ data += LCK_RW_SHARED_READER; /* Increment reader refcount */
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp)) {
break;
+ }
cpu_pause();
}
current_thread()->rwlock_count++;
/* There is a 3 instr window where preemption may not notice rwlock_count after cmpxchg */
-#if CONFIG_DTRACE
+#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
-#endif /* CONFIG_DTRACE */
+#endif /* CONFIG_DTRACE */
return TRUE;
}
* Routine: lck_rw_try_lock_exclusive
*/
-boolean_t lck_rw_try_lock_exclusive(lck_rw_t *lock)
+boolean_t
+lck_rw_try_lock_exclusive(lck_rw_t *lock)
{
- uint32_t data, prev;
+ uint32_t data, prev;
- for ( ; ; ) {
+ for (;;) {
data = atomic_exchange_begin32(&lock->data, &prev, memory_order_acquire_smp);
if (data & LCK_RW_INTERLOCK) {
atomic_exchange_abort();
}
if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
atomic_exchange_abort();
- return FALSE; /* can't get it */
+ return FALSE; /* can't get it */
}
data |= LCK_RW_WANT_EXCL;
- if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp))
+ if (atomic_exchange_complete32(&lock->data, prev, data, memory_order_acquire_smp)) {
break;
+ }
cpu_pause();
}
current_thread()->rwlock_count++;
-#if CONFIG_DTRACE
+#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
-#endif /* CONFIG_DTRACE */
+#endif /* CONFIG_DTRACE */
return TRUE;
}
void
lck_rw_assert(
- lck_rw_t *lck,
- unsigned int type)
+ lck_rw_t *lck,
+ unsigned int type)
{
switch (type) {
case LCK_RW_ASSERT_SHARED:
break;
case LCK_RW_ASSERT_EXCLUSIVE:
if ((lck->lck_rw_want_write ||
- lck->lck_rw_want_upgrade) &&
+ lck->lck_rw_want_upgrade) &&
lck->lck_rw_shared_count == 0) {
return;
}
break;
case LCK_RW_ASSERT_NOTHELD:
if (!(lck->lck_rw_want_write ||
- lck->lck_rw_want_upgrade ||
- lck->lck_rw_shared_count != 0)) {
+ lck->lck_rw_want_upgrade ||
+ lck->lck_rw_shared_count != 0)) {
return;
}
break;
* NOT SAFE: To be used only by kernel debugger to avoid deadlock.
*/
boolean_t
-kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck) {
+kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck)
+{
if (not_in_kdp) {
panic("panic: rw lock exclusive check done outside of kernel debugger");
}
* on acquire.
*/
-#ifdef MUTEX_ZONE
+#ifdef MUTEX_ZONE
extern zone_t lck_mtx_zone;
#endif
*/
lck_mtx_t *
lck_mtx_alloc_init(
- lck_grp_t *grp,
- lck_attr_t *attr)
+ lck_grp_t *grp,
+ lck_attr_t *attr)
{
- lck_mtx_t *lck;
-#ifdef MUTEX_ZONE
- if ((lck = (lck_mtx_t *)zalloc(lck_mtx_zone)) != 0)
+ lck_mtx_t *lck;
+#ifdef MUTEX_ZONE
+ if ((lck = (lck_mtx_t *)zalloc(lck_mtx_zone)) != 0) {
lck_mtx_init(lck, grp, attr);
+ }
#else
- if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0)
+ if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0) {
lck_mtx_init(lck, grp, attr);
+ }
#endif
- return(lck);
+ return lck;
}
/*
*/
void
lck_mtx_free(
- lck_mtx_t *lck,
- lck_grp_t *grp)
+ lck_mtx_t *lck,
+ lck_grp_t *grp)
{
lck_mtx_destroy(lck, grp);
-#ifdef MUTEX_ZONE
+#ifdef MUTEX_ZONE
zfree(lck_mtx_zone, lck);
#else
kfree(lck, sizeof(lck_mtx_t));
*/
static void
lck_mtx_ext_init(
- lck_mtx_ext_t *lck,
- lck_grp_t *grp,
- lck_attr_t *attr)
+ lck_mtx_ext_t *lck,
+ lck_grp_t *grp,
+ lck_attr_t *attr)
{
bzero((void *)lck, sizeof(lck_mtx_ext_t));
lck->lck_mtx_grp = grp;
- if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
+ if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT) {
lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
+ }
lck->lck_mtx.lck_mtx_is_ext = 1;
lck->lck_mtx.lck_mtx_pad32 = 0xFFFFFFFF;
*/
void
lck_mtx_init(
- lck_mtx_t *lck,
- lck_grp_t *grp,
- lck_attr_t *attr)
+ lck_mtx_t *lck,
+ lck_grp_t *grp,
+ lck_attr_t *attr)
{
- lck_mtx_ext_t *lck_ext;
- lck_attr_t *lck_attr;
+ lck_mtx_ext_t *lck_ext;
+ lck_attr_t *lck_attr;
- if (attr != LCK_ATTR_NULL)
+ if (attr != LCK_ATTR_NULL) {
lck_attr = attr;
- else
+ } else {
lck_attr = &LockDefaultLckAttr;
+ }
if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) {
*/
void
lck_mtx_init_ext(
- lck_mtx_t *lck,
- lck_mtx_ext_t *lck_ext,
- lck_grp_t *grp,
- lck_attr_t *attr)
+ lck_mtx_t *lck,
+ lck_mtx_ext_t *lck_ext,
+ lck_grp_t *grp,
+ lck_attr_t *attr)
{
- lck_attr_t *lck_attr;
+ lck_attr_t *lck_attr;
- if (attr != LCK_ATTR_NULL)
+ if (attr != LCK_ATTR_NULL) {
lck_attr = attr;
- else
+ } else {
lck_attr = &LockDefaultLckAttr;
+ }
if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
lck_mtx_ext_init(lck_ext, grp, lck_attr);
*/
void
lck_mtx_destroy(
- lck_mtx_t *lck,
- lck_grp_t *grp)
+ lck_mtx_t *lck,
+ lck_grp_t *grp)
{
boolean_t indirect;
- if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
+ if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) {
return;
+ }
#if MACH_LDEBUG
lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
#endif
lck_mtx_lock_mark_destroyed(lck, indirect);
- if (indirect)
+ if (indirect) {
kfree(lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t));
+ }
lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
lck_grp_deallocate(grp);
return;
static boolean_t
get_indirect_mutex(
lck_mtx_t **lock,
- uint32_t *state)
+ uint32_t *state)
{
*lock = &((*lock)->lck_mtx_ptr->lck_mtx);
*state = ordered_load_mtx_state(*lock);
}
/*
- * Routine: lck_mtx_unlock_slow
+ * Routine: lck_mtx_unlock_slow
*
* Unlocks a mutex held by current thread.
*
__attribute__((noinline))
void
lck_mtx_unlock_slow(
- lck_mtx_t *lock)
+ lck_mtx_t *lock)
{
- thread_t thread;
- uint32_t state, prev;
- boolean_t indirect = FALSE;
+ thread_t thread;
+ uint32_t state, prev;
+ boolean_t indirect = FALSE;
state = ordered_load_mtx_state(lock);
#if DEVELOPMENT | DEBUG
thread_t owner = (thread_t)lock->lck_mtx_owner;
- if(__improbable(owner != thread))
+ if (__improbable(owner != thread)) {
lck_mtx_owner_check_panic(lock);
+ }
#endif
/* check if it is held as a spinlock */
- if (__improbable((state & LCK_MTX_MLOCKED_MSK) == 0))
+ if (__improbable((state & LCK_MTX_MLOCKED_MSK) == 0)) {
goto unlock;
+ }
lck_mtx_interlock_lock_clear_flags(lock, LCK_MTX_MLOCKED_MSK, &state);
if (__improbable(state & LCK_MTX_WAITERS_MSK)) {
#if MACH_LDEBUG
- if (thread)
+ if (thread) {
thread->mutex_count--;
+ }
#endif
return lck_mtx_unlock_wakeup_tail(lock, state, indirect);
}
/* release interlock, promotion and clear spin flag */
state &= (~(LCK_MTX_ILOCKED_MSK | LCK_MTX_SPIN_MSK));
- ordered_store_mtx_state_release(lock, state); /* since I own the interlock, I don't need an atomic update */
+ ordered_store_mtx_state_release(lock, state); /* since I own the interlock, I don't need an atomic update */
-#if MACH_LDEBUG
+#if MACH_LDEBUG
/* perform lock statistics after drop to prevent delay */
- if (thread)
- thread->mutex_count--; /* lock statistic */
-#endif /* MACH_LDEBUG */
+ if (thread) {
+ thread->mutex_count--; /* lock statistic */
+ }
+#endif /* MACH_LDEBUG */
/* re-enable preemption */
lck_mtx_unlock_finish_inline(lock, FALSE);
return;
}
-#define LCK_MTX_LCK_WAIT_CODE 0x20
-#define LCK_MTX_LCK_WAKEUP_CODE 0x21
-#define LCK_MTX_LCK_SPIN_CODE 0x22
-#define LCK_MTX_LCK_ACQUIRE_CODE 0x23
-#define LCK_MTX_LCK_DEMOTE_CODE 0x24
+#define LCK_MTX_LCK_WAIT_CODE 0x20
+#define LCK_MTX_LCK_WAKEUP_CODE 0x21
+#define LCK_MTX_LCK_SPIN_CODE 0x22
+#define LCK_MTX_LCK_ACQUIRE_CODE 0x23
+#define LCK_MTX_LCK_DEMOTE_CODE 0x24
/*
* Routine: lck_mtx_unlock_wakeup_tail
*/
__attribute__((noinline))
static void
-lck_mtx_unlock_wakeup_tail (
- lck_mtx_t *mutex,
+lck_mtx_unlock_wakeup_tail(
+ lck_mtx_t *mutex,
uint32_t state,
- boolean_t indirect)
+ boolean_t indirect)
{
struct turnstile *ts;
- __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
+ __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
kern_return_t did_wake;
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
- trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
+ trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
state -= LCK_MTX_WAITER;
- state &= (~(LCK_MTX_SPIN_MSK | LCK_MTX_ILOCKED_MSK));
+ state &= (~(LCK_MTX_SPIN_MSK | LCK_MTX_ILOCKED_MSK));
ordered_store_mtx_state_release(mutex, state);
assert(current_thread()->turnstile != NULL);
turnstile_cleanup();
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
- trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
+ trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
lck_mtx_unlock_finish_inline(mutex, indirect);
}
/*
- * Routine: lck_mtx_lock_acquire_x86
+ * Routine: lck_mtx_lock_acquire_x86
*
* Invoked on acquiring the mutex when there is
* contention (i.e. the assembly routine sees that
__attribute__((always_inline))
static void
lck_mtx_lock_acquire_inline(
- lck_mtx_t *mutex,
+ lck_mtx_t *mutex,
struct turnstile *ts)
{
- __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
+ __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
- trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
+ trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
thread_t thread = (thread_t)mutex->lck_mtx_owner; /* faster than current_thread() */
assert(thread->waiting_for_mutex == NULL);
assert(current_thread()->turnstile != NULL);
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
- trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
+ trace_lck, 0, mutex->lck_mtx_waiters, 0, 0);
}
void
lck_mtx_lock_acquire_x86(
- lck_mtx_t *mutex)
+ lck_mtx_t *mutex)
{
return lck_mtx_lock_acquire_inline(mutex, NULL);
}
__attribute__((noinline))
static void
lck_mtx_lock_acquire_tail(
- lck_mtx_t *mutex,
- boolean_t indirect,
+ lck_mtx_t *mutex,
+ boolean_t indirect,
struct turnstile *ts)
{
lck_mtx_lock_acquire_inline(mutex, ts);
__attribute__((noinline))
static boolean_t
lck_mtx_try_lock_acquire_tail(
- lck_mtx_t *mutex)
+ lck_mtx_t *mutex)
{
lck_mtx_lock_acquire_inline(mutex, NULL);
lck_mtx_try_lock_finish_inline(mutex, ordered_load_mtx_state(mutex));
__attribute__((noinline))
static void
lck_mtx_convert_spin_acquire_tail(
- lck_mtx_t *mutex)
+ lck_mtx_t *mutex)
{
lck_mtx_lock_acquire_inline(mutex, NULL);
lck_mtx_convert_spin_finish_inline(mutex, ordered_load_mtx_state(mutex));
uint32_t state, prev;
state = *new_state;
- for ( ; ; ) {
+ for (;;) {
/* have to wait for interlock to clear */
while (__improbable(state & (LCK_MTX_ILOCKED_MSK | xor_flags))) {
cpu_pause();
}
prev = state; /* prev contains snapshot for exchange */
state |= LCK_MTX_ILOCKED_MSK | xor_flags; /* pick up interlock */
- state &= ~and_flags; /* clear flags */
+ state &= ~and_flags; /* clear flags */
disable_preemption();
- if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire))
+ if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire)) {
break;
+ }
enable_preemption();
cpu_pause();
state = ordered_load_mtx_state(mutex);
if (state & (LCK_MTX_ILOCKED_MSK | or_flags)) {
return 0;
}
- prev = state; /* prev contains snapshot for exchange */
- state |= LCK_MTX_ILOCKED_MSK | or_flags; /* pick up interlock */
+ prev = state; /* prev contains snapshot for exchange */
+ state |= LCK_MTX_ILOCKED_MSK | or_flags; /* pick up interlock */
disable_preemption();
if (os_atomic_cmpxchg(&mutex->lck_mtx_state, prev, state, acquire)) {
- *new_state = state;
- return 1;
+ *new_state = state;
+ return 1;
}
enable_preemption();
return 0;
}
-static inline int
-lck_mtx_interlock_try_lock(
- lck_mtx_t *mutex,
- uint32_t *new_state)
-{
- return lck_mtx_interlock_try_lock_set_flags(mutex, 0, new_state);
-}
-
-static inline int
-lck_mtx_interlock_try_lock_disable_interrupts(
- lck_mtx_t *mutex,
- boolean_t *istate)
-{
- uint32_t state;
-
- *istate = ml_set_interrupts_enabled(FALSE);
- state = ordered_load_mtx_state(mutex);
-
- if (lck_mtx_interlock_try_lock(mutex, &state)) {
- return 1;
- } else {
- ml_set_interrupts_enabled(*istate);
- return 0;
- }
-}
-
-static inline void
-lck_mtx_interlock_unlock_enable_interrupts(
- lck_mtx_t *mutex,
- boolean_t istate)
-{
- lck_mtx_ilk_unlock(mutex);
- ml_set_interrupts_enabled(istate);
-}
-
__attribute__((noinline))
static void
lck_mtx_lock_contended(
lck_grp_mtx_update_direct_wait((struct _lck_mtx_ext_*)lock);
}
- /* just fall through case LCK_MTX_SPINWAIT_SPUN */
- case LCK_MTX_SPINWAIT_SPUN:
+ /* just fall through case LCK_MTX_SPINWAIT_SPUN */
+ case LCK_MTX_SPINWAIT_SPUN_HIGH_THR:
+ case LCK_MTX_SPINWAIT_SPUN_OWNER_NOT_CORE:
+ case LCK_MTX_SPINWAIT_SPUN_NO_WINDOW_CONTENTION:
+ case LCK_MTX_SPINWAIT_SPUN_SLIDING_THR:
/*
* mutex not acquired but lck_mtx_lock_spinwait_x86 tried to spin
* interlock not held
*/
goto try_again;
} else {
-
/* grab the mutex */
state |= LCK_MTX_MLOCKED_MSK;
ordered_store_mtx_state_release(lock, state);
{
uint32_t state;
- for ( ; ; ) {
+ for (;;) {
cpu_pause();
state = ordered_load_mtx_state(lock);
if (!(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
{
uint32_t state;
- for ( ; ; ) {
+ for (;;) {
cpu_pause();
state = ordered_load_mtx_state(lock);
if (state & (LCK_MTX_MLOCKED_MSK | LCK_MTX_SPIN_MSK)) {
lck_mtx_lock_slow(
lck_mtx_t *lock)
{
- boolean_t indirect = FALSE;
- uint32_t state;
- int first_miss = 0;
+ boolean_t indirect = FALSE;
+ uint32_t state;
+ int first_miss = 0;
state = ordered_load_mtx_state(lock);
/* is the mutex already held and not indirect */
- if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))){
+ if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
/* no, must have been the mutex */
return lck_mtx_lock_contended(lock, indirect, &first_miss);
}
lck_grp_mtx_update_held((struct _lck_mtx_ext_*)lock);
if (state & LCK_MTX_SPIN_MSK) {
- /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
+ /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
assert(state & LCK_MTX_ILOCKED_MSK);
lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
}
#if MACH_LDEBUG
if (thread) {
- thread->mutex_count++; /* lock statistic */
+ thread->mutex_count++; /* lock statistic */
}
#endif
/*
*/
/* is the mutex already held and not indirect */
- if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))){
+ if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
return FALSE;
}
}
if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
- if (indirect)
+ if (indirect) {
lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
+ }
return FALSE;
}
}
/* no - can't be INDIRECT, DESTROYED or locked */
while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_MLOCKED_MSK, &state))) {
if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
- if (indirect)
+ if (indirect) {
lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
+ }
return FALSE;
}
}
#if MACH_LDEBUG
if (thread) {
- thread->mutex_count++; /* lock statistic */
+ thread->mutex_count++; /* lock statistic */
}
#endif
/*
lck_mtx_try_lock_finish_inline(lock, ordered_load_mtx_state(lock));
return TRUE;
-
}
__attribute__((noinline))
void
lck_mtx_lock_spin_slow(
- lck_mtx_t *lock)
+ lck_mtx_t *lock)
{
boolean_t indirect = FALSE;
uint32_t state;
/* is the mutex already held and not indirect */
- if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))){
+ if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
/* no, must have been the mutex */
return lck_mtx_lock_contended(lock, indirect, &first_miss);
}
lck_grp_mtx_update_held((struct _lck_mtx_ext_*)lock);
if (state & LCK_MTX_SPIN_MSK) {
- /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
+ /* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
assert(state & LCK_MTX_ILOCKED_MSK);
lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
}
}
/* no - can't be INDIRECT, DESTROYED or locked */
- while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state) )) {
+ while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
if (!lck_mtx_lock_wait_interlock_to_clear(lock, &state)) {
return lck_mtx_lock_contended(lock, indirect, &first_miss);
}
}
#endif
-#if CONFIG_DTRACE
+#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
#endif
/* return with the interlock held and preemption disabled */
*/
/* is the mutex already held and not indirect */
- if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))){
+ if (__improbable(!(state & LCK_MTX_ILOCKED_MSK))) {
return FALSE;
}
}
if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
- if (indirect)
+ if (indirect) {
lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
+ }
return FALSE;
}
}
/* no - can't be INDIRECT, DESTROYED or locked */
while (__improbable(!lck_mtx_interlock_try_lock_set_flags(lock, LCK_MTX_SPIN_MSK, &state))) {
if (!lck_mtx_try_lock_wait_interlock_to_clear(lock, &state)) {
- if (indirect)
+ if (indirect) {
lck_grp_mtx_update_miss((struct _lck_mtx_ext_*)lock, &first_miss);
+ }
return FALSE;
}
}
#if MACH_LDEBUG
if (thread) {
- thread->mutex_count++; /* lock statistic */
+ thread->mutex_count++; /* lock statistic */
}
#endif
LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0);
#endif
return TRUE;
-
}
__attribute__((noinline))
void
lck_mtx_convert_spin(
- lck_mtx_t *lock)
+ lck_mtx_t *lock)
{
uint32_t state;
static inline boolean_t
lck_mtx_lock_grab_mutex(
- lck_mtx_t *lock)
+ lck_mtx_t *lock)
{
uint32_t state;
#if MACH_LDEBUG
if (thread) {
- thread->mutex_count++; /* lock statistic */
+ thread->mutex_count++; /* lock statistic */
}
#endif
return TRUE;
__attribute__((noinline))
void
lck_mtx_assert(
- lck_mtx_t *lock,
- unsigned int type)
+ lck_mtx_t *lock,
+ unsigned int type)
{
thread_t thread, owner;
uint32_t state;
owner = (thread_t)lock->lck_mtx_owner;
if (type == LCK_MTX_ASSERT_OWNED) {
- if (owner != thread || !(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK)))
+ if (owner != thread || !(state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK))) {
panic("mutex (%p) not owned\n", lock);
+ }
} else {
- assert (type == LCK_MTX_ASSERT_NOTOWNED);
- if (owner == thread)
+ assert(type == LCK_MTX_ASSERT_NOTOWNED);
+ if (owner == thread) {
panic("mutex (%p) owned\n", lock);
+ }
}
}
/*
- * Routine: lck_mtx_lock_spinwait_x86
+ * Routine: lck_mtx_lock_spinwait_x86
*
* Invoked trying to acquire a mutex when there is contention but
* the holder is running on another processor. We spin for up to a maximum
__attribute__((noinline))
lck_mtx_spinwait_ret_type_t
lck_mtx_lock_spinwait_x86(
- lck_mtx_t *mutex)
+ lck_mtx_t *mutex)
{
- __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
- thread_t holder;
- uint64_t overall_deadline;
- uint64_t check_owner_deadline;
- uint64_t cur_time;
- lck_mtx_spinwait_ret_type_t retval = LCK_MTX_SPINWAIT_SPUN;
- int loopcount = 0;
+ __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
+ thread_t owner, prev_owner;
+ uint64_t window_deadline, sliding_deadline, high_deadline;
+ uint64_t start_time, cur_time, avg_hold_time, bias, delta;
+ lck_mtx_spinwait_ret_type_t retval = LCK_MTX_SPINWAIT_SPUN_HIGH_THR;
+ int loopcount = 0;
+ int total_hold_time_samples, window_hold_time_samples, unfairness;
+ uint i, prev_owner_cpu;
+ bool owner_on_core, adjust;
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
- trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, 0, 0);
+ trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, 0, 0);
- cur_time = mach_absolute_time();
- overall_deadline = cur_time + MutexSpin;
- check_owner_deadline = cur_time;
+ start_time = mach_absolute_time();
+ /*
+ * window_deadline represents the "learning" phase.
+ * The thread collects statistics about the lock until
+ * window_deadline expires and then decides whether to keep spinning
+ * or block, according to the concurrency behavior
+ * observed.
+ *
+ * Every thread can spin at least low_MutexSpin.
+ */
+ window_deadline = start_time + low_MutexSpin;
+ /*
+ * Sliding_deadline is the adjusted spin deadline
+ * computed after the "learning" phase.
+ */
+ sliding_deadline = window_deadline;
+ /*
+ * High_deadline is a hard deadline. No thread
+ * can spin past this deadline.
+ */
+ if (high_MutexSpin >= 0) {
+ high_deadline = start_time + high_MutexSpin;
+ } else {
+ high_deadline = start_time + low_MutexSpin * real_ncpus;
+ }
+ /*
+ * We do not yet know which cpu the owner is running on.
+ * Initialize prev_owner_cpu with the next cpu.
+ */
+ prev_owner_cpu = (cpu_number() + 1) % real_ncpus;
+ total_hold_time_samples = 0;
+ window_hold_time_samples = 0;
+ avg_hold_time = 0;
+ adjust = TRUE;
+ bias = (os_hash_kernel_pointer(mutex) + cpu_number()) % real_ncpus;
+
+ prev_owner = (thread_t) mutex->lck_mtx_owner;
/*
* Spin while:
* - mutex is locked, and
- * - its locked as a spin lock, and
+ * - it's locked as a spin lock, and
* - owner is running on another processor, and
- * - owner (processor) is not idling, and
* - we haven't spun for long enough.
*/
do {
+ /*
+ * Try to acquire the lock.
+ */
if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
retval = LCK_MTX_SPINWAIT_ACQUIRED;
break;
}
+
cur_time = mach_absolute_time();
- if (cur_time >= overall_deadline)
+ /*
+ * Never spin past high_deadline.
+ */
+ if (cur_time >= high_deadline) {
+ retval = LCK_MTX_SPINWAIT_SPUN_HIGH_THR;
break;
+ }
- if (cur_time >= check_owner_deadline && mutex->lck_mtx_owner) {
- boolean_t istate;
+ /*
+ * Check if owner is on core. If not block.
+ */
+ owner = (thread_t) mutex->lck_mtx_owner;
+ if (owner) {
+ i = prev_owner_cpu;
+ owner_on_core = FALSE;
+
+ disable_preemption();
+ owner = (thread_t) mutex->lck_mtx_owner;
/*
- * We will repeatedly peek at the state of the lock while spinning,
- * and we will acquire the interlock to do so.
- * The thread that will unlock the mutex will also need to acquire
- * the interlock, and we want to avoid to slow it down.
- * To avoid to get an interrupt while holding the interlock
- * and increase the time we are holding it, we
- * will try to acquire the interlock with interrupts disabled.
- * This is safe because it is a "try_lock", if we can't acquire
- * the interlock we re-enable the interrupts and fail, so it is
- * ok to call it even if the interlock was already held.
- */
- if (lck_mtx_interlock_try_lock_disable_interrupts(mutex, &istate)) {
-
- if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) {
-
- if ( !(holder->machine.specFlags & OnProc) ||
- (holder->state & TH_IDLE)) {
-
- lck_mtx_interlock_unlock_enable_interrupts(mutex, istate);
-
- if (loopcount == 0)
+ * For scalability we want to check if the owner is on core
+ * without locking the mutex interlock.
+ * If we do not lock the mutex interlock, the owner that we see might be
+ * invalid, so we cannot dereference it. Therefore we cannot check
+ * any field of the thread to tell us if it is on core.
+ * Instead, check whether the thread running on any other cpu matches the owner.
+ */
+ if (owner) {
+ do {
+ if ((cpu_data_ptr[i] != NULL) && (cpu_data_ptr[i]->cpu_active_thread == owner)) {
+ owner_on_core = TRUE;
+ break;
+ }
+ if (++i >= real_ncpus) {
+ i = 0;
+ }
+ } while (i != prev_owner_cpu);
+ enable_preemption();
+
+ if (owner_on_core) {
+ prev_owner_cpu = i;
+ } else {
+ prev_owner = owner;
+ owner = (thread_t) mutex->lck_mtx_owner;
+ if (owner == prev_owner) {
+ /*
+ * Owner is not on core.
+ * Stop spinning.
+ */
+ if (loopcount == 0) {
retval = LCK_MTX_SPINWAIT_NO_SPIN;
+ } else {
+ retval = LCK_MTX_SPINWAIT_SPUN_OWNER_NOT_CORE;
+ }
break;
}
+ /*
+ * Fall through if the owner changed while we were scanning.
+ * The new owner could potentially be on core, so loop
+ * again.
+ */
}
- lck_mtx_interlock_unlock_enable_interrupts(mutex, istate);
+ } else {
+ enable_preemption();
+ }
+ }
- check_owner_deadline = cur_time + (MutexSpin / 4);
+ /*
+ * Save how many times we see the owner changing.
+ * We can roughly estimate the mutex hold
+ * time and the fairness with that.
+ */
+ if (owner != prev_owner) {
+ prev_owner = owner;
+ total_hold_time_samples++;
+ window_hold_time_samples++;
+ }
+
+ /*
+ * Learning window expired.
+ * Try to adjust the sliding_deadline.
+ */
+ if (cur_time >= window_deadline) {
+ /*
+ * If there was no contention during the window,
+ * stop spinning.
+ */
+ if (window_hold_time_samples < 1) {
+ retval = LCK_MTX_SPINWAIT_SPUN_NO_WINDOW_CONTENTION;
+ break;
}
+
+ if (adjust) {
+ /*
+ * For a fair lock, we'd wait for at most (NCPU-1) periods,
+ * but the lock is unfair, so let's try to estimate by how much.
+ */
+ unfairness = total_hold_time_samples / real_ncpus;
+
+ if (unfairness == 0) {
+ /*
+ * We observed the owner changing `total_hold_time_samples` times which
+ * let us estimate the average hold time of this mutex for the duration
+ * of the spin time.
+ * avg_hold_time = (cur_time - start_time) / total_hold_time_samples;
+ *
+ * In this case spin at max avg_hold_time * (real_ncpus - 1)
+ */
+ delta = cur_time - start_time;
+ sliding_deadline = start_time + (delta * (real_ncpus - 1)) / total_hold_time_samples;
+ } else {
+ /*
+ * In this case at least one of the other cpus was able to get the lock twice
+ * while I was spinning.
+ * We could spin longer but it won't necessarily help if the system is unfair.
+ * Try to randomize the wait to reduce contention.
+ *
+ * We compute how much time we could potentially spin
+ * and distribute it over the cpus.
+ *
+ * bias is an integer between 0 and real_ncpus - 1.
+ * distributed_increment = ((high_deadline - cur_time) / real_ncpus) * bias
+ */
+ delta = high_deadline - cur_time;
+ sliding_deadline = cur_time + ((delta * bias) / real_ncpus);
+ adjust = FALSE;
+ }
+ }
+
+ window_deadline += low_MutexSpin;
+ window_hold_time_samples = 0;
}
- cpu_pause();
- loopcount++;
+ /*
+ * Stop spinning if we are past
+ * the adjusted deadline.
+ */
+ if (cur_time >= sliding_deadline) {
+ retval = LCK_MTX_SPINWAIT_SPUN_SLIDING_THR;
+ break;
+ }
+
+ if ((thread_t) mutex->lck_mtx_owner != NULL) {
+ cpu_pause();
+ }
+ loopcount++;
} while (TRUE);
-#if CONFIG_DTRACE
+#if CONFIG_DTRACE
/*
- * We've already kept a count via overall_deadline of how long we spun.
- * If dtrace is active, then we compute backwards to decide how
- * long we spun.
- *
* Note that we record a different probe id depending on whether
- * this is a direct or indirect mutex. This allows us to
+ * this is a direct or indirect mutex. This allows us to
* penalize only lock groups that have debug/stats enabled
* with dtrace processing if desired.
*/
if (__probable(mutex->lck_mtx_is_ext == 0)) {
LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex,
- mach_absolute_time() - (overall_deadline - MutexSpin));
+ mach_absolute_time() - start_time);
} else {
LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, mutex,
- mach_absolute_time() - (overall_deadline - MutexSpin));
+ mach_absolute_time() - start_time);
}
/* The lockstat acquire event is recorded by the assembly code beneath us. */
#endif
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
- trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, retval, 0);
+ trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner), mutex->lck_mtx_waiters, retval, 0);
return retval;
}
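For reference only (not part of the change): a compact sketch of the sliding-deadline arithmetic that the learning-window comments above describe. The variable meanings mirror lck_mtx_lock_spinwait_x86; the helper itself is hypothetical and assumes total_hold_time_samples >= 1, which the caller guarantees by checking window_hold_time_samples first.

static uint64_t
lck_mtx_sliding_deadline_sketch(uint64_t start_time, uint64_t cur_time,
    uint64_t high_deadline, uint64_t bias, unsigned int ncpus,
    int total_hold_time_samples)
{
	uint64_t delta;
	int unfairness = total_hold_time_samples / ncpus;

	if (unfairness == 0) {
		/*
		 * Fair so far: allow roughly (ncpus - 1) average hold
		 * times, where the average is (cur_time - start_time) /
		 * total_hold_time_samples.
		 */
		delta = cur_time - start_time;
		return start_time + (delta * (ncpus - 1)) / total_hold_time_samples;
	}
	/*
	 * Unfair: spread the remaining spin budget over the cpus and
	 * use the per-thread bias to randomize how much of it we take.
	 */
	delta = high_deadline - cur_time;
	return cur_time + (delta * bias) / ncpus;
}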
/*
- * Routine: lck_mtx_lock_wait_x86
+ * Routine: lck_mtx_lock_wait_x86
*
* Invoked in order to wait on contention.
*
*/
__attribute__((noinline))
void
-lck_mtx_lock_wait_x86 (
- lck_mtx_t *mutex,
+lck_mtx_lock_wait_x86(
+ lck_mtx_t *mutex,
struct turnstile **ts)
{
thread_t self = current_thread();
-#if CONFIG_DTRACE
+#if CONFIG_DTRACE
uint64_t sleep_start = 0;
if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
__kdebug_only uintptr_t trace_lck = unslide_for_kdebug(mutex);
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
- trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner),
- mutex->lck_mtx_waiters, 0, 0);
+ trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner),
+ mutex->lck_mtx_waiters, 0, 0);
assert(self->waiting_for_mutex == NULL);
self->waiting_for_mutex = mutex;
self->waiting_for_mutex = NULL;
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
- trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner),
- mutex->lck_mtx_waiters, 0, 0);
+ trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(mutex->lck_mtx_owner),
+ mutex->lck_mtx_waiters, 0, 0);
-#if CONFIG_DTRACE
+#if CONFIG_DTRACE
/*
* Record the Dtrace lockstat probe for blocking, block time
* measured from when we were entered.
* Returns: TRUE if lock is acquired.
*/
boolean_t
-kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
+kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
{
if (not_in_kdp) {
panic("panic: kdp_lck_mtx_lock_spin_is_acquired called outside of kernel debugger");
kdp_rwlck_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
{
lck_rw_t *rwlck = NULL;
- switch(waitinfo->wait_type) {
- case kThreadWaitKernelRWLockRead:
- rwlck = READ_EVENT_TO_RWLOCK(event);
- break;
- case kThreadWaitKernelRWLockWrite:
- case kThreadWaitKernelRWLockUpgrade:
- rwlck = WRITE_EVENT_TO_RWLOCK(event);
- break;
- default:
- panic("%s was called with an invalid blocking type", __FUNCTION__);
- break;
+ switch (waitinfo->wait_type) {
+ case kThreadWaitKernelRWLockRead:
+ rwlck = READ_EVENT_TO_RWLOCK(event);
+ break;
+ case kThreadWaitKernelRWLockWrite:
+ case kThreadWaitKernelRWLockUpgrade:
+ rwlck = WRITE_EVENT_TO_RWLOCK(event);
+ break;
+ default:
+ panic("%s was called with an invalid blocking type", __FUNCTION__);
+ break;
}
waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
waitinfo->owner = 0;
* well as destroyed mutexes.
*/
+ if (state & (LCK_MTX_ILOCKED_MSK | LCK_MTX_SPIN_MSK)) {
+ return lck_mtx_lock_spin_slow(lock);
+ }
+
/* Note LCK_MTX_SPIN_MSK is set only if LCK_MTX_ILOCKED_MSK is set */
prev = state & ~(LCK_MTX_ILOCKED_MSK | LCK_MTX_MLOCKED_MSK);
state = prev | LCK_MTX_ILOCKED_MSK | LCK_MTX_SPIN_MSK;
uint64_t LockTimeOutTSC;
uint32_t LockTimeOutUsec;
uint64_t MutexSpin;
+uint64_t low_MutexSpin;
+int64_t high_MutexSpin;
uint64_t LastDebuggerEntryAllowance;
uint64_t delay_spin_threshold;
nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
}
MutexSpin = (unsigned int)abstime;
+ low_MutexSpin = MutexSpin;
+ /*
+ * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
+ * real_ncpus is not set at this time
+ */
+ high_MutexSpin = -1;
nanoseconds_to_absolutetime(4ULL * NSEC_PER_SEC, &LastDebuggerEntryAllowance);
if (PE_parse_boot_argn("panic_restart_timeout", &prt, sizeof(prt))) {
VIRTUAL_TIMEOUT_INFLATE64(LockTimeOutTSC);
VIRTUAL_TIMEOUT_INFLATE64(TLBTimeOut);
VIRTUAL_TIMEOUT_INFLATE64(MutexSpin);
+ VIRTUAL_TIMEOUT_INFLATE64(low_MutexSpin);
VIRTUAL_TIMEOUT_INFLATE64(reportphyreaddelayabs);
}
#ifdef XNU_KERNEL_PRIVATE
boolean_t ml_fpu_avx_enabled(void);
-#if !defined(RC_HIDE_XNU_J137)
boolean_t ml_fpu_avx512_enabled(void);
-#endif
void interrupt_latency_tracker_setup(void);
void interrupt_reset_latency_stats(void);
fpu_module_init();
}
+/*
+ * machine_thread_template_init: Initialize machine-specific portion of
+ * the thread template.
+ */
+void
+machine_thread_template_init(thread_t thr_template)
+{
+ assert(fpu_default != UNDEFINED);
+ THREAD_TO_PCB(thr_template)->xstate = fpu_default;
+}
user_addr_t
get_useraddr(void)
#define XCR0_YMM (1ULL << 2) /* YMM state available */
#define XCR0_BNDREGS (1ULL << 3) /* MPX Bounds register state */
#define XCR0_BNDCSR (1ULL << 4) /* MPX Bounds configuration/state */
-#if !defined(RC_HIDE_XNU_J137)
#define XCR0_OPMASK (1ULL << 5) /* Opmask register state */
#define XCR0_ZMM_HI256 (1ULL << 6) /* ZMM upper 256-bit state */
#define XCR0_HI16_ZMM (1ULL << 7) /* ZMM16..ZMM31 512-bit state */
-#endif /* not RC_HIDE_XNU_J137 */
#define XFEM_X87 XCR0_X87
#define XFEM_SSE XCR0_SSE
#define XFEM_YMM XCR0_YMM
#define XFEM_BNDREGS XCR0_BNDREGS
#define XFEM_BNDCSR XCR0_BNDCSR
-#if !defined(XNU_HODE_J137)
#define XFEM_OPMASK XCR0_OPMASK
#define XFEM_ZMM_HI256 XCR0_ZMM_HI256
#define XFEM_HI16_ZMM XCR0_HI16_ZMM
#define XFEM_ZMM (XFEM_ZMM_HI256 | XFEM_HI16_ZMM | XFEM_OPMASK)
-#endif /* not XNU_HODE_J137 */
#define XCR0 (0)
#define PMAP_PCID_PRESERVE (1ULL << 63)
#include <i386/seg.h>
#include <i386/thread.h>
-#include <IOKit/IOBSD.h> /* for IOTaskHasEntitlement */
-#include <sys/csr.h> /* for csr_check */
-
#include <sys/errno.h>
static void user_ldt_set_action(void *);
static int i386_get_ldt_impl(uint32_t *retval, uint64_t start_sel, uint64_t descs,
uint64_t num_sels);
-#define LDT_IN_64BITPROC_ENTITLEMENT "com.apple.security.ldt-in-64bit-process"
-
/*
* Add the descriptors to the LDT, starting with
* the descriptor for 'first_selector'.
uint64_t descs, /* out */
uint64_t num_sels)
{
- if (csr_check(CSR_ALLOW_UNTRUSTED_KEXTS) != 0 &&
- !IOTaskHasEntitlement(current_task(), LDT_IN_64BITPROC_ENTITLEMENT)) {
- return EPERM;
- }
-
return i386_set_ldt_impl(retval, start_sel, descs, num_sels);
}
uint64_t descs, /* out */
uint64_t num_sels)
{
- if (csr_check(CSR_ALLOW_UNTRUSTED_KEXTS) != 0 &&
- !IOTaskHasEntitlement(current_task(), LDT_IN_64BITPROC_ENTITLEMENT)) {
- return EPERM;
- }
-
return i386_get_ldt_impl(retval, start_sel, descs, num_sels);
}
ipc_voucher_t voucher;
assert(ip_kotype(kmsg->ikm_voucher) == IKOT_VOUCHER);
- voucher = (ipc_voucher_t)kmsg->ikm_voucher->ip_kobject;
+ voucher = (ipc_voucher_t)ip_get_kobject(kmsg->ikm_voucher);
/* check to see if the voucher has an importance attribute */
val_count = MACH_VOUCHER_ATTR_VALUE_MAX_NESTED;
/* set up recipe to copy the old voucher */
if (IP_VALID(kmsg->ikm_voucher)) {
- ipc_voucher_t sent_voucher = (ipc_voucher_t)kmsg->ikm_voucher->ip_kobject;
+ ipc_voucher_t sent_voucher = (ipc_voucher_t)ip_get_kobject(kmsg->ikm_voucher);
recipe->key = MACH_VOUCHER_ATTR_KEY_ALL;
recipe->command = MACH_VOUCHER_ATTR_COPY;
#include <kern/simple_lock.h>
#include <kern/mach_param.h>
#include <kern/ipc_host.h>
+#include <kern/ipc_kobject.h>
#include <kern/ipc_mig.h>
#include <kern/host_notify.h>
#include <kern/mk_timer.h>
#include <kern/misc_protos.h>
+#include <kern/suid_cred.h>
#include <kern/sync_lock.h>
#include <kern/sync_sema.h>
#include <kern/ux_handler.h>
#if MACH_ASSERT
ipc_port_debug_init();
#endif
- mig_init();
+ ipc_kobject_init();
ipc_table_init();
ipc_voucher_init();
arcade_init();
#endif
+ suid_cred_init();
+
if (PE_parse_boot_argn("prioritize_launch", &prioritize_launch_bootarg, sizeof(prioritize_launch_bootarg))) {
prioritize_launch = !!prioritize_launch_bootarg;
}
assert(entry->ie_bits & MACH_PORT_TYPE_SEND_RECEIVE);
} else {
ip_lock(reply);
- if (!ip_active(reply)) {
+ /* Is the reply port still active and allowed to be copied out? */
+ if (!ip_active(reply) || !ip_label_check(space, reply, reply_type)) {
/* clear the context value */
reply->ip_reply_context = 0;
ip_unlock(reply);
break;
}
+
name = CAST_MACH_PORT_TO_NAME(object);
kr = ipc_entry_get(space, &name, &entry);
if (kr != KERN_SUCCESS) {
return KERN_INVALID_CAPABILITY;
}
+ /* Don't actually copyout rights we aren't allowed to */
+ if (!ip_label_check(space, ip_object_to_port(object), msgt_name)) {
+ io_unlock(object);
+ ipc_entry_dealloc(space, name, entry);
+ is_write_unlock(space);
+
+ switch (msgt_name) {
+ case MACH_MSG_TYPE_PORT_SEND_ONCE:
+ ipc_port_release_sonce(ip_object_to_port(object));
+ break;
+ case MACH_MSG_TYPE_PORT_SEND:
+ ipc_port_release_send(ip_object_to_port(object));
+ break;
+ default:
+ /*
+ * We don't allow labeling of "kobjects" with receive
+ * rights at user-space or port-sets. So, if we get this far,
+ * something went VERY wrong.
+ */
+ panic("ipc_object_copyout: bad port label check failure");
+ }
+ return KERN_INVALID_CAPABILITY;
+ }
+
entry->ie_object = object;
break;
}
return KERN_INVALID_CAPABILITY;
}
+ /* Don't actually copyout rights we aren't allowed to */
+ if (!ip_label_check(space, ip_object_to_port(object), msgt_name)) {
+ io_unlock(object);
+ ipc_entry_dealloc(space, name, entry);
+ is_write_unlock(space);
+
+ switch (msgt_name) {
+ case MACH_MSG_TYPE_PORT_SEND_ONCE:
+ ipc_port_release_sonce(ip_object_to_port(object));
+ break;
+ case MACH_MSG_TYPE_PORT_SEND:
+ ipc_port_release_send(ip_object_to_port(object));
+ break;
+ default:
+ panic("ipc_object_copyout_name: bad port label check failure");
+ }
+ return KERN_INVALID_CAPABILITY;
+ }
+
entry->ie_object = object;
}
* definitions in ipc_port.h.
*/
#define IO_BITS_PORT_INFO 0x0000f000 /* stupid port tricks */
-#define IO_BITS_KOTYPE 0x000007ff /* used by the object */
+#define IO_BITS_KOTYPE 0x000003ff /* used by the object */
#define IO_BITS_KOBJECT 0x00000800 /* port belongs to a kobject */
+#define IO_BITS_KOLABEL 0x00000400 /* The kobject has a label */
#define IO_BITS_OTYPE 0x7fff0000 /* determines a zone */
#define IO_BITS_ACTIVE 0x80000000 /* is object alive? */
#define io_otype(io) (((io)->io_bits & IO_BITS_OTYPE) >> 16)
#define io_kotype(io) ((io)->io_bits & IO_BITS_KOTYPE)
#define io_is_kobject(io) (((io)->io_bits & IO_BITS_KOBJECT) != IKOT_NONE)
-
+#define io_is_kolabeled(io) (((io)->io_bits & IO_BITS_KOLABEL) != 0)
#define io_makebits(active, otype, kotype) \
(((active) ? IO_BITS_ACTIVE : 0) | ((otype) << 16) | (kotype))
union {
ipc_kobject_t kobject;
+ ipc_kobject_label_t kolabel;
ipc_importance_task_t imp_task;
ipc_port_t sync_inheritor_port;
struct knote *sync_inheritor_knote;
#define ip_timestamp data.timestamp
#define ip_kobject kdata.kobject
+#define ip_kolabel kdata.kolabel
#define ip_imp_task kdata.imp_task
#define ip_sync_inheritor_port kdata.sync_inheritor_port
#define ip_sync_inheritor_knote kdata.sync_inheritor_knote
#define ip_kotype(port) io_kotype(ip_to_object(port))
#define ip_is_kobject(port) io_is_kobject(ip_to_object(port))
+#define ip_is_kolabeled(port) io_is_kolabeled(ip_to_object(port))
+#define ip_get_kobject(port) ipc_kobject_get(port)
+#define ip_label_check(space, port, msgt_name) \
+ (!ip_is_kolabeled(port) || ipc_kobject_label_check((space), (port), (msgt_name)))
#define ip_full_kernel(port) imq_full_kernel(&(port)->ip_messages)
#define ip_full(port) imq_full(&(port)->ip_messages)
kern_return_t
ipc_space_create(
ipc_table_size_t initial,
+ ipc_label_t label,
ipc_space_t *spacep)
{
ipc_space_t space;
space->is_table = table;
space->is_table_next = initial + 1;
space->is_task = NULL;
+ space->is_label = label;
space->is_low_mod = new_size;
space->is_high_mod = 0;
space->is_node_id = HOST_LOCAL_NODE; /* HOST_LOCAL_NODE, except proxy spaces */
return KERN_SUCCESS;
}
+/*
+ * Routine: ipc_space_label
+ * Purpose:
+ * Modify the label on a space. The desired
+ * label must be a super-set of the current
+ * label for the space (as rights may already
+ * have been copied out under the
+ * old label value).
+ * Conditions:
+ * Nothing locked.
+ * Returns:
+ * KERN_SUCCESS Updated the label
+ * KERN_INVALID_VALUE label not a superset of old
+ */
+kern_return_t
+ipc_space_label(
+ ipc_space_t space,
+ ipc_label_t label)
+{
+ is_write_lock(space);
+ if (!is_active(space)) {
+ is_write_unlock(space);
+ return KERN_SUCCESS;
+ }
+
+ if ((space->is_label & label) != space->is_label) {
+ is_write_unlock(space);
+ return KERN_INVALID_VALUE;
+ }
+ space->is_label = label;
+ is_write_unlock(space);
+ return KERN_SUCCESS;
+}
+
+/*
+ * Routine: ipc_space_add_label
+ * Purpose:
+ * Modify the label on a space. The desired
+ * label is added to the labels already set
+ * on the space.
+ * Conditions:
+ * Nothing locked.
+ * Returns:
+ * KERN_SUCCESS Updated the label
+ */
+kern_return_t
+ipc_space_add_label(
+ ipc_space_t space,
+ ipc_label_t label)
+{
+ is_write_lock(space);
+ if (!is_active(space)) {
+ is_write_unlock(space);
+ return KERN_SUCCESS;
+ }
+
+ space->is_label |= label;
+ is_write_unlock(space);
+ return KERN_SUCCESS;
+}
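For reference, a minimal user-space sketch of the label-replacement rule enforced above: ipc_space_label only accepts a new label that is a superset of the current one, while ipc_space_add_label simply ORs new bits in. The types and helpers below are illustrative stand-ins, not kernel API.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

typedef uint64_t label_t;

#define LABEL_DEXT     ((label_t)0x1)   /* mirrors IPC_LABEL_DEXT */
#define LABEL_PLATFORM ((label_t)0x2)   /* mirrors IPC_LABEL_PLATFORM */

/* Replacement is allowed only when every old bit survives in the new label. */
static bool
label_replace_ok(label_t old_label, label_t new_label)
{
	return (old_label & new_label) == old_label;
}

int
main(void)
{
	/* Growing the label is fine. */
	assert(label_replace_ok(LABEL_DEXT, LABEL_DEXT | LABEL_PLATFORM));
	/* Dropping a bit is rejected: rights may already have been copied out under it. */
	assert(!label_replace_ok(LABEL_DEXT | LABEL_PLATFORM, LABEL_DEXT));
	return 0;
}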
/*
* Routine: ipc_space_create_special
* Purpose:
space->is_bits = IS_INACTIVE | 1; /* 1 ref, not active, not growing */
space->is_table = IE_NULL;
space->is_task = TASK_NULL;
+ space->is_label = IPC_LABEL_SPECIAL;
space->is_table_next = 0;
space->is_low_mod = 0;
space->is_high_mod = 0;
ipc_entry_num_t is_table_hashed;/* count of hashed elements */
ipc_entry_num_t is_table_free; /* count of free elements */
ipc_entry_t is_table; /* an array of entries */
- task_t is_task; /* associated task */
struct ipc_table_size *is_table_next; /* info for larger table */
+ task_t is_task; /* associated task */
+ ipc_label_t is_label; /* [private] mandatory access label */
ipc_entry_num_t is_low_mod; /* lowest modified entry during growth */
ipc_entry_num_t is_high_mod; /* highest modified entry during growth */
struct bool_gen bool_gen; /* state for boolean RNG */
/* Create a new IPC space */
extern kern_return_t ipc_space_create(
ipc_table_size_t initial,
+ ipc_label_t label,
ipc_space_t *spacep);
+/* Change the label on an existing space */
+extern kern_return_t ipc_space_label(
+ ipc_space_t space,
+ ipc_label_t label);
+
+/* Add a label to an existing space */
+extern kern_return_t ipc_space_add_label(
+ ipc_space_t space,
+ ipc_label_t label);
+
/* Mark a space as dead and cleans up the entries*/
extern void ipc_space_terminate(
ipc_space_t space);
typedef struct ipc_kmsg *ipc_kmsg_t;
typedef uint8_t sync_qos_count_t;
+typedef uint64_t ipc_label_t;
+#define IPC_LABEL_NONE ((ipc_label_t)0x0)
+#define IPC_LABEL_DEXT ((ipc_label_t)0x1)
+#define IPC_LABEL_PLATFORM ((ipc_label_t)0x2)
+#define IPC_LABEL_SPECIAL ((ipc_label_t)0x3)
+
+typedef struct ipc_kobject_label *ipc_kobject_label_t;
+
#define IE_NULL ((ipc_entry_t) 0)
#define ITS_NULL ((ipc_table_size_t) 0)
ipc_port_t port)
{
if (IP_VALID(port)) {
- uintptr_t voucher = (uintptr_t) port->ip_kobject;
+ /* vouchers never labeled (they get transformed before use) */
+ if (ip_is_kolabeled(port)) {
+ return (uintptr_t)IV_NULL;
+ }
+
+ uintptr_t voucher = (uintptr_t)port->ip_kobject;
/*
* No need to lock because we have a reference on the
{
if (IP_VALID(port)) {
zone_require(port, ipc_object_zones[IOT_PORT]);
- ipc_voucher_t voucher = (ipc_voucher_t) port->ip_kobject;
+ ipc_voucher_t voucher = (ipc_voucher_t) ip_get_kobject(port);
/*
* No need to lock because we have a reference on the
{
mach_no_senders_notification_t *notification = (void *)msg;
ipc_port_t port = notification->not_header.msgh_remote_port;
+ ipc_voucher_t voucher = (ipc_voucher_t)ip_get_kobject(port);
require_ip_active(port);
assert(IKOT_VOUCHER == ip_kotype(port));
/* consume the reference donated by convert_voucher_to_port */
- zone_require((ipc_voucher_t)port->ip_kobject, ipc_voucher_zone);
- ipc_voucher_release((ipc_voucher_t)port->ip_kobject);
+ zone_require(voucher, ipc_voucher_zone);
+ ipc_voucher_release(voucher);
}
/*
{
if (IP_VALID(port)) {
zone_require(port, ipc_object_zones[IOT_PORT]);
- ipc_voucher_attr_control_t ivac = (ipc_voucher_attr_control_t) port->ip_kobject;
+ ipc_voucher_attr_control_t ivac = (ipc_voucher_attr_control_t) ip_get_kobject(port);
/*
* No need to lock because we have a reference on the
{
mach_no_senders_notification_t *notification = (void *)msg;
ipc_port_t port = notification->not_header.msgh_remote_port;
+ ipc_voucher_attr_control_t ivac;
require_ip_active(port);
assert(IKOT_VOUCHER_ATTR_CONTROL == ip_kotype(port));
/* release the reference donated by convert_voucher_attr_control_to_port */
- ivac_release((ipc_voucher_attr_control_t)port->ip_kobject);
+ ivac = (ipc_voucher_attr_control_t)ip_get_kobject(port);
+ zone_require(ivac, ipc_voucher_attr_control_zone);
+ ivac_release(ivac);
}
/*
return KERN_FAILURE;
}
- pthread_priority_voucher = (ipc_voucher_t)kmsg->ikm_voucher->ip_kobject;
+ pthread_priority_voucher = (ipc_voucher_t)ip_get_kobject(kmsg->ikm_voucher);
kr = mach_voucher_extract_attr_recipe(pthread_priority_voucher,
MACH_VOUCHER_ATTR_KEY_PTHPRIORITY,
content_data,
}
/* setup recipe for preprocessing of all the attributes. */
- pre_processed_voucher = (ipc_voucher_t)kmsg->ikm_voucher->ip_kobject;
+ pre_processed_voucher = (ipc_voucher_t)ip_get_kobject(kmsg->ikm_voucher);
kr = ipc_voucher_prepare_processing_recipe(pre_processed_voucher,
(mach_voucher_attr_raw_recipe_array_t)recipes,
}
/* setup recipe for auto redeem of all the attributes. */
- sent_voucher = (ipc_voucher_t)kmsg->ikm_voucher->ip_kobject;
+ sent_voucher = (ipc_voucher_t)ip_get_kobject(kmsg->ikm_voucher);
kr = ipc_voucher_prepare_processing_recipe(sent_voucher,
(mach_voucher_attr_raw_recipe_array_t)recipes,
#include <ipc/ipc_right.h>
#include <security/mac_mach_internal.h>
+#include <device/device_types.h>
#endif
/*
#if !MACH_IPC_DEBUG
kern_return_t
-mach_port_kobject(
+mach_port_kobject_description(
__unused ipc_space_t space,
__unused mach_port_name_t name,
__unused natural_t *typep,
- __unused mach_vm_address_t *addrp)
+ __unused mach_vm_address_t *addrp,
+ __unused kobject_description_t desc)
{
return KERN_FAILURE;
}
#else
kern_return_t
-mach_port_kobject(
+mach_port_kobject_description(
ipc_space_t space,
mach_port_name_t name,
natural_t *typep,
- mach_vm_address_t *addrp)
+ mach_vm_address_t *addrp,
+ kobject_description_t desc)
{
ipc_entry_t entry;
ipc_port_t port;
}
*typep = (unsigned int) ip_kotype(port);
- kaddr = (mach_vm_address_t)port->ip_kobject;
+ kaddr = (mach_vm_address_t)ip_get_kobject(port);
*addrp = 0;
#if (DEVELOPMENT || DEBUG)
if (kaddr && ip_is_kobject(port)) {
*addrp = VM_KERNEL_UNSLIDE_OR_PERM(kaddr);
}
#endif
+
+ io_object_t obj = NULL;
+ natural_t kotype = ip_kotype(port);
+ if (desc) {
+ *desc = '\0';
+ switch (kotype) {
+ case IKOT_IOKIT_OBJECT:
+ case IKOT_IOKIT_CONNECT:
+ case IKOT_IOKIT_IDENT:
+ case IKOT_UEXT_OBJECT:
+ obj = (io_object_t) kaddr;
+ iokit_add_reference(obj, IKOT_IOKIT_OBJECT);
+ break;
+
+ default:
+ break;
+ }
+ }
+
ip_unlock(port);
+ if (obj) {
+ iokit_port_object_description(obj, desc);
+ iokit_remove_reference(obj);
+ }
+
return KERN_SUCCESS;
}
#endif /* MACH_IPC_DEBUG */
+kern_return_t
+mach_port_kobject(
+ ipc_space_t space,
+ mach_port_name_t name,
+ natural_t *typep,
+ mach_vm_address_t *addrp)
+{
+ return mach_port_kobject_description(space, name, typep, addrp, NULL);
+}
+
/*
* Routine: mach_port_kernel_object [Legacy kernel call]
* Purpose:
/* No need to lock port because of how refs managed */
if (ip_kotype(port) == IKOT_ARCADE_REG) {
assert(ip_active(port));
- arcade_reg = (arcade_register_t)port->ip_kobject;
+ arcade_reg = (arcade_register_t)ip_get_kobject(port);
assert(arcade_reg == &arcade_register_global);
assert(arcade_reg->ar_port == port);
}
ip_lock(port);
if (IKOT_AU_SESSIONPORT == ip_kotype(port)) {
require_ip_active(port);
- aia_p = (struct auditinfo_addr *)port->ip_kobject;
+ aia_p = (struct auditinfo_addr *)ip_get_kobject(port);
}
ip_unlock(port);
}
require_ip_active(port);
assert(IKOT_AU_SESSIONPORT == ip_kotype(port));
- port_aia_p = (struct auditinfo_addr *)port->ip_kobject;
+ port_aia_p = (struct auditinfo_addr *)ip_get_kobject(port);
assert(NULL != port_aia_p);
audit_session_aiaunref(port_aia_p);
was_truncated_out) + 1;
}
-int
+unsigned int
backtrace_user(uintptr_t *bt, unsigned int max_frames,
- unsigned int *frames_out, bool *user_64_out, bool *was_truncated_out)
+ int *error_out, bool *user_64_out, bool *was_truncated_out)
{
return backtrace_thread_user(current_thread(), bt, max_frames,
- frames_out, user_64_out, was_truncated_out);
+ error_out, user_64_out, was_truncated_out);
}
-int
+unsigned int
backtrace_thread_user(void *thread, uintptr_t *bt, unsigned int max_frames,
- unsigned int *frames_out, bool *user_64_out, bool *was_truncated_out)
+ int *error_out, bool *user_64_out, bool *was_truncated_out)
{
bool user_64;
uintptr_t pc = 0, fp = 0, next_fp = 0;
assert(bt != NULL);
assert(max_frames > 0);
- assert(frames_out != NULL);
#if defined(__x86_64__)
if (user_64_out) {
*user_64_out = user_64;
}
+ if (error_out) {
+ *error_out = err;
+ }
- *frames_out = frame_index;
- return err;
+ return frame_index;
#undef INVALID_USER_FP
}
* thread, nor can it be called from interrupt context or with interrupts
* disabled.
*
- * @param btwritten On success, the number of return addresses written is stored
- * here.
+ * @param error The precise error code that occurred is stored here, or 0 if no
+ * error occurred.
*
* @param user64 On success, true is stored here if user space was running in
* 64-bit mode, and false is stored otherwise.
*
- * @return Returns 0 on success and an errno value on error.
+ * @param was_truncated true is stored here if the full stack could not be written
+ * to bt.
+ *
+ * @return Returns the number of frames written to bt.
*
* @seealso backtrace
*/
-int backtrace_user(uintptr_t *bt, unsigned int btlen, unsigned int *btwritten,
+unsigned int backtrace_user(uintptr_t *bt, unsigned int btlen, int *error,
bool *user64, bool *was_truncated);
/*
*
* @see backtrace_user
*/
-int backtrace_thread_user(void *thread, uintptr_t *bt, unsigned int btlen,
- unsigned int *btwritten, bool *user64, bool *was_truncated);
+unsigned int backtrace_thread_user(void *thread, uintptr_t *bt,
+ unsigned int btlen, int *error, bool *user64, bool *was_truncated);
__END_DECLS
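As a standalone model of the new calling convention, the hypothetical helper below mirrors the shape of backtrace_user: the frame count comes back as the return value and any error is reported through an optional out-parameter, which is how the updated implementation above is meant to be consumed.

#include <stdio.h>

/* Hypothetical stand-in for backtrace_user(): returns frames written, reports errors via *error_out. */
static unsigned int
collect_frames(unsigned long *bt, unsigned int max_frames, int *error_out)
{
	unsigned int filled = 0;
	int err = 0;

	/* Pretend only three frames are recoverable. */
	for (; filled < max_frames && filled < 3; filled++) {
		bt[filled] = 0x1000 + filled;
	}

	if (error_out) {
		*error_out = err;       /* 0 on success, errno-style value on failure */
	}
	return filled;
}

int
main(void)
{
	unsigned long bt[8];
	int error = 0;
	unsigned int n = collect_frames(bt, 8, &error);

	if (error != 0) {
		fprintf(stderr, "backtrace failed: %d\n", error);
		return 1;
	}
	printf("%u frames captured\n", n);
	return 0;
}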
kThreadWaitWorkloopSyncWait = 0x10,
kThreadWaitOnProcess = 0x11,
kThreadWaitSleepWithInheritor = 0x12,
+ kThreadWaitCompressor = 0x14,
} __attribute__((packed)) block_hint_t;
_Static_assert(sizeof(block_hint_t) <= sizeof(short),
return elt;
}
+static inline void
+circle_queue_rotate_head_forward(circle_queue_t cq)
+{
+ queue_entry_t first = circle_queue_first(cq);
+ if (first != NULL) {
+ cq->head = first->next;
+ }
+}
+
+static inline void
+circle_queue_rotate_head_backward(circle_queue_t cq)
+{
+ queue_entry_t last = circle_queue_last(cq);
+ if (last != NULL) {
+ cq->head = last;
+ }
+}
+
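A minimal standalone model of the rotate-forward helper, under the assumption that the queue head simply points at the first element of a circular list; the node and queue types here are illustrative, not the kernel's queue_entry_t.

#include <stddef.h>
#include <stdio.h>

struct node {
	struct node *next;
	int value;
};

struct circle {
	struct node *head;      /* NULL when empty */
};

/* Models circle_queue_rotate_head_forward(): the old head becomes the logical tail. */
static void
rotate_head_forward(struct circle *cq)
{
	if (cq->head != NULL) {
		cq->head = cq->head->next;
	}
}

int
main(void)
{
	struct node a = { .value = 1 }, b = { .value = 2 }, c = { .value = 3 };
	a.next = &b; b.next = &c; c.next = &a;  /* circular: a -> b -> c -> a */
	struct circle cq = { .head = &a };

	rotate_head_forward(&cq);
	printf("new head: %d\n", cq.head->value);   /* prints 2; node 1 is now last */
	return 0;
}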
/*
* Macro: cqe_element
* Function:
clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime);
- *result = mach_absolute_time() + abstime;
+ if (os_add_overflow(mach_absolute_time(), abstime, result)) {
+ *result = UINT64_MAX;
+ }
}
void
uint64_t abstime,
uint64_t *result)
{
- *result = mach_absolute_time() + abstime;
+ if (os_add_overflow(mach_absolute_time(), abstime, result)) {
+ *result = UINT64_MAX;
+ }
}
void
uint64_t conttime,
uint64_t *result)
{
- *result = mach_continuous_time() + conttime;
+ if (os_add_overflow(mach_continuous_time(), conttime, result)) {
+ *result = UINT64_MAX;
+ }
}
void
{
assert(interval != 0);
- *deadline += interval;
+ // *deadline += interval;
+ if (os_add_overflow(*deadline, interval, deadline)) {
+ *deadline = UINT64_MAX;
+ }
if (*deadline <= abstime) {
- *deadline = abstime + interval;
- abstime = mach_absolute_time();
+ // *deadline = abstime + interval;
+ if (os_add_overflow(abstime, interval, deadline)) {
+ *deadline = UINT64_MAX;
+ }
+ abstime = mach_absolute_time();
if (*deadline <= abstime) {
- *deadline = abstime + interval;
+ // *deadline = abstime + interval;
+ if (os_add_overflow(abstime, interval, deadline)) {
+ *deadline = UINT64_MAX;
+ }
}
}
}
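A user-space sketch of the saturating addition these hunks introduce; __builtin_add_overflow stands in for the kernel's os_add_overflow, and the deadline clamps to UINT64_MAX instead of wrapping.

#include <stdint.h>
#include <stdio.h>

/* Clamp-to-UINT64_MAX deadline computation, mirroring the pattern above. */
static uint64_t
deadline_add(uint64_t now, uint64_t interval)
{
	uint64_t result;
	if (__builtin_add_overflow(now, interval, &result)) {
		result = UINT64_MAX;    /* saturate instead of wrapping past zero */
	}
	return result;
}

int
main(void)
{
	printf("%llu\n", (unsigned long long)deadline_add(100, 50));            /* 150 */
	printf("%llu\n", (unsigned long long)deadline_add(UINT64_MAX - 1, 10)); /* UINT64_MAX */
	return 0;
}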
ip_lock(port);
if (ip_kotype(port) == IKOT_HOST_NOTIFY) {
- entry = (host_notify_t)port->ip_kobject;
+ entry = (host_notify_t)ip_get_kobject(port);
assert(entry != NULL);
ipc_kobject_set_atomically(port, IKO_NULL, IKOT_NONE);
ip_unlock(port);
ip_lock(port);
assert(ip_kotype(port) == IKOT_HOST_NOTIFY);
- assert(port->ip_kobject == (ipc_kobject_t)entry);
+ assert(ip_get_kobject(port) == (ipc_kobject_t)entry);
ipc_kobject_set_atomically(port, IKO_NULL, IKOT_NONE);
ip_unlock(port);
if (ip_active(port) &&
((ip_kotype(port) == IKOT_CLOCK) ||
(ip_kotype(port) == IKOT_CLOCK_CTRL))) {
- clock = (clock_t) port->ip_kobject;
+ clock = (clock_t)ip_get_kobject(port);
}
ip_unlock(port);
}
ip_lock(port);
if (ip_active(port) &&
(ip_kotype(port) == IKOT_CLOCK_CTRL)) {
- clock = (clock_t) port->ip_kobject;
+ clock = (clock_t) ip_get_kobject(port);
}
ip_unlock(port);
}
return clock;
}
if (ip_kotype(port) == IKOT_CLOCK) {
- clock = (clock_t) port->ip_kobject;
+ clock = (clock_t) ip_get_kobject(port);
}
ip_unlock(port);
return clock;
if (IP_VALID(port)) {
if (ip_kotype(port) == IKOT_HOST ||
ip_kotype(port) == IKOT_HOST_PRIV) {
- host = (host_t) port->ip_kobject;
+ host = (host_t) ip_get_kobject(port);
require_ip_active(port);
}
}
ip_lock(port);
if (ip_active(port) &&
(ip_kotype(port) == IKOT_HOST_PRIV)) {
- host = (host_t) port->ip_kobject;
+ host = (host_t) ip_get_kobject(port);
}
ip_unlock(port);
}
ip_lock(port);
if (ip_active(port) &&
(ip_kotype(port) == IKOT_PROCESSOR)) {
- processor = (processor_t) port->ip_kobject;
+ processor = (processor_t) ip_get_kobject(port);
}
ip_unlock(port);
}
if (ip_active(port) &&
((ip_kotype(port) == IKOT_PSET) ||
(matchn && (ip_kotype(port) == IKOT_PSET_NAME)))) {
- pset = (processor_set_t) port->ip_kobject;
+ pset = (processor_set_t) ip_get_kobject(port);
}
*ppset = pset;
ip_lock(port);
if (ip_active(port) &&
(ip_kotype(port) == IKOT_HOST_SECURITY)) {
- host = (host_t) port->ip_kobject;
+ host = (host_t) ip_get_kobject(port);
}
ip_unlock(port);
}
#include <kern/sync_sema.h>
#include <kern/counters.h>
#include <kern/work_interval.h>
+#include <kern/suid_cred.h>
#include <vm/vm_protos.h>
static int mig_table_max_displ;
static mach_msg_size_t mig_reply_size = sizeof(mig_reply_error_t);
-
+static zone_t ipc_kobject_label_zone;
const struct mig_subsystem *mig_e[] = {
(const struct mig_subsystem *)&mach_vm_subsystem,
#endif
};
-void
+static void
mig_init(void)
{
unsigned int i, n = sizeof(mig_e) / sizeof(const struct mig_subsystem *);
printf("mig_table_max_displ = %d\n", mig_table_max_displ);
}
+/*
+ * Routine: ipc_kobject_init
+ * Purpose:
+ * Initialize the kobject subsystem (MIG dispatch tables and the kobject label zone).
+ */
+void
+ipc_kobject_init(void)
+{
+ int label_max = CONFIG_TASK_MAX + CONFIG_THREAD_MAX + 1000 /* UEXT estimate */;
+
+ mig_init();
+
+ ipc_kobject_label_zone =
+ zinit(sizeof(struct ipc_kobject_label),
+ label_max * sizeof(struct ipc_kobject_label),
+ sizeof(struct ipc_kobject_label),
+ "ipc kobject labels");
+}
/*
* Routine: ipc_kobject_server
port->ip_spares[2] = (port->ip_object.io_bits & IO_BITS_KOTYPE);
#endif /* MACH_ASSERT */
port->ip_object.io_bits = (port->ip_object.io_bits & ~IO_BITS_KOTYPE) | type;
- port->ip_kobject = kobject;
+ if (ip_is_kolabeled(port)) {
+ ipc_kobject_label_t labelp = port->ip_kolabel;
+ labelp->ikol_kobject = kobject;
+ } else {
+ port->ip_kobject = kobject;
+ }
if (type != IKOT_NONE) {
/* Once set, this bit can never be unset */
port->ip_object.io_bits |= IO_BITS_KOBJECT;
}
}
+/*
+ * Routine: ipc_kobject_init_port
+ * Purpose:
+ * Initialize a kobject port with the given types and options.
+ *
+ * This function never fails.
+ */
+static inline void
+ipc_kobject_init_port(
+ ipc_port_t port,
+ ipc_kobject_t kobject,
+ ipc_kobject_type_t type,
+ ipc_kobject_alloc_options_t options)
+{
+ ipc_kobject_set_atomically(port, kobject, type);
+
+ if (options & IPC_KOBJECT_ALLOC_MAKE_SEND) {
+ ipc_port_make_send_locked(port);
+ }
+ if (options & IPC_KOBJECT_ALLOC_NSREQUEST) {
+ ipc_port_make_sonce_locked(port);
+ port->ip_nsrequest = port;
+ }
+ if (options & IPC_KOBJECT_ALLOC_NO_GRANT) {
+ port->ip_no_grant = 1;
+ }
+ if (options & IPC_KOBJECT_ALLOC_IMMOVABLE_SEND) {
+ port->ip_immovable_send = 1;
+ }
+}
+
/*
* Routine: ipc_kobject_alloc_port
* Purpose:
ipc_kobject_type_t type,
ipc_kobject_alloc_options_t options)
{
- ipc_port_init_flags_t flags;
- ipc_space_t space;
- ipc_port_t port;
+ ipc_port_t port = ipc_port_alloc_kernel();
- if (options & IPC_KOBJECT_ALLOC_IN_TRANSIT) {
- /* kobject port intended to be copied out to user-space */
- flags = IPC_PORT_INIT_MESSAGE_QUEUE;
- space = IS_NULL;
- } else {
- /* true kernel-bound kobject port */
- flags = IPC_PORT_INIT_NONE;
- space = ipc_space_kernel;
- }
- port = ipc_port_alloc_special(space, flags);
if (port == IP_NULL) {
panic("ipc_kobject_alloc_port(): failed to allocate port");
}
- ipc_kobject_set_atomically(port, kobject, type);
+ ipc_kobject_init_port(port, kobject, type, options);
+ return port;
+}
- if (options & IPC_KOBJECT_ALLOC_MAKE_SEND) {
- ipc_port_make_send_locked(port);
- }
+/*
+ * Routine: ipc_kobject_alloc_labeled_port
+ * Purpose:
+ * Allocate a kobject port and associated mandatory access label
+ * in the kernel space of the specified type.
+ *
+ * This function never fails.
+ *
+ * Conditions:
+ * No locks held (memory is allocated)
+ */
- if (options & IPC_KOBJECT_ALLOC_IN_TRANSIT) {
- /* reset the port like it has been copied in circularity checked */
- if (options & IPC_KOBJECT_ALLOC_NSREQUEST) {
- panic("ipc_kobject_alloc_port(): invalid option for user-space port");
- }
- port->ip_mscount = 0;
- assert(port->ip_tempowner == 0);
- assert(port->ip_receiver == IS_NULL);
- port->ip_receiver = IS_NULL;
- port->ip_receiver_name = MACH_PORT_NULL;
- } else {
- if (options & IPC_KOBJECT_ALLOC_NSREQUEST) {
- ipc_port_make_sonce_locked(port);
- port->ip_nsrequest = port;
- }
- }
- if (options & IPC_KOBJECT_ALLOC_IMMOVABLE_SEND) {
- port->ip_immovable_send = 1;
+ipc_port_t
+ipc_kobject_alloc_labeled_port(
+ ipc_kobject_t kobject,
+ ipc_kobject_type_t type,
+ ipc_label_t label,
+ ipc_kobject_alloc_options_t options)
+{
+ ipc_port_t port;
+ ipc_kobject_label_t labelp;
+
+ port = ipc_port_alloc_kernel();
+ if (port == IP_NULL) {
+ panic("ipc_kobject_alloc_port(): failed to allocate port");
}
- if (options & IPC_KOBJECT_ALLOC_NO_GRANT) {
- port->ip_no_grant = 1;
+
+ labelp = (ipc_kobject_label_t)zalloc(ipc_kobject_label_zone);
+ if (labelp == NULL) {
+ panic("ipc_kobject_alloc_labeled_port(): failed to allocate label");
}
+ labelp->ikol_label = label;
+ port->ip_kolabel = labelp;
+ port->ip_object.io_bits |= IO_BITS_KOLABEL;
+ ipc_kobject_init_port(port, kobject, type, options);
return port;
}
return rc;
}
+/*
+ * Routine: ipc_kobject_make_send_lazy_alloc_labeled_port
+ * Purpose:
+ * Make a send right for a kobject port.
+ *
+ * A location owning this port is passed in port_store.
+ * If no port exists, a port is made lazily.
+ *
+ * A send right is made for the port, and if this is the first one
+ * (possibly not for the first time), then the no-more-senders
+ * notification is rearmed.
+ *
+ * When a notification is armed, the kobject must donate
+ * one of its references to the port. It is expected
+ * the no-more-senders notification will consume this reference.
+ *
+ * Returns:
+ * TRUE if a notification was armed
+ * FALSE otherwise
+ *
+ * Conditions:
+ * Nothing is locked, memory can be allocated.
+ * The caller must be able to donate a kobject reference to the port.
+ */
+boolean_t
+ipc_kobject_make_send_lazy_alloc_labeled_port(
+ ipc_port_t *port_store,
+ ipc_kobject_t kobject,
+ ipc_kobject_type_t type,
+ ipc_label_t label)
+{
+ ipc_port_t port, previous;
+ boolean_t rc = FALSE;
+
+ port = os_atomic_load(port_store, dependency);
+
+ if (!IP_VALID(port)) {
+ port = ipc_kobject_alloc_labeled_port(kobject, type, label,
+ IPC_KOBJECT_ALLOC_MAKE_SEND | IPC_KOBJECT_ALLOC_NSREQUEST);
+ if (os_atomic_cmpxchgv(port_store, IP_NULL, port, &previous, release)) {
+ return TRUE;
+ }
+
+ // undo what ipc_kobject_alloc_labeled_port() did above
+ port->ip_nsrequest = IP_NULL;
+ port->ip_mscount = 0;
+ port->ip_sorights = 0;
+ port->ip_srights = 0;
+ ip_release(port);
+ ip_release(port);
+ zfree(ipc_kobject_label_zone, port->ip_kolabel);
+ port->ip_object.io_bits &= ~IO_BITS_KOLABEL;
+ port->ip_kolabel = NULL;
+ ipc_port_dealloc_kernel(port);
+
+ port = previous;
+ assert(ip_is_kolabeled(port));
+ }
+
+ ip_lock(port);
+ ipc_port_make_send_locked(port);
+ if (port->ip_srights == 1) {
+ ipc_port_make_sonce_locked(port);
+ assert(port->ip_nsrequest == IP_NULL);
+ port->ip_nsrequest = port;
+ rc = TRUE;
+ }
+ ip_unlock(port);
+
+ return rc;
+}
+
+
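The lazy-publish pattern above (speculatively allocate, install with a compare-and-swap, and tear the allocation down if another thread won the race) can be modeled in standalone C11; atomic_compare_exchange_strong stands in for os_atomic_cmpxchgv and all names are illustrative.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	int id;
};

/* Returns the published object, allocating and installing one if none exists yet. */
static struct obj *
lazy_get(_Atomic(struct obj *) *store)
{
	struct obj *cur = atomic_load(store);
	if (cur != NULL) {
		return cur;
	}

	struct obj *fresh = malloc(sizeof(*fresh));
	fresh->id = 42;

	struct obj *expected = NULL;
	if (atomic_compare_exchange_strong(store, &expected, fresh)) {
		return fresh;           /* we published our allocation */
	}

	free(fresh);                    /* lost the race: undo and use the winner's */
	return expected;
}

int
main(void)
{
	_Atomic(struct obj *) store = NULL;
	struct obj *o = lazy_get(&store);
	printf("id=%d\n", o->id);
	free(o);
	return 0;
}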
/*
* Routine: ipc_kobject_destroy
* Purpose:
* Release any kernel object resources associated
* with the port, which is being destroyed.
*
- * This should only be needed when resources are
- * associated with a user's port. In the normal case,
- * when the kernel is the receiver, the code calling
- * ipc_port_dealloc_kernel should clean up the resources.
+ * This path to free object resources should only be
+ * needed when resources are associated with a user's port.
+ * In the normal case, when the kernel is the receiver,
+ * the code calling ipc_port_dealloc_kernel should clean
+ * up the object resources.
+ *
+ * Cleans up any kobject label that might be present.
* Conditions:
* The port is not locked, but it is dead.
*/
host_notify_port_destroy(port);
break;
+ case IKOT_SUID_CRED:
+ suid_cred_destroy(port);
+ break;
+
default:
break;
}
+
+ if (ip_is_kolabeled(port)) {
+ ipc_kobject_label_t labelp = port->ip_kolabel;
+
+ assert(labelp != NULL);
+ assert(ip_is_kobject(port));
+ port->ip_kolabel = NULL;
+ port->ip_object.io_bits &= ~IO_BITS_KOLABEL;
+ zfree(ipc_kobject_label_zone, labelp);
+ }
}
+/*
+ * Routine: ipc_kobject_label_check
+ * Purpose:
+ * Check to see if the space is allowed to possess a
+ * right for the given port. In order to qualify, the
+ * space label must contain all the privileges listed
+ * in the port/kobject label.
+ *
+ * Conditions:
+ * Space is write locked and active.
+ * Port is locked and active.
+ */
+boolean_t
+ipc_kobject_label_check(
+ ipc_space_t space,
+ ipc_port_t port,
+ __unused mach_msg_type_name_t msgt_name)
+{
+ ipc_kobject_label_t labelp;
+
+ assert(is_active(space));
+ assert(ip_active(port));
+
+ /* Unlabeled ports/kobjects are always allowed */
+ if (!ip_is_kolabeled(port)) {
+ return TRUE;
+ }
+
+ labelp = port->ip_kolabel;
+ return (labelp->ikol_label & space->is_label) == labelp->ikol_label;
+}
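The check reduces to a containment test on bit masks: an unlabeled port always passes, and a labeled port requires the space to hold every bit in the port's label. A tiny standalone illustration with made-up types:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

typedef uint64_t label_t;

struct fake_port {
	bool    labeled;        /* models io_is_kolabeled() */
	label_t label;          /* models ikol_label */
};

/* Models ip_label_check(): unlabeled ports always pass; labeled ports must be contained. */
static bool
label_check(label_t space_label, const struct fake_port *port)
{
	if (!port->labeled) {
		return true;
	}
	return (port->label & space_label) == port->label;
}

int
main(void)
{
	struct fake_port plain = { .labeled = false };
	struct fake_port dext  = { .labeled = true, .label = 0x1 /* like IPC_LABEL_DEXT */ };

	assert(label_check(0x0, &plain));       /* unlabeled: always allowed */
	assert(label_check(0x1 | 0x2, &dext));  /* space holds the bit: allowed */
	assert(!label_check(0x2, &dext));       /* space lacks the bit: copyout refused */
	return 0;
}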
boolean_t
ipc_kobject_notify(
case IKOT_WORK_INTERVAL:
work_interval_port_notify(request_header);
return TRUE;
+
+ case IKOT_SUID_CRED:
+ suid_cred_notify(request_header);
+ return TRUE;
}
+
break;
case MACH_NOTIFY_PORT_DELETED:
#define IKOT_UEXT_OBJECT 41
#define IKOT_ARCADE_REG 42
+#define IKOT_SUID_CRED 48
+
/*
* Add new entries here and adjust IKOT_UNKNOWN.
* Please keep ipc/ipc_object.c:ikot_print_array up to date.
*/
-#define IKOT_UNKNOWN 43 /* magic catchall */
+#define IKOT_UNKNOWN 49 /* magic catchall */
#define IKOT_MAX_TYPE (IKOT_UNKNOWN+1) /* # of IKOT_ types */
#ifdef MACH_KERNEL_PRIVATE
-/*
- * Define types of kernel objects that use page lists instead
- * of entry lists for copyin of out of line memory.
- */
+struct ipc_kobject_label {
+ ipc_label_t ikol_label; /* [private] mandatory access label */
+ ipc_kobject_t ikol_kobject; /* actual kobject address */
+};
+
+/* initialization of kobject subsystem */
+extern void ipc_kobject_init(void);
/* Dispatch a kernel server function */
extern ipc_kmsg_t ipc_kobject_server(
IPC_KOBJECT_ALLOC_NO_GRANT = 0x00000004,
/* Make all the send rights immovable */
IPC_KOBJECT_ALLOC_IMMOVABLE_SEND = 0x00000008,
- /* Make the port in-transit from the get-go */
- IPC_KOBJECT_ALLOC_IN_TRANSIT = 0x00000010,
+ /* Add a label structure to the port */
+ IPC_KOBJECT_ALLOC_LABEL = 0x00000010,
});
/* Allocates a kobject port, never fails */
ipc_kobject_type_t type,
ipc_kobject_alloc_options_t options);
+/* Allocates a kobject port, never fails */
+extern ipc_port_t ipc_kobject_alloc_labeled_port(
+ ipc_kobject_t kobject,
+ ipc_kobject_type_t type,
+ ipc_label_t label,
+ ipc_kobject_alloc_options_t options);
+
/* Makes a send right, lazily allocating a kobject port, arming for no-senders, never fails */
extern boolean_t ipc_kobject_make_send_lazy_alloc_port(
ipc_port_t *port_store,
ipc_kobject_t kobject,
ipc_kobject_type_t type) __result_use_check;
+/* Makes a send right, lazily allocating a kobject port, arming for no-senders, never fails */
+extern boolean_t ipc_kobject_make_send_lazy_alloc_labeled_port(
+ ipc_port_t *port_store,
+ ipc_kobject_t kobject,
+ ipc_kobject_type_t type,
+ ipc_label_t label) __result_use_check;
+
+/* Get the kobject address associated with a port */
+static inline ipc_kobject_t
+ipc_kobject_get(ipc_port_t port)
+{
+ if (ip_is_kobject(port)) {
+ if (ip_is_kolabeled(port)) {
+ return port->ip_kolabel->ikol_kobject;
+ }
+ return port->ip_kobject;
+ }
+ return 0;
+}
+
+/* Check if a kobject can be copied out to a given space */
+extern boolean_t ipc_kobject_label_check(
+ ipc_space_t space,
+ ipc_port_t port,
+ mach_msg_type_name_t msgt_name);
/* Release any kernel object resources associated with a port */
extern void ipc_kobject_destroy(
mach_msg_size_t send_size,
mach_msg_option_t option,
mach_msg_timeout_t timeout_val)
+{
+ return kernel_mach_msg_send(msg, send_size, option, timeout_val, NULL);
+}
+
+mach_msg_return_t
+kernel_mach_msg_send(
+ mach_msg_header_t *msg,
+ mach_msg_size_t send_size,
+ mach_msg_option_t option,
+ mach_msg_timeout_t timeout_val,
+ boolean_t *message_moved)
{
ipc_kmsg_t kmsg;
mach_msg_return_t mr;
KDBG(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_INFO) | DBG_FUNC_START);
+ if (message_moved) {
+ *message_moved = FALSE;
+ }
+
mr = ipc_kmsg_get_from_kernel(msg, send_size, &kmsg);
if (mr != MACH_MSG_SUCCESS) {
KDBG(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_INFO) | DBG_FUNC_END, mr);
return mr;
}
+ if (message_moved) {
+ *message_moved = TRUE;
+ }
+
/*
* Until we are sure of its effects, we are disabling
* importance donation from the kernel-side of user
* MACH_RCV_PORT_DIED The reply port was deallocated.
*/
-mach_msg_return_t mach_msg_rpc_from_kernel_body(mach_msg_header_t *msg,
- mach_msg_size_t send_size, mach_msg_size_t rcv_size, boolean_t legacy);
-
#if IKM_SUPPORT_LEGACY
#undef mach_msg_rpc_from_kernel
mach_msg_size_t send_size,
mach_msg_size_t rcv_size)
{
- return mach_msg_rpc_from_kernel_body(msg, send_size, rcv_size, TRUE);
+ return kernel_mach_msg_rpc(msg, send_size, rcv_size, TRUE, NULL);
}
-
#endif /* IKM_SUPPORT_LEGACY */
mach_msg_return_t
mach_msg_size_t send_size,
mach_msg_size_t rcv_size)
{
- return mach_msg_rpc_from_kernel_body(msg, send_size, rcv_size, FALSE);
+ return kernel_mach_msg_rpc(msg, send_size, rcv_size, FALSE, NULL);
}
mach_msg_return_t
-mach_msg_rpc_from_kernel_body(
+kernel_mach_msg_rpc(
mach_msg_header_t *msg,
mach_msg_size_t send_size,
mach_msg_size_t rcv_size,
#if !IKM_SUPPORT_LEGACY
__unused
#endif
- boolean_t legacy)
+ boolean_t legacy,
+ boolean_t *message_moved)
{
thread_t self = current_thread();
ipc_port_t reply;
KDBG(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_INFO) | DBG_FUNC_START);
+ if (message_moved) {
+ *message_moved = FALSE;
+ }
+
mr = ipc_kmsg_get_from_kernel(msg, send_size, &kmsg);
if (mr != MACH_MSG_SUCCESS) {
KDBG(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_INFO) | DBG_FUNC_END, mr);
return mr;
}
+ if (message_moved) {
+ *message_moved = TRUE;
+ }
+
/*
* respect the thread's SEND_IMPORTANCE option to force importance
* donation from the kernel-side of user threads
* query it to get a reference to the desired interface.
*/
ppv = NULL;
- mig_object = (mig_object_t)port->ip_kobject;
+ mig_object = (mig_object_t) ip_get_kobject(port);
mig_object->pVtbl->QueryInterface((IMIGObject *)mig_object, iid, &ppv);
ip_unlock(port);
return (mig_object_t)ppv;
assert(IKOT_MIG == ip_kotype(port));
/* consume the reference donated by convert_mig_object_to_port */
- mig_object_deallocate((mig_object_t)port->ip_kobject);
+ mig_object_deallocate((mig_object_t) ip_get_kobject(port));
}
/*
#define mach_msg_rpc_from_kernel mach_msg_rpc_from_kernel_proper
+#ifdef XNU_KERNEL_PRIVATE
+mach_msg_return_t kernel_mach_msg_rpc(
+ mach_msg_header_t *msg,
+ mach_msg_size_t send_size,
+ mach_msg_size_t rcv_size,
+ boolean_t legacy,
+ boolean_t *message_moved);
+#endif /* XNU_KERNEL_PRIVATE */
+
extern void
mach_msg_destroy_from_kernel_proper(
mach_msg_header_t *msg);
mach_msg_option_t option,
mach_msg_timeout_t timeout_val);
+extern mach_msg_return_t kernel_mach_msg_send(
+ mach_msg_header_t *msg,
+ mach_msg_size_t send_size,
+ mach_msg_option_t option,
+ mach_msg_timeout_t timeout_val,
+ boolean_t *message_moved);
+
extern mach_msg_return_t mach_msg_send_from_kernel_with_options(
mach_msg_header_t *msg,
mach_msg_size_t send_size,
extern void mach_msg_receive_continue(void);
-/* Initialize kernel server dispatch table */
-extern void mig_init(void);
-
/*
* Kernel implementation of the MIG object base class
*
ip_lock(port);
if (ip_active(port) && IKOT_FILEPORT == ip_kotype(port)) {
- fg = (void *)port->ip_kobject;
+ fg = (void *) ip_get_kobject(port);
}
ip_unlock(port);
ip_lock(port);
- fg = (struct fileglob *)port->ip_kobject;
+ fg = (struct fileglob *) ip_get_kobject(port);
if (!ip_active(port)) {
panic("Inactive port passed to fileport_notify()\n");
*/
if (ip_kotype(port) == IKOT_SEMAPHORE) {
require_ip_active(port);
- semaphore = (semaphore_t) port->ip_kobject;
+ semaphore = (semaphore_t) ip_get_kobject(port);
semaphore_reference(semaphore);
return semaphore;
}
require_ip_active(port);
assert(IKOT_SEMAPHORE == ip_kotype(port));
- semaphore_dereference((semaphore_t)port->ip_kobject);
+ semaphore_dereference((semaphore_t) ip_get_kobject(port));
}
lock_set_t
int i;
- kr = ipc_space_create(&ipc_table_entries[0], &space);
+ kr = ipc_space_create(&ipc_table_entries[0], IPC_LABEL_NONE, &space);
if (kr != KERN_SUCCESS) {
panic("ipc_task_init");
}
ip_unlock(port);
return TASK_NULL;
}
- task = (task_t) port->ip_kobject;
+ task = (task_t) ip_get_kobject(port);
assert(task != TASK_NULL);
if (task_conversion_eval(ct, task)) {
ip_unlock(port);
return TASK_INSPECT_NULL;
}
- task = (task_inspect_t)port->ip_kobject;
+ task = (task_inspect_t) ip_get_kobject(port);
assert(task != TASK_INSPECT_NULL);
/*
* Normal lock ordering puts task_lock() before ip_lock().
if (ip_kotype(port) == IKOT_TASK) {
task_t ct = current_task();
- task = (task_t)port->ip_kobject;
+ task = (task_t) ip_get_kobject(port);
assert(task != TASK_NULL);
if (task_conversion_eval(ct, task)) {
if (ip_active(port) &&
(ip_kotype(port) == IKOT_TASK ||
ip_kotype(port) == IKOT_TASK_NAME)) {
- task = (task_name_t)port->ip_kobject;
+ task = (task_name_t) ip_get_kobject(port);
assert(task != TASK_NAME_NULL);
task_reference_internal(task);
require_ip_active(port);
if (ip_kotype(port) == IKOT_TASK) {
- task = (task_inspect_t)port->ip_kobject;
+ task = (task_inspect_t) ip_get_kobject(port);
assert(task != TASK_INSPECT_NULL);
task_reference_internal(task);
if (ip_active(port) &&
ip_kotype(port) == IKOT_TASK_RESUME) {
- task = (task_suspension_token_t)port->ip_kobject;
+ task = (task_suspension_token_t) ip_get_kobject(port);
assert(task != TASK_NULL);
task_reference_internal(task);
require_ip_active(port);
if (ip_kotype(port) == IKOT_THREAD) {
- thread = (thread_t)port->ip_kobject;
+ thread = (thread_t) ip_get_kobject(port);
assert(thread != THREAD_NULL);
if (options & PORT_TO_THREAD_NOT_CURRENT_THREAD) {
if (ip_active(port) &&
ip_kotype(port) == IKOT_THREAD) {
- thread = (thread_inspect_t)port->ip_kobject;
+ thread = (thread_inspect_t) ip_get_kobject(port);
assert(thread != THREAD_INSPECT_NULL);
thread_reference_internal((thread_t)thread);
}
/* if size was too large for a zone, then use kmem_free */
vm_map_t alloc_map = kernel_map;
+ size = round_page(size);
if ((((vm_offset_t) data) >= kalloc_map_min) && (((vm_offset_t) data) <= kalloc_map_max)) {
alloc_map = kalloc_map;
#include <vm/vm_pageout.h>
#include <vm/vm_fault.h>
#include <vm/vm_shared_region.h>
+#include <vm/vm_compressor.h>
#include <libkern/OSKextLibPrivate.h>
#if defined(__x86_64__)
case kThreadWaitSleepWithInheritor:
kdp_sleep_with_inheritor_find_owner(thread->waitq, thread->wait_event, waitinfo);
break;
+ case kThreadWaitCompressor:
+ kdp_compressor_busy_find_owner(thread->wait_event, waitinfo);
+ break;
default:
waitinfo->owner = 0;
waitinfo->context = 0;
#include <kern/lock_group.h>
#include <kern/mk_timer.h>
#include <kern/thread_call.h>
+#include <ipc/ipc_kmsg.h>
static zone_t mk_timer_zone;
mk_timer_create_trap(
__unused struct mk_timer_create_trap_args *args)
{
- mk_timer_t timer;
- ipc_space_t myspace = current_space();
- mach_port_name_t name = MACH_PORT_NULL;
- ipc_port_t port;
- kern_return_t result;
-
+ mk_timer_t timer;
+ ipc_space_t myspace = current_space();
+ mach_port_name_t name = MACH_PORT_NULL;
+ ipc_port_init_flags_t init_flags;
+ ipc_port_t port;
+ kern_return_t result;
+
+ /* Allocate and initialize local state of a timer object */
timer = (mk_timer_t)zalloc(mk_timer_zone);
if (timer == NULL) {
return MACH_PORT_NULL;
}
+ simple_lock_init(&timer->lock, 0);
+ thread_call_setup(&timer->call_entry, mk_timer_expire, timer);
+ timer->is_armed = timer->is_dead = FALSE;
+ timer->active = 0;
/* Pre-allocate a kmsg for the timer messages */
ipc_kmsg_t kmsg;
return MACH_PORT_NULL;
}
- /* Allocate an in-transit kobject port with a send right */
- ipc_kobject_alloc_options_t options;
- options = (IPC_KOBJECT_ALLOC_IN_TRANSIT | IPC_KOBJECT_ALLOC_MAKE_SEND);
- port = ipc_kobject_alloc_port((ipc_kobject_t)timer, IKOT_TIMER, options);
- assert(port != IP_NULL);
+ init_flags = IPC_PORT_INIT_MESSAGE_QUEUE;
+ result = ipc_port_alloc(myspace, init_flags, &name, &port);
+ if (result != KERN_SUCCESS) {
+ zfree(mk_timer_zone, timer);
+ ipc_kmsg_free(kmsg);
+ return MACH_PORT_NULL;
+ }
- /* Associate the kmsg */
+ /* Associate the pre-allocated kmsg with the port */
ipc_kmsg_set_prealloc(kmsg, port);
- /* Initialize the timer object and bind port to it */
- simple_lock_init(&timer->lock, 0);
- thread_call_setup(&timer->call_entry, mk_timer_expire, timer);
- timer->is_armed = timer->is_dead = FALSE;
- timer->active = 0;
- timer->port = port;
+ /* port locked, receive right at user-space */
+ ipc_kobject_set_atomically(port, (ipc_kobject_t)timer, IKOT_TIMER);
- /* Copyout the receive right for the timer port to user-space */
- current_thread()->ith_knote = ITH_KNOTE_NULL;
- result = ipc_object_copyout(myspace, ip_to_object(port),
- MACH_MSG_TYPE_MOVE_RECEIVE,
- NULL, NULL, &name);
- if (result != KERN_SUCCESS) {
- ipc_object_destroy(ip_to_object(port), MACH_MSG_TYPE_MOVE_RECEIVE);
- /* should trigger mk_timer_port_destroy() call */
- return MACH_PORT_NULL;
- }
+ /* make a (naked) send right for the timer to keep */
+ timer->port = ipc_port_make_send_locked(port);
+
+ ip_unlock(port);
return name;
}
ip_lock(port);
if (ip_kotype(port) == IKOT_TIMER) {
- timer = (mk_timer_t)port->ip_kobject;
+ timer = (mk_timer_t) ip_get_kobject(port);
assert(timer != NULL);
ipc_kobject_set_atomically(port, IKO_NULL, IKOT_NONE);
simple_lock(&timer->lock, LCK_GRP_NULL);
}
if (ip_kotype(port) == IKOT_TIMER) {
- timer = (mk_timer_t)port->ip_kobject;
+ timer = (mk_timer_t) ip_get_kobject(port);
assert(timer != NULL);
simple_lock(&timer->lock, LCK_GRP_NULL);
}
if (ip_kotype(port) == IKOT_TIMER) {
- timer = (mk_timer_t)port->ip_kobject;
+ timer = (mk_timer_t) ip_get_kobject(port);
assert(timer != NULL);
simple_lock(&timer->lock, LCK_GRP_NULL);
assert(timer->port == port);
static void sched_clutch_root_bucket_deadline_update(sched_clutch_root_bucket_t, sched_clutch_root_t, uint64_t);
static int sched_clutch_root_bucket_pri_compare(sched_clutch_root_bucket_t, sched_clutch_root_bucket_t);
+/* Options for clutch bucket ordering in the runq */
+__options_decl(sched_clutch_bucket_options_t, uint32_t, {
+ SCHED_CLUTCH_BUCKET_OPTIONS_NONE = 0x0,
+ /* Round robin clutch bucket on thread removal */
+ SCHED_CLUTCH_BUCKET_OPTIONS_SAMEPRI_RR = 0x1,
+ /* Insert clutch bucket at head (for thread preemption) */
+ SCHED_CLUTCH_BUCKET_OPTIONS_HEADQ = 0x2,
+ /* Insert clutch bucket at tail (default) */
+ SCHED_CLUTCH_BUCKET_OPTIONS_TAILQ = 0x4,
+});
+
/* Clutch bucket level hierarchy management */
-static void sched_clutch_bucket_hierarchy_insert(sched_clutch_root_t, sched_clutch_bucket_t, sched_bucket_t, uint64_t);
-static void sched_clutch_bucket_hierarchy_remove(sched_clutch_root_t, sched_clutch_bucket_t, sched_bucket_t, uint64_t);
-static boolean_t sched_clutch_bucket_runnable(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t);
-static boolean_t sched_clutch_bucket_update(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t);
-static void sched_clutch_bucket_empty(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t);
+static void sched_clutch_bucket_hierarchy_insert(sched_clutch_root_t, sched_clutch_bucket_t, sched_bucket_t, uint64_t, sched_clutch_bucket_options_t);
+static void sched_clutch_bucket_hierarchy_remove(sched_clutch_root_t, sched_clutch_bucket_t, sched_bucket_t, uint64_t, sched_clutch_bucket_options_t);
+static boolean_t sched_clutch_bucket_runnable(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t, sched_clutch_bucket_options_t);
+static boolean_t sched_clutch_bucket_update(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t, sched_clutch_bucket_options_t);
+static void sched_clutch_bucket_empty(sched_clutch_bucket_t, sched_clutch_root_t, uint64_t, sched_clutch_bucket_options_t);
static void sched_clutch_bucket_cpu_usage_update(sched_clutch_bucket_t, uint64_t);
static void sched_clutch_bucket_cpu_blocked_update(sched_clutch_bucket_t, uint64_t);
static boolean_t sched_thread_sched_pri_promoted(thread_t);
/* Clutch membership management */
static boolean_t sched_clutch_thread_insert(sched_clutch_root_t, thread_t, integer_t);
-static void sched_clutch_thread_remove(sched_clutch_root_t, thread_t, uint64_t);
+static void sched_clutch_thread_remove(sched_clutch_root_t, thread_t, uint64_t, sched_clutch_bucket_options_t);
static thread_t sched_clutch_thread_highest(sched_clutch_root_t);
/* Clutch properties updates */
}
}
+/*
+ * Clutch Bucket Runqueues
+ *
+ * The clutch buckets are maintained in a runq at the root bucket level. The
+ * runq organization allows clutch buckets to be ordered based on various
+ * factors such as:
+ *
+ * - Clutch buckets are round robin'ed at the same priority level when a
+ * thread is selected from a clutch bucket. This prevents a clutch bucket
+ * from starving out other clutch buckets at the same priority.
+ *
+ * - Clutch buckets are inserted at the head when they become runnable due to
+ * thread preemption. This allows threads that were preempted to maintain
+ * their order in the queue.
+ *
+ */
+
+/*
+ * sched_clutch_bucket_runq_init()
+ *
+ * Initialize a clutch bucket runq.
+ */
+static void
+sched_clutch_bucket_runq_init(
+ sched_clutch_bucket_runq_t clutch_buckets_rq)
+{
+ clutch_buckets_rq->scbrq_highq = NOPRI;
+ for (uint8_t i = 0; i < BITMAP_LEN(NRQS); i++) {
+ clutch_buckets_rq->scbrq_bitmap[i] = 0;
+ }
+ clutch_buckets_rq->scbrq_count = 0;
+ for (int i = 0; i < NRQS; i++) {
+ circle_queue_init(&clutch_buckets_rq->scbrq_queues[i]);
+ }
+}
+
+/*
+ * sched_clutch_bucket_runq_empty()
+ *
+ * Returns whether a clutch bucket runq is empty.
+ */
+static boolean_t
+sched_clutch_bucket_runq_empty(
+ sched_clutch_bucket_runq_t clutch_buckets_rq)
+{
+ return clutch_buckets_rq->scbrq_count == 0;
+}
+
+/*
+ * sched_clutch_bucket_runq_peek()
+ *
+ * Returns the highest priority clutch bucket in the runq.
+ */
+static sched_clutch_bucket_t
+sched_clutch_bucket_runq_peek(
+ sched_clutch_bucket_runq_t clutch_buckets_rq)
+{
+ if (clutch_buckets_rq->scbrq_count > 0) {
+ circle_queue_t queue = &clutch_buckets_rq->scbrq_queues[clutch_buckets_rq->scbrq_highq];
+ return cqe_queue_first(queue, struct sched_clutch_bucket, scb_runqlink);
+ } else {
+ return NULL;
+ }
+}
+
+/*
+ * sched_clutch_bucket_runq_enqueue()
+ *
+ * Enqueue a clutch bucket into the runq based on the options passed in.
+ */
+static void
+sched_clutch_bucket_runq_enqueue(
+ sched_clutch_bucket_runq_t clutch_buckets_rq,
+ sched_clutch_bucket_t clutch_bucket,
+ sched_clutch_bucket_options_t options)
+{
+ circle_queue_t queue = &clutch_buckets_rq->scbrq_queues[clutch_bucket->scb_priority];
+ if (circle_queue_empty(queue)) {
+ circle_enqueue_tail(queue, &clutch_bucket->scb_runqlink);
+ bitmap_set(clutch_buckets_rq->scbrq_bitmap, clutch_bucket->scb_priority);
+ if (clutch_bucket->scb_priority > clutch_buckets_rq->scbrq_highq) {
+ clutch_buckets_rq->scbrq_highq = clutch_bucket->scb_priority;
+ }
+ } else {
+ if (options & SCHED_CLUTCH_BUCKET_OPTIONS_HEADQ) {
+ circle_enqueue_head(queue, &clutch_bucket->scb_runqlink);
+ } else {
+ /*
+ * Default behavior (handles SCHED_CLUTCH_BUCKET_OPTIONS_TAILQ &
+ * SCHED_CLUTCH_BUCKET_OPTIONS_NONE)
+ */
+ circle_enqueue_tail(queue, &clutch_bucket->scb_runqlink);
+ }
+ }
+ clutch_buckets_rq->scbrq_count++;
+}
+
+/*
+ * sched_clutch_bucket_runq_remove()
+ *
+ * Remove a clutch bucket from the runq.
+ */
+static void
+sched_clutch_bucket_runq_remove(
+ sched_clutch_bucket_runq_t clutch_buckets_rq,
+ sched_clutch_bucket_t clutch_bucket)
+{
+ circle_queue_t queue = &clutch_buckets_rq->scbrq_queues[clutch_bucket->scb_priority];
+ circle_dequeue(queue, &clutch_bucket->scb_runqlink);
+ assert(clutch_buckets_rq->scbrq_count > 0);
+ clutch_buckets_rq->scbrq_count--;
+ if (circle_queue_empty(queue)) {
+ bitmap_clear(clutch_buckets_rq->scbrq_bitmap, clutch_bucket->scb_priority);
+ clutch_buckets_rq->scbrq_highq = bitmap_first(clutch_buckets_rq->scbrq_bitmap, NRQS);
+ }
+}
+
+static void
+sched_clutch_bucket_runq_rotate(
+ sched_clutch_bucket_runq_t clutch_buckets_rq,
+ sched_clutch_bucket_t clutch_bucket)
+{
+ circle_queue_t queue = &clutch_buckets_rq->scbrq_queues[clutch_bucket->scb_priority];
+ assert(clutch_bucket == cqe_queue_first(queue, struct sched_clutch_bucket, scb_runqlink));
+ circle_queue_rotate_head_forward(queue);
+}
+
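A compact model of the placement rules these runq helpers implement: HEADQ puts a clutch bucket that became runnable due to preemption back at the front of its priority level, TAILQ and NONE append at the tail, and SAMEPRI_RR is handled separately by rotating the current head when a thread is picked and the priority is unchanged. The enum and helper below are illustrative only.

#include <stdio.h>

typedef enum {
	OPT_NONE       = 0x0,
	OPT_SAMEPRI_RR = 0x1,   /* rotate head when priority is unchanged */
	OPT_HEADQ      = 0x2,   /* insert at the head (preemption) */
	OPT_TAILQ      = 0x4,   /* insert at the tail (default) */
} bucket_opts_t;

/* Mirrors the placement decision in sched_clutch_bucket_runq_enqueue(). */
static const char *
placement_for(bucket_opts_t opts)
{
	if (opts & OPT_HEADQ) {
		return "head";  /* preempted clutch bucket keeps its spot in line */
	}
	return "tail";          /* OPT_TAILQ and OPT_NONE both append */
}

int
main(void)
{
	printf("HEADQ -> %s\n", placement_for(OPT_HEADQ));
	printf("TAILQ -> %s\n", placement_for(OPT_TAILQ));
	printf("NONE  -> %s\n", placement_for(OPT_NONE));
	return 0;
}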
/*
* sched_clutch_root_bucket_init()
*
sched_bucket_t bucket)
{
root_bucket->scrb_bucket = bucket;
- priority_queue_init(&root_bucket->scrb_clutch_buckets, PRIORITY_QUEUE_BUILTIN_KEY | PRIORITY_QUEUE_MAX_HEAP);
+ sched_clutch_bucket_runq_init(&root_bucket->scrb_clutch_buckets);
priority_queue_entry_init(&root_bucket->scrb_pqlink);
root_bucket->scrb_deadline = SCHED_CLUTCH_INVALID_TIME_64;
root_bucket->scrb_warped_deadline = 0;
clutch_bucket->scb_interactivity_ts = 0;
clutch_bucket->scb_blocked_ts = SCHED_CLUTCH_BUCKET_BLOCKED_TS_INVALID;
- priority_queue_entry_init(&clutch_bucket->scb_pqlink);
clutch_bucket->scb_clutch = clutch;
clutch_bucket->scb_root = NULL;
priority_queue_init(&clutch_bucket->scb_clutchpri_prioq, PRIORITY_QUEUE_BUILTIN_KEY | PRIORITY_QUEUE_MAX_HEAP);
sched_clutch_root_t root_clutch,
sched_clutch_bucket_t clutch_bucket,
sched_bucket_t bucket,
- uint64_t timestamp)
+ uint64_t timestamp,
+ sched_clutch_bucket_options_t options)
{
sched_clutch_hierarchy_locked_assert(root_clutch);
if (bucket > TH_BUCKET_FIXPRI) {
sched_clutch_root_bucket_t root_bucket = &root_clutch->scr_buckets[bucket];
/* If this is the first clutch bucket in the root bucket, insert the root bucket into the root priority queue */
- if (priority_queue_empty(&root_bucket->scrb_clutch_buckets)) {
+ if (sched_clutch_bucket_runq_empty(&root_bucket->scrb_clutch_buckets)) {
sched_clutch_root_bucket_runnable(root_bucket, root_clutch, timestamp);
}
- /* Insert the clutch bucket into the root bucket priority queue */
- priority_queue_insert(&root_bucket->scrb_clutch_buckets, &clutch_bucket->scb_pqlink, clutch_bucket->scb_priority, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
+ /* Insert the clutch bucket into the root bucket run queue with order based on options */
+ sched_clutch_bucket_runq_enqueue(&root_bucket->scrb_clutch_buckets, clutch_bucket, options);
os_atomic_store(&clutch_bucket->scb_root, root_clutch, relaxed);
KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_TG_BUCKET_STATE) | DBG_FUNC_NONE,
thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, SCHED_CLUTCH_STATE_RUNNABLE, clutch_bucket->scb_priority, 0);
sched_clutch_root_t root_clutch,
sched_clutch_bucket_t clutch_bucket,
sched_bucket_t bucket,
- uint64_t timestamp)
+ uint64_t timestamp,
+ __unused sched_clutch_bucket_options_t options)
{
sched_clutch_hierarchy_locked_assert(root_clutch);
if (bucket > TH_BUCKET_FIXPRI) {
sched_clutch_root_bucket_t root_bucket = &root_clutch->scr_buckets[bucket];
/* Remove the clutch bucket from the root bucket priority queue */
- priority_queue_remove(&root_bucket->scrb_clutch_buckets, &clutch_bucket->scb_pqlink, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
+ sched_clutch_bucket_runq_remove(&root_bucket->scrb_clutch_buckets, clutch_bucket);
os_atomic_store(&clutch_bucket->scb_root, NULL, relaxed);
clutch_bucket->scb_blocked_ts = timestamp;
KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_TG_BUCKET_STATE) | DBG_FUNC_NONE,
thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, SCHED_CLUTCH_STATE_EMPTY, 0, 0);
/* If the root bucket priority queue is now empty, remove it from the root priority queue */
- if (priority_queue_empty(&root_bucket->scrb_clutch_buckets)) {
+ if (sched_clutch_bucket_runq_empty(&root_bucket->scrb_clutch_buckets)) {
sched_clutch_root_bucket_empty(root_bucket, root_clutch, timestamp);
}
}
sched_clutch_root_bucket_highest_clutch_bucket(
sched_clutch_root_bucket_t root_bucket)
{
- if (priority_queue_empty(&root_bucket->scrb_clutch_buckets)) {
+ if (sched_clutch_bucket_runq_empty(&root_bucket->scrb_clutch_buckets)) {
return NULL;
}
- return priority_queue_max(&root_bucket->scrb_clutch_buckets, struct sched_clutch_bucket, scb_pqlink);
+ return sched_clutch_bucket_runq_peek(&root_bucket->scrb_clutch_buckets);
}
/*
sched_clutch_bucket_runnable(
sched_clutch_bucket_t clutch_bucket,
sched_clutch_root_t root_clutch,
- uint64_t timestamp)
+ uint64_t timestamp,
+ sched_clutch_bucket_options_t options)
{
sched_clutch_hierarchy_locked_assert(root_clutch);
sched_clutch_bucket_cpu_blocked_update(clutch_bucket, timestamp);
clutch_bucket->scb_priority = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp);
- sched_clutch_bucket_hierarchy_insert(root_clutch, clutch_bucket, clutch_bucket->scb_bucket, timestamp);
+ sched_clutch_bucket_hierarchy_insert(root_clutch, clutch_bucket, clutch_bucket->scb_bucket, timestamp, options);
/* Update the timesharing properties of this clutch_bucket; also done every sched_tick */
sched_clutch_bucket_timeshare_update(clutch_bucket);
int16_t root_old_pri = root_clutch->scr_priority;
/*
* sched_clutch_bucket_update()
*
- * Update the clutch_bucket's position in the hierarchy based on whether
- * the newly runnable thread changes its priority. Also update the root
- * priority accordingly.
+ * Update the clutch_bucket's position in the hierarchy. This routine is
+ * called when a new thread is inserted or removed from a runnable clutch
+ * bucket. The options specify some properties about the clutch bucket
+ * insertion order into the clutch bucket runq.
*/
static boolean_t
sched_clutch_bucket_update(
sched_clutch_bucket_t clutch_bucket,
sched_clutch_root_t root_clutch,
- uint64_t timestamp)
+ uint64_t timestamp,
+ sched_clutch_bucket_options_t options)
{
sched_clutch_hierarchy_locked_assert(root_clutch);
uint64_t new_pri = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp);
+ sched_clutch_bucket_runq_t bucket_runq = &root_clutch->scr_buckets[clutch_bucket->scb_bucket].scrb_clutch_buckets;
if (new_pri == clutch_bucket->scb_priority) {
+ /*
+ * If SCHED_CLUTCH_BUCKET_OPTIONS_SAMEPRI_RR is specified, move the clutch bucket
+ * to the end of the runq. Typically used when a thread is selected for execution
+ * from a clutch bucket.
+ */
+ if (options & SCHED_CLUTCH_BUCKET_OPTIONS_SAMEPRI_RR) {
+ sched_clutch_bucket_runq_rotate(bucket_runq, clutch_bucket);
+ }
return false;
}
- struct priority_queue *bucket_prioq = &root_clutch->scr_buckets[clutch_bucket->scb_bucket].scrb_clutch_buckets;
-
- if (new_pri < clutch_bucket->scb_priority) {
- clutch_bucket->scb_priority = new_pri;
- priority_queue_entry_decrease(bucket_prioq, &clutch_bucket->scb_pqlink,
- clutch_bucket->scb_priority, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
- } else {
- clutch_bucket->scb_priority = new_pri;
- priority_queue_entry_increase(bucket_prioq, &clutch_bucket->scb_pqlink,
- clutch_bucket->scb_priority, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
- }
+ sched_clutch_bucket_runq_remove(bucket_runq, clutch_bucket);
+ clutch_bucket->scb_priority = new_pri;
+ sched_clutch_bucket_runq_enqueue(bucket_runq, clutch_bucket, options);
int16_t root_old_pri = root_clutch->scr_priority;
sched_clutch_root_pri_update(root_clutch);
sched_clutch_bucket_empty(
sched_clutch_bucket_t clutch_bucket,
sched_clutch_root_t root_clutch,
- uint64_t timestamp)
+ uint64_t timestamp,
+ sched_clutch_bucket_options_t options)
{
sched_clutch_hierarchy_locked_assert(root_clutch);
- sched_clutch_bucket_hierarchy_remove(root_clutch, clutch_bucket, clutch_bucket->scb_bucket, timestamp);
+ sched_clutch_bucket_hierarchy_remove(root_clutch, clutch_bucket, clutch_bucket->scb_bucket, timestamp, options);
clutch_bucket->scb_priority = sched_clutch_bucket_pri_calculate(clutch_bucket, timestamp);
sched_clutch_root_pri_update(root_clutch);
}
KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_THREAD_STATE) | DBG_FUNC_NONE,
thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, thread_tid(thread), SCHED_CLUTCH_STATE_RUNNABLE, 0);
- /* Enqueue the clutch into the hierarchy (if needed) and update properties */
+ /* Enqueue the clutch into the hierarchy (if needed) and update properties; pick the insertion order based on thread options */
+ sched_clutch_bucket_options_t scb_options = (options & SCHED_HEADQ) ? SCHED_CLUTCH_BUCKET_OPTIONS_HEADQ : SCHED_CLUTCH_BUCKET_OPTIONS_TAILQ;
if (clutch_bucket->scb_thr_count == 0) {
sched_clutch_thr_count_inc(&clutch_bucket->scb_thr_count);
sched_clutch_thr_count_inc(&root_clutch->scr_thr_count);
- /* Insert the newly runnable clutch bucket into the hierarchy */
- result = sched_clutch_bucket_runnable(clutch_bucket, root_clutch, current_timestamp);
+ result = sched_clutch_bucket_runnable(clutch_bucket, root_clutch, current_timestamp, scb_options);
} else {
sched_clutch_thr_count_inc(&clutch_bucket->scb_thr_count);
sched_clutch_thr_count_inc(&root_clutch->scr_thr_count);
- /* Update the position of the clutch bucket in the hierarchy */
- result = sched_clutch_bucket_update(clutch_bucket, root_clutch, current_timestamp);
+ result = sched_clutch_bucket_update(clutch_bucket, root_clutch, current_timestamp, scb_options);
}
return result;
}
sched_clutch_thread_remove(
sched_clutch_root_t root_clutch,
thread_t thread,
- uint64_t current_timestamp)
+ uint64_t current_timestamp,
+ sched_clutch_bucket_options_t options)
{
sched_clutch_hierarchy_locked_assert(root_clutch);
sched_clutch_t clutch = sched_clutch_for_thread(thread);
/* Remove the clutch from hierarchy (if needed) and update properties */
if (clutch_bucket->scb_thr_count == 0) {
- sched_clutch_bucket_empty(clutch_bucket, root_clutch, current_timestamp);
+ sched_clutch_bucket_empty(clutch_bucket, root_clutch, current_timestamp, options);
} else {
- sched_clutch_bucket_update(clutch_bucket, root_clutch, current_timestamp);
+ sched_clutch_bucket_update(clutch_bucket, root_clutch, current_timestamp, options);
}
}
thread_t thread = run_queue_peek(&clutch_bucket->scb_runq);
assert(thread != NULL);
- /* Remove and return the thread from the hierarchy */
- sched_clutch_thread_remove(root_clutch, thread, current_timestamp);
+ /* Remove and return the thread from the hierarchy; also round robin the clutch bucket if the priority remains unchanged */
+ sched_clutch_thread_remove(root_clutch, thread, current_timestamp, SCHED_CLUTCH_BUCKET_OPTIONS_SAMEPRI_RR);
KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_CLUTCH_THREAD_SELECT) | DBG_FUNC_NONE,
thread_tid(thread), thread_group_get_id(clutch_bucket->scb_clutch->sc_tg), clutch_bucket->scb_bucket, 0, 0);
return thread;
*/
if (SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor);
- sched_clutch_thread_remove(pset_clutch_root, thread, mach_absolute_time());
+ sched_clutch_thread_remove(pset_clutch_root, thread, mach_absolute_time(), SCHED_CLUTCH_BUCKET_OPTIONS_NONE);
} else {
rq = sched_clutch_thread_bound_runq(processor, thread);
run_queue_remove(rq, thread);
uint64_t current_timestamp = mach_approximate_time();
while (thread_count > 0) {
thread = run_queue_peek(&clutch_bucket->scb_runq);
- sched_clutch_thread_remove(root_clutch, thread, current_timestamp);
+ sched_clutch_thread_remove(root_clutch, thread, current_timestamp, SCHED_CLUTCH_BUCKET_OPTIONS_NONE);
enqueue_tail(clutch_threads, &thread->runq_links);
thread_count--;
}
*/
#define SCHED_CLUTCH_THREAD_ELIGIBLE(thread) ((thread->bound_processor) == PROCESSOR_NULL)
+/*
+ * Clutch Bucket Runqueue Structure.
+ */
+struct sched_clutch_bucket_runq {
+ int scbrq_highq;
+ bitmap_t scbrq_bitmap[BITMAP_LEN(NRQS_MAX)];
+ int scbrq_count;
+ circle_queue_head_t scbrq_queues[NRQS_MAX];
+};
+typedef struct sched_clutch_bucket_runq *sched_clutch_bucket_runq_t;
+
/*
*
* Clutch hierarchy locking protocol
/* (I) sched bucket represented by this root bucket */
uint8_t scrb_bucket;
/* (P) priority queue for all clutch buckets in this sched bucket */
- struct priority_queue scrb_clutch_buckets;
+ struct sched_clutch_bucket_runq scrb_clutch_buckets;
/* (P) priority queue entry to use for enqueueing root bucket into root prioq */
struct priority_queue_entry scrb_pqlink;
/* (P) ageout deadline for this root bucket */
/* (A) CPU usage information for the clutch bucket */
sched_clutch_bucket_cpu_data_t scb_cpu_data;
- /* (P) linkage for clutch_bucket in root_bucket priority queue */
- struct priority_queue_entry scb_pqlink;
+ /* (P) linkage for clutch_bucket in root_bucket runqueue */
+ queue_chain_t scb_runqlink;
/* (I) clutch to which this clutch bucket belongs */
struct sched_clutch *scb_clutch;
/* (A) pointer to the root of the hierarchy this bucket is in */
**Implementation**
-The thread group level implements a variation of the FreeBSD ULE scheduler to decide which clutch bucket should be selected next for execution. Each clutch bucket with runnable threads is represented as an entry in a priority queue which is ordered by clutch bucket priorities. The clutch bucket selection algorithm simply selects the clutch bucket with the highest priority in the priority queue. The priority calculation for the clutch buckets is based on the following factors:
+The thread group level implements a variation of the FreeBSD ULE scheduler to decide which clutch bucket should be selected next for execution. Each clutch bucket with runnable threads is represented as an entry in a runqueue which is ordered by clutch bucket priorities. The clutch bucket selection algorithm simply selects the clutch bucket with the highest priority in the clutch bucket runqueue. The priority calculation for the clutch buckets is based on the following factors:
* **Highest runnable thread in the clutch bucket**: The clutch bucket maintains a priority queue which contains threads ordered by their promoted or base priority (whichever property made the thread eligible to be part of that clutch bucket). It uses the highest of these threads to calculate the base priority of the clutch bucket. The use of both base and sched priority allows the scheduler to honor priority differences specified from userspace via SPIs, priority boosts due to priority inheritance mechanisms like turnstiles and other priority affecting mechanisms outside the core scheduler.
* **Interactivity score**: The scheduler calculates an interactivity score based on the ratio of voluntary blocking time and CPU usage time for the clutch bucket as a whole. This score allows the scheduler to prefer highly interactive thread groups over batch processing compute intensive thread groups.
* Since the priority calculation is fairly cheap, the scheduler is able to maintain up-to-date information about all thread groups which leads to more optimal decisions.
* Thread groups provide a convenient abstraction for groups of threads working together for a user workload. Basing scheduling decisions on this abstraction allows the system to make interesting choices such as preferring Apps over daemons which is typically better for system responsiveness.
+The clutch bucket runqueue data structure allows a clutch bucket to be inserted at the head of the queue when threads from that clutch bucket are preempted. The runqueue also rotates a clutch bucket to the end of its priority level when a thread is selected for execution from it. This allows the system to round robin efficiently among clutch buckets at the same priority value, especially on highly contended systems with few CPUs.
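
To make the insertion policy concrete, the following is a minimal, self-contained sketch of a clutch bucket runqueue built from per-priority circular lists. The type and function names (`cb_runq`, `cb_insert`, and so on) are simplified stand-ins rather than the kernel's actual `sched_clutch_bucket_runq` and circle-queue primitives; the sketch only illustrates the behavior described above: preempted buckets re-enter at the head of their priority level, and a bucket whose priority is unchanged after a thread is picked rotates to the tail of that level.

```c
#include <stdbool.h>

#define NPRI 128

/* Hypothetical, simplified stand-ins for the kernel's runqueue types. */
struct cb_link {
	struct cb_link *next, *prev;
};

struct clutch_bucket {
	struct cb_link link;
	int pri;                        /* current clutch bucket priority */
};

struct cb_runq {
	int highq;                      /* highest non-empty priority level */
	int count;                      /* number of clutch buckets enqueued */
	struct cb_link queues[NPRI];    /* one circular list per priority */
};

static void
cb_runq_init(struct cb_runq *rq)
{
	rq->highq = -1;
	rq->count = 0;
	for (int i = 0; i < NPRI; i++) {
		rq->queues[i].next = rq->queues[i].prev = &rq->queues[i];
	}
}

/* Insert 'elt' right after the sentinel (head) or right before it (tail). */
static void
cb_insert(struct cb_link *head, struct cb_link *elt, bool at_head)
{
	struct cb_link *after = at_head ? head : head->prev;

	elt->next = after->next;
	elt->prev = after;
	after->next->prev = elt;
	after->next = elt;
}

static void
cb_remove(struct cb_link *elt)
{
	elt->prev->next = elt->next;
	elt->next->prev = elt->prev;
	elt->next = elt->prev = elt;
}

/* HEADQ when the bucket's threads were preempted, TAILQ otherwise. */
static void
cb_runq_enqueue(struct cb_runq *rq, struct clutch_bucket *cb, bool headq)
{
	cb_insert(&rq->queues[cb->pri], &cb->link, headq);
	rq->count++;
	if (cb->pri > rq->highq) {
		rq->highq = cb->pri;
	}
}

/*
 * SAMEPRI_RR: a thread was just selected from 'cb' and the bucket priority
 * did not change, so move the bucket to the tail of its priority level to
 * round robin among clutch buckets at the same priority.
 */
static void
cb_runq_rotate(struct cb_runq *rq, struct clutch_bucket *cb)
{
	cb_remove(&cb->link);
	cb_insert(&rq->queues[cb->pri], &cb->link, false /* tail */);
}
```

With one circular list per priority level, head insertion, tail insertion, and rotation are all constant-time operations.
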
+
### Thread Level
At the lowest level the scheduler decides which thread within a clutch bucket should be selected next for execution. Each runnable thread in the clutch bucket is represented as an entry in a runqueue organized by the schedpri of the threads. The thread selection algorithm simply selects the highest priority thread in the runqueue. The schedpri calculation is based on the traditional Mach scheduling algorithm, which uses load and CPU usage to decay a thread's priority. The thread decay model is better suited at this level than in the global scheduler because the load calculation only accounts for threads in the same clutch bucket. Since all threads in a clutch bucket belong to the same thread group and scheduling bucket, this algorithm provides quick CPU access for latency sensitive threads within the clutch bucket without impacting unrelated threads in the system.
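
As a rough illustration of that decay, the sketch below computes a schedpri from a base priority, accumulated CPU usage, and the runnable load of the clutch bucket. The constants and the `bucket_pri_shift` mapping are illustrative assumptions; in the kernel the shift is derived from a load table and refreshed on the scheduler tick, but the shape is the same: the more contended the bucket, the faster CPU usage pulls a thread's priority down toward the minimum.

```c
#include <stdint.h>

#define MINPRI_USER             0
#define ILLUSTRATIVE_SHIFT_BASE 18

/*
 * Hypothetical load -> shift mapping: more runnable threads in the bucket
 * means a smaller shift, so each unit of CPU usage decays priority faster.
 */
static uint32_t
bucket_pri_shift(uint32_t bucket_load)
{
	if (bucket_load <= 1) {
		return ILLUSTRATIVE_SHIFT_BASE;
	}
	if (bucket_load <= 4) {
		return ILLUSTRATIVE_SHIFT_BASE - 1;
	}
	return ILLUSTRATIVE_SHIFT_BASE - 2;
}

/* Mach-style decay: schedpri = basepri - (cpu_usage >> shift), clamped. */
static int
thread_schedpri(int base_pri, uint64_t cpu_usage, uint32_t bucket_load)
{
	int pri = base_pri - (int)(cpu_usage >> bucket_pri_shift(bucket_load));

	if (pri < MINPRI_USER) {
		pri = MINPRI_USER;
	}
	if (pri > base_pri) {
		pri = base_pri;
	}
	return pri;
}
```
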
kernel_bootstrap_log("machine_init");
machine_init();
+ kernel_bootstrap_log("thread_machine_init_template");
+ thread_machine_init_template();
+
kernel_bootstrap_log("clock_init");
clock_init();
--- /dev/null
+/*
+ * Copyright (c) 2019 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ *
+ * An SUID credential is a port type which allows a process to create a new
+ * process with a specific user id. It provides an alternative to the more
+ * traditional SUID bit file permission for achieving this.
+ *
+ * To create a new SUID credential the process must be running as root and must
+ * have a special entitlement. When created, the credential is associated with a
+ * specific vnode and UID, so the unprivileged owner of the credential may only
+ * create a new process from the file associated with that vnode, and the
+ * resulting effective UID will be the UID in the credential.
+ */
+
+#include <kern/ipc_kobject.h>
+#include <kern/queue.h>
+#include <kern/suid_cred.h>
+
+#include <mach/mach_types.h>
+#include <mach/task.h>
+
+#include <IOKit/IOBSD.h>
+
+/* Declarations necessary to call vnode_lookup()/vnode_put(). */
+struct vnode;
+struct vfs_context;
+extern int vnode_lookup(const char *, int, struct vnode **,
+ struct vfs_context *);
+extern struct vfs_context * vfs_context_current(void);
+extern int vnode_put(struct vnode *);
+
+/* Declarations necessary to call kauth_cred_issuser(). */
+struct ucred;
+extern int kauth_cred_issuser(struct ucred *);
+extern struct ucred *kauth_cred_get(void);
+
+static struct zone *suid_cred_zone = NULL;
+
+/* Data associated with the suid cred port. Consumed during posix_spawn(). */
+struct suid_cred {
+ ipc_port_t port;
+ struct vnode *vnode;
+ uint32_t uid;
+};
+
+/* Allocates a new SUID credential. The vnode reference is owned by the newly
+ * created suid_cred_t. */
+static suid_cred_t
+suid_cred_alloc(struct vnode *vnode, uint32_t uid)
+{
+ suid_cred_t sc = SUID_CRED_NULL;
+
+ assert(vnode != NULL);
+
+ sc = zalloc(suid_cred_zone);
+ if (sc != NULL) {
+ // Lazily allocated in convert_suid_cred_to_port().
+ sc->port = IP_NULL;
+ sc->vnode = vnode;
+ sc->uid = uid;
+ }
+
+ return sc;
+}
+
+static void
+suid_cred_free(suid_cred_t sc)
+{
+ assert(sc != NULL);
+ assert(sc->vnode != NULL);
+
+ vnode_put(sc->vnode);
+
+ sc->uid = UINT32_MAX;
+ sc->vnode = NULL;
+ sc->port = IP_NULL;
+
+ zfree(suid_cred_zone, sc);
+}
+
+void
+suid_cred_destroy(ipc_port_t port)
+{
+ suid_cred_t sc = NULL;
+
+ ip_lock(port);
+ assert(ip_kotype(port) == IKOT_SUID_CRED);
+ sc = (suid_cred_t)port->ip_kobject;
+ ipc_kobject_set_atomically(port, IKO_NULL, IKOT_NONE);
+ ip_unlock(port);
+
+ assert(sc->port == port);
+
+ suid_cred_free(sc);
+}
+
+void
+suid_cred_notify(mach_msg_header_t *msg)
+{
+ assert(msg->msgh_id == MACH_NOTIFY_NO_SENDERS);
+
+ mach_no_senders_notification_t *not = (mach_no_senders_notification_t *)msg;
+ ipc_port_t port = not->not_header.msgh_remote_port;
+
+ if (IP_VALID(port)) {
+ ipc_port_dealloc_kernel(port);
+ }
+}
+
+ipc_port_t
+convert_suid_cred_to_port(suid_cred_t sc)
+{
+ if (sc == NULL) {
+ return IP_NULL;
+ }
+
+ if (!ipc_kobject_make_send_lazy_alloc_port(&sc->port,
+ (ipc_kobject_t) sc, IKOT_SUID_CRED)) {
+ suid_cred_free(sc);
+ return IP_NULL;
+ }
+
+ return sc->port;
+}
+
+/*
+ * Verify the suid cred port. The cached vnode should match the passed vnode.
+ * The uid to be used to spawn the new process is returned in 'uid'.
+ */
+int
+suid_cred_verify(ipc_port_t port, struct vnode *vnode, uint32_t *uid)
+{
+ suid_cred_t sc = NULL;
+ int ret = -1;
+
+ if (!IP_VALID(port)) {
+ return -1;
+ }
+
+ ip_lock(port);
+
+ if (ip_kotype(port) != IKOT_SUID_CRED) {
+ ip_unlock(port);
+ return -1;
+ }
+
+ if (!ip_active(port)) {
+ ip_unlock(port);
+ return -1;
+ }
+
+ sc = (suid_cred_t)port->ip_kobject;
+
+ if (vnode != sc->vnode) {
+ ip_unlock(port);
+ return -1;
+ }
+
+ *uid = sc->uid;
+ ret = 0;
+
+ ipc_port_destroy(port);
+ return ret;
+}
+
+void
+suid_cred_init(void)
+{
+ const size_t sc_size = sizeof(struct suid_cred);
+ suid_cred_zone = zinit(sc_size, 1024 * sc_size, 0, "suid_cred");
+}
+
+kern_return_t
+task_create_suid_cred(
+ task_t task,
+ suid_cred_path_t path,
+ suid_cred_uid_t uid,
+ suid_cred_t *sc_p)
+{
+ suid_cred_t sc = NULL;
+ struct vnode *vnode;
+ int err = -1;
+
+ if (task == TASK_NULL || task != current_task()) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ // Task must have entitlement.
+ if (!IOTaskHasEntitlement(task, "com.apple.private.suid_cred")) {
+ return KERN_NO_ACCESS;
+ }
+
+ // Thread must be root owned.
+ if (!kauth_cred_issuser(kauth_cred_get())) {
+ return KERN_NO_ACCESS;
+ }
+
+ // Find the vnode for the path.
+ err = vnode_lookup(path, 0, &vnode, vfs_context_current());
+ if (err != 0) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ sc = suid_cred_alloc(vnode, uid);
+ if (sc == NULL) {
+ (void) vnode_put(vnode);
+ return KERN_RESOURCE_SHORTAGE;
+ }
+
+ *sc_p = sc;
+
+ return KERN_SUCCESS;
+}
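
For orientation, here is a hedged userspace sketch of how an entitled, root-owned process might obtain such a credential via the task_create_suid_cred MIG routine added in this change. The file path and uid are hypothetical, the prototype is paraphrased from the MIG definition, and the posix_spawn() hand-off (where the kernel consumes the credential) is intentionally omitted.

```c
#include <mach/mach.h>
#include <mach/mach_error.h>
#include <stdio.h>

/*
 * Paraphrased userspace prototype of the MIG routine; the generated header
 * uses suid_cred_path_t / suid_cred_uid_t / suid_cred_t for these types.
 */
extern kern_return_t task_create_suid_cred(task_t task, char *path,
    uint32_t uid, mach_port_t *delegation);

int
main(void)
{
	mach_port_t cred = MACH_PORT_NULL;
	/* Hypothetical target binary and uid. */
	char path[] = "/usr/local/bin/helper";

	/* Caller must be root and hold com.apple.private.suid_cred. */
	kern_return_t kr = task_create_suid_cred(mach_task_self(), path,
	    501, &cred);
	if (kr != KERN_SUCCESS) {
		fprintf(stderr, "task_create_suid_cred: %s\n",
		    mach_error_string(kr));
		return 1;
	}

	/*
	 * The send right in 'cred' can now be handed to an unprivileged
	 * process, which attaches it to a posix_spawn() of the same file;
	 * the kernel consumes the credential during spawn and the child
	 * runs with the effective uid stored in the credential.
	 */
	return 0;
}
```
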
--- /dev/null
+/*
+ * Copyright (c) 2019 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _KERN_SUID_CRED_H
+#define _KERN_SUID_CRED_H
+
+#if XNU_KERNEL_PRIVATE
+
+#include <kern/kern_types.h>
+#include <mach/mach_types.h>
+
+struct vnode;
+
+extern ipc_port_t convert_suid_cred_to_port(suid_cred_t);
+
+extern void suid_cred_init(void);
+
+extern void suid_cred_notify(mach_msg_header_t *msg);
+
+extern int suid_cred_verify(ipc_port_t port, struct vnode *vnode, uint32_t *uid);
+
+extern void suid_cred_destroy(ipc_port_t port);
+
+#endif /* XNU_KERNEL_PRIVATE */
+
+#endif /* _KERN_SUID_CRED_H */
KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SYSDIAGNOSE, SYSDIAGNOSE_NOTIFY_USER) | DBG_FUNC_START, 0, 0, 0, 0, 0);
- kr = send_sysdiagnose_notification(user_port, keycode);
+ kr = send_sysdiagnose_notification_with_audit_token(user_port, keycode);
ipc_port_release_send(user_port);
return kr;
}
require_ip_active(port);
assert(IKOT_TASK == ip_kotype(port));
- task = (task_t) port->ip_kobject;
+ task = (task_t) ip_get_kobject(port);
assert(task_is_a_corpse(task));
pmap_set_process(task->map->pmap, pid, procname);
#endif /* MACH_ASSERT */
- vm_map_remove(task->map,
- task->map->min_offset,
- task->map->max_offset,
- /*
- * Final cleanup:
- * + no unnesting
- * + remove immutable mappings
- * + allow gaps in range
- */
- (VM_MAP_REMOVE_NO_UNNESTING |
- VM_MAP_REMOVE_IMMUTABLE |
- VM_MAP_REMOVE_GAPS_OK));
+ vm_map_terminate(task->map);
/* release our shared region */
vm_shared_region_set(task, NULL);
task_unlock(task);
if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
+ (kr == KERN_SUCCESS) &&
(eval_only == FALSE)) {
vm_wake_compactor_swapper();
/*
int64_t io_delta = 0;
int64_t * global_counter_to_update;
boolean_t needs_telemetry = FALSE;
+ boolean_t is_external_device = FALSE;
int ledger_to_update = 0;
struct task_writes_counters * writes_counters_to_update;
global_counter_to_update = &global_logical_writes_count;
ledger_to_update = task_ledgers.logical_writes;
writes_counters_to_update = &task->task_writes_counters_internal;
+ is_external_device = FALSE;
} else {
global_counter_to_update = &global_logical_writes_to_external_count;
ledger_to_update = task_ledgers.logical_writes_to_external;
writes_counters_to_update = &task->task_writes_counters_external;
+ is_external_device = TRUE;
}
switch (flags) {
case TASK_WRITE_IMMEDIATE:
OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_immediate_writes));
ledger_credit(task->ledger, ledger_to_update, io_size);
- coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
+ if (!is_external_device) {
+ coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
+ }
break;
case TASK_WRITE_DEFERRED:
OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_deferred_writes));
ledger_credit(task->ledger, ledger_to_update, io_size);
- coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
+ if (!is_external_device) {
+ coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
+ }
break;
case TASK_WRITE_INVALIDATED:
OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_invalidated_writes));
ledger_debit(task->ledger, ledger_to_update, io_size);
- coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, FALSE, io_size);
+ if (!is_external_device) {
+ coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, FALSE, io_size);
+ }
break;
case TASK_WRITE_METADATA:
OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_metadata_writes));
ledger_credit(task->ledger, ledger_to_update, io_size);
- coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
+ if (!is_external_device) {
+ coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
+ }
break;
}
if (io_telemetry_limit != 0) {
/* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
needs_telemetry = global_update_logical_writes(io_delta, global_counter_to_update);
- if (needs_telemetry) {
+ if (needs_telemetry && !is_external_device) {
act_set_io_telemetry_ast(current_thread());
}
}
uintptr_t frames[128];
bool user64_regs = false;
- int backtrace_error = backtrace_user(frames,
- sizeof(frames) / sizeof(frames[0]), &btcount, &user64_regs, NULL);
- if (backtrace_error) {
+ int bterror = 0;
+ btcount = backtrace_user(frames,
+ sizeof(frames) / sizeof(frames[0]), &bterror, &user64_regs, NULL);
+ if (bterror != 0) {
return;
}
bool user64_va = task_has_64Bit_addr(task);
os_refgrp_decl(static, thread_refgrp, "thread", NULL);
-void
+thread_t
thread_bootstrap(void)
{
/*
/* fiddle with init thread to skip asserts in set_sched_pri */
init_thread.sched_pri = MAXPRI_KERNEL;
- machine_set_current_thread(&init_thread);
+ return &init_thread;
+}
+
+void
+thread_machine_init_template(void)
+{
+ machine_thread_template_init(&thread_template);
}
extern boolean_t allow_qos_policy_set;
#define assert_thread_magic(thread) do { (void)(thread); } while (0)
#endif
-extern void thread_bootstrap(void);
+extern thread_t thread_bootstrap(void);
+
+extern void thread_machine_init_template(void);
extern void thread_init(void);
thread_t target,
boolean_t is_corpse);
-extern void machine_thread_init(void);
+extern void machine_thread_init(void);
+
+extern void machine_thread_template_init(thread_t thr_template);
extern kern_return_t machine_thread_create(
thread_t thread,
return NULL;
}
- work_interval = (struct work_interval *)port->ip_kobject;
+ work_interval = (struct work_interval *) ip_get_kobject(port);
wi_retain(work_interval);
port, port->ip_srights);
}
- work_interval = (struct work_interval *)port->ip_kobject;
+ work_interval = (struct work_interval *) ip_get_kobject(port);
if (work_interval == NULL) {
panic("work_interval_port_notify(): missing kobject: %p", port);
bool user64 = false;
bool trunc = false;
- int err = backtrace_thread_user(thread, cs->kpuc_frames,
- cs->kpuc_nframes - 1, &cs->kpuc_nframes, &user64, &trunc);
- cs->kpuc_flags = CALLSTACK_KERNEL_WORDS;
- if (user64) {
- cs->kpuc_flags |= CALLSTACK_64BIT;
- }
- if (trunc) {
- cs->kpuc_flags |= CALLSTACK_TRUNCATED;
- }
+ int error = 0;
+ /*
+ * Leave space for the fixup information.
+ */
+ unsigned int maxnframes = cs->kpuc_nframes - 1;
+ unsigned int nframes = backtrace_thread_user(thread, cs->kpuc_frames,
+ maxnframes, &error, &user64, &trunc);
+ cs->kpuc_nframes = MIN(maxnframes, nframes);
- if (!err || err == EFAULT) {
+ /*
+ * Ignore EFAULT to get as much of the stack as possible. It will be
+ * marked as truncated, below.
+ */
+ if (error == 0 || error == EFAULT) {
callstack_fixup_user(cs, thread);
cs->kpuc_flags |= CALLSTACK_VALID;
} else {
cs->kpuc_nframes = 0;
- BUF_INFO(PERF_CS_ERROR, ERR_GETSTACK, err);
+ BUF_INFO(PERF_CS_ERROR, ERR_GETSTACK, error);
}
+ cs->kpuc_flags |= CALLSTACK_KERNEL_WORDS | (user64 ? CALLSTACK_64BIT : 0) |
+ (trunc ? CALLSTACK_TRUNCATED : 0);
+
BUF_INFO(PERF_CS_USAMPLE | DBG_FUNC_END, (uintptr_t)thread_tid(thread),
cs->kpuc_flags, cs->kpuc_nframes);
}
};
#endif /* !__DARWIN_UNIX03 */
-#if !defined(RC_HIDE_XNU_J137)
/* defn of 256 bit YMM regs */
#if __DARWIN_UNIX03
char opmask_reg[8];
};
#endif /* !__DARWIN_UNIX03 */
-#endif /* not RC_HIDE_XNU_J137 */
/*
* Floating point state.
_STRUCT_XMM_REG __fpu_ymmh7; /* YMMH 7 */
};
-#if !defined(RC_HIDE_XNU_J137)
#define _STRUCT_X86_AVX512_STATE32 struct __darwin_i386_avx512_state
_STRUCT_X86_AVX512_STATE32
{
_STRUCT_YMM_REG __fpu_zmmh6; /* ZMMH 6 */
_STRUCT_YMM_REG __fpu_zmmh7; /* ZMMH 7 */
};
-#endif /* not RC_HIDE_XNU_J137 */
#else /* !__DARWIN_UNIX03 */
#define _STRUCT_X86_FLOAT_STATE32 struct i386_float_state
_STRUCT_XMM_REG fpu_ymmh7; /* YMMH 7 */
};
-#if !defined(RC_HIDE_XNU_J137)
#define _STRUCT_X86_AVX512_STATE32 struct i386_avx512_state
_STRUCT_X86_AVX512_STATE32
{
_STRUCT_YMM_REG fpu_zmmh6; /* ZMMH 6 */
_STRUCT_YMM_REG fpu_zmmh7; /* ZMMH 7 */
};
-#endif /* not RC_HIDE_XNU_J137 */
#endif /* !__DARWIN_UNIX03 */
_STRUCT_XMM_REG __fpu_ymmh15; /* YMMH 15 */
};
-#if !defined(RC_HIDE_XNU_J137)
#define _STRUCT_X86_AVX512_STATE64 struct __darwin_x86_avx512_state64
_STRUCT_X86_AVX512_STATE64
{
_STRUCT_ZMM_REG __fpu_zmm30; /* ZMM 30 */
_STRUCT_ZMM_REG __fpu_zmm31; /* ZMM 31 */
};
-#endif /* not RC_HIDE_XNU_J137 */
#else /* !__DARWIN_UNIX03 */
#define _STRUCT_X86_FLOAT_STATE64 struct x86_float_state64
_STRUCT_XMM_REG fpu_ymmh15; /* YMMH 15 */
};
-#if !defined(RC_HIDE_XNU_J137)
#define _STRUCT_X86_AVX512_STATE64 struct x86_avx512_state64
_STRUCT_X86_AVX512_STATE64
{
_STRUCT_ZMM_REG fpu_zmm30; /* ZMM 30 */
_STRUCT_ZMM_REG fpu_zmm31; /* ZMM 31 */
};
-#endif /* not RC_HIDE_XNU_J137 */
#endif /* !__DARWIN_UNIX03 */
typedef union {
struct x86_fx_thread_state fx;
struct x86_avx_thread_state avx;
-#if !defined(RC_HIDE_XNU_J137)
struct x86_avx512_thread_state avx512;
-#endif
} x86_ext_thread_state_t;
#define EVEX_PREFIX 0x62 /* AVX512's EVEX vector operation prefix */
#define _MACH_I386_THREAD_STATE_H_
/* Size of maximum exported thread state in words */
-#if !defined(RC_HIDE_XNU_J137)
#define I386_THREAD_STATE_MAX (614) /* Size of biggest state possible */
-#else
-#define I386_THREAD_STATE_MAX (224) /* Size of biggest state possible */
-#endif /* !defined(RC_HIDE_XNU_J137) */
#if defined (__i386__) || defined(__x86_64__)
#define THREAD_STATE_MAX I386_THREAD_STATE_MAX
#include <mach/mach_types.defs>
#include <mach_debug/mach_debug_types.defs>
+type kobject_description_t = c_string[*:512];
+
/*
* Returns the set of port and port set names
* to which the target task has access, along with
new_guard : uint64_t);
#endif
+/*
+ * Return the type and address of the kernel object
+ * that the given send/receive right represents.
+ * This call is only valid on MACH_IPC_DEBUG kernels.
+ * Otherwise, KERN_FAILURE is returned.
+ */
+routine mach_port_kobject_description(
+ task : ipc_space_inspect_t;
+ name : mach_port_name_t;
+ out object_type : natural_t;
+ out object_addr : mach_vm_address_t;
+ out description : kobject_description_t);
+
/* vim: set ft=c : */
#endif /* KERNEL_SERVER */
;
+type suid_cred_path_t = c_string[*:1024];
+type suid_cred_uid_t = uint32_t;
+type suid_cred_t = mach_port_t
+#if KERNEL_SERVER
+ outtran: mach_port_t convert_suid_cred_to_port(suid_cred_t)
+#endif /* KERNEL_SERVER */
+ ;
+
+
/* thread_state_t: This inline array can hold
* a machine-dependent amount of data, defined in
* mach/machine/???? (currently THREAD_STATE_MAX,
simport <kern/ipc_mig.h>; /* pick up kernel-specific MIG things */
+simport <kern/suid_cred.h>;
#endif /* KERNEL_SERVER */
import <mach/mig.h>;
typedef struct alarm *alarm_t;
typedef struct clock *clock_serv_t;
typedef struct clock *clock_ctrl_t;
-
typedef struct arcade_register *arcade_register_t;
+typedef struct suid_cred *suid_cred_t;
/*
* OBSOLETE: lock_set interfaces are obsolete.
struct ledger;
struct alarm;
struct clock;
-
struct arcade_register;
+struct suid_cred;
__END_DECLS
typedef mach_port_t alarm_t;
typedef mach_port_t clock_serv_t;
typedef mach_port_t clock_ctrl_t;
-
typedef mach_port_t arcade_register_t;
+typedef mach_port_t suid_cred_t;
+
#endif /* KERNEL */
/*
typedef exception_handler_array_t exception_port_arrary_t;
typedef char vfs_path_t[4096];
typedef char nspace_path_t[1024]; /* 1024 == PATH_MAX */
+typedef char suid_cred_path_t[1024];
+typedef uint32_t suid_cred_uid_t;
#ifdef KERNEL
#define TASK_NULL ((task_t) NULL)
#define CLOCK_NULL ((clock_t) NULL)
#define UND_SERVER_NULL ((UNDServerRef) NULL)
#define ARCADE_REG_NULL ((arcade_register_t) NULL)
+#define SUID_CRED_NULL ((suid_cred_t) NULL)
#else
#define TASK_NULL ((task_t) 0)
#define TASK_NAME_NULL ((task_name_t) 0)
#define CLOCK_NULL ((clock_t) 0)
#define UND_SERVER_NULL ((UNDServerRef) 0)
#define ARCADE_REG_NULL ((arcade_register_t) 0)
+#define SUID_CRED_NULL ((suid_cred_t) 0)
#endif
/* DEPRECATED */
sysdiagnose_port : mach_port_t;
flags : uint32_t);
+simpleroutine sysdiagnose_notification_with_audit_token(
+ sysdiagnose_port : mach_port_t;
+ flags : uint32_t;
+ ServerAuditToken atoken : audit_token_t);
+
/* vim: set ft=c : */
task : task_t;
behavior : task_exc_guard_behavior_t);
+routine task_create_suid_cred(
+ task : task_t;
+ path : suid_cred_path_t;
+ uid : suid_cred_uid_t;
+ out delegation : suid_cred_t);
+
/* vim: set ft=c : */
struct mach_core_details files[MACH_CORE_FILEHEADER_MAXFILES];
};
+#define KOBJECT_DESCRIPTION_LENGTH 512
+typedef char kobject_description_t[KOBJECT_DESCRIPTION_LENGTH];
#endif /* _MACH_DEBUG_MACH_DEBUG_TYPES_H_ */
if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
vm_named_entry_t named_entry;
- named_entry = (vm_named_entry_t)port->ip_kobject;
+ named_entry = (vm_named_entry_t) ip_get_kobject(port);
/* a few checks to make sure user is obeying rules */
if (*upl_size == 0) {
if (offset >= named_entry->size) {
ip_unlock(port);
return (upl_t)NULL;
}
- upl = (upl_t) port->ip_kobject;
+ upl = (upl_t) ip_get_kobject(port);
ip_unlock(port);
upl_lock(upl);
upl->ref_count += 1;
extern boolean_t vm_darkwake_mode;
+#if DEVELOPMENT || DEBUG
+int do_cseg_wedge_thread(void);
+int do_cseg_unwedge_thread(void);
+static event_t debug_cseg_wait_event = NULL;
+#endif /* DEVELOPMENT || DEBUG */
+
#if POPCOUNT_THE_COMPRESSED_DATA
boolean_t popcount_c_segs = TRUE;
compressor_pool_size = ((kernel_map->max_offset - kernel_map->min_offset) - kernel_map->size) - VM_RESERVE_SIZE;
}
compressor_pool_multiplier = 1;
+
+#elif defined(__arm64__) && defined(XNU_TARGET_OS_WATCH)
+
+ /*
+ * On M9 watches the compressor can grow large and cause working-set
+ * churn, resulting in audio drops. Capping the compressor size favors
+ * reclaiming unused memory sitting in the idle band via jetsam.
+ */
+
+#define COMPRESSOR_CAP_PERCENTAGE 30ULL
+
+ if (compressor_pool_max_size > max_mem) {
+ compressor_pool_max_size = max_mem;
+ }
+
+ if (vm_compression_limit == 0) {
+ compressor_pool_size = (max_mem * COMPRESSOR_CAP_PERCENTAGE) / 100ULL;
+ }
+ compressor_pool_multiplier = 1;
+
#else
+
if (compressor_pool_max_size > max_mem) {
compressor_pool_max_size = max_mem;
}
return c_seg_freed;
}
+void
+kdp_compressor_busy_find_owner(event64_t wait_event, thread_waitinfo_t *waitinfo)
+{
+ c_segment_t c_seg = (c_segment_t) wait_event;
+
+ waitinfo->owner = thread_tid(c_seg->c_busy_for_thread);
+ waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(c_seg);
+}
+
+#if DEVELOPMENT || DEBUG
+int
+do_cseg_wedge_thread(void)
+{
+ struct c_segment c_seg;
+ c_seg.c_busy_for_thread = current_thread();
+
+ debug_cseg_wait_event = (event_t) &c_seg;
+
+ thread_set_pending_block_hint(current_thread(), kThreadWaitCompressor);
+ assert_wait((event_t) (&c_seg), THREAD_INTERRUPTIBLE);
+
+ thread_block(THREAD_CONTINUE_NULL);
+
+ return 0;
+}
+
+int
+do_cseg_unwedge_thread(void)
+{
+ thread_wakeup(debug_cseg_wait_event);
+ debug_cseg_wait_event = NULL;
+
+ return 0;
+}
+#endif /* DEVELOPMENT || DEBUG */
void
c_seg_wait_on_busy(c_segment_t c_seg)
{
c_seg->c_wanted = 1;
+
+ thread_set_pending_block_hint(current_thread(), kThreadWaitCompressor);
assert_wait((event_t) (c_seg), THREAD_UNINT);
lck_mtx_unlock_always(&c_seg->c_lock);
unsigned int cseg_swap_size;
#endif /* CHECKSUM_THE_SWAP */
-#if MACH_ASSERT
thread_t c_busy_for_thread;
-#endif /* MACH_ASSERT */
int c_slot_var_array_len;
struct c_slot *c_slot_var_array;
assert((cseg)->c_busy); \
(cseg)->c_busy = 0; \
assert((cseg)->c_busy_for_thread != NULL); \
- assert((((cseg)->c_busy_for_thread = NULL), TRUE)); \
+ (cseg)->c_busy_for_thread = NULL; \
if ((cseg)->c_wanted) { \
(cseg)->c_wanted = 0; \
thread_wakeup((event_t) (cseg)); \
assert((cseg)->c_busy == 0); \
(cseg)->c_busy = 1; \
assert((cseg)->c_busy_for_thread == NULL); \
- assert((((cseg)->c_busy_for_thread = current_thread()), TRUE)); \
+ (cseg)->c_busy_for_thread = current_thread(); \
MACRO_END
extern uint64_t vm_compressor_compute_elapsed_msecs(clock_sec_t, clock_nsec_t, clock_sec_t, clock_nsec_t);
+extern void kdp_compressor_busy_find_owner(event64_t wait_event, thread_waitinfo_t *waitinfo);
+
#define PAGE_REPLACEMENT_DISALLOWED(enable) (enable == TRUE ? lck_rw_lock_shared(&c_master_lock) : lck_rw_done(&c_master_lock))
#define PAGE_REPLACEMENT_ALLOWED(enable) (enable == TRUE ? lck_rw_lock_exclusive(&c_master_lock) : lck_rw_done(&c_master_lock))
uint64_t cupid = get_current_unique_pid();
uintptr_t bpc = 0;
- uint32_t bfrs = 0;
+ int btr = 0;
bool u64 = false;
/* Capture a single-frame backtrace; this extracts just the program
* further user stack traversals, thus avoiding copyin()s and further
* faults.
*/
- int btr = backtrace_thread_user(cthread, &bpc, 1U, &bfrs, &u64, NULL);
+ unsigned int bfrs = backtrace_thread_user(cthread, &bpc, 1U, &btr, &u64, NULL);
if ((btr == 0) && (bfrs > 0)) {
cfpc = bpc;
result->map_disallow_data_exec = FALSE;
result->is_nested_map = FALSE;
result->map_disallow_new_exec = FALSE;
+ result->terminated = FALSE;
result->highest_entry_end = 0;
result->first_free = vm_map_to_entry(result);
result->hint = vm_map_to_entry(result);
} else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
vm_named_entry_t named_entry;
- named_entry = (vm_named_entry_t) port->ip_kobject;
+ named_entry = (vm_named_entry_t) ip_get_kobject(port);
if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
VM_FLAGS_RETURN_4K_DATA_ADDR)) {
const vm_map_offset_t FIND_GAP = 1; /* a not page aligned value */
const vm_map_offset_t GAPS_OK = 2; /* a different not page aligned value */
- if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK)) {
+ if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK) && !map->terminated) {
gap_start = FIND_GAP;
} else {
gap_start = GAPS_OK;
return KERN_SUCCESS;
}
+
+/*
+ * vm_map_terminate:
+ *
+ * Clean out a task's map.
+ */
+kern_return_t
+vm_map_terminate(
+ vm_map_t map)
+{
+ vm_map_lock(map);
+ map->terminated = TRUE;
+ vm_map_unlock(map);
+
+ return vm_map_remove(map,
+ map->min_offset,
+ map->max_offset,
+ /*
+ * Final cleanup:
+ * + no unnesting
+ * + remove immutable mappings
+ * + allow gaps in range
+ */
+ (VM_MAP_REMOVE_NO_UNNESTING |
+ VM_MAP_REMOVE_IMMUTABLE |
+ VM_MAP_REMOVE_GAPS_OK));
+}
+
/*
* vm_map_remove:
*
if (ip_active(port) && (ip_kotype(port)
== IKOT_NAMED_ENTRY)) {
named_entry =
- (vm_named_entry_t)port->ip_kobject;
+ (vm_named_entry_t) ip_get_kobject(port);
if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
ip_unlock(port);
ip_lock(port);
if (ip_active(port) &&
(ip_kotype(port) == IKOT_NAMED_ENTRY)) {
- named_entry = (vm_named_entry_t)port->ip_kobject;
+ named_entry = (vm_named_entry_t) ip_get_kobject(port);
if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
ip_unlock(port);
try_failed_count++;
}
}
+ *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
if (evaluation_phase) {
unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;
goto again;
} else {
kr = KERN_SUCCESS;
- *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
}
done:
/* boolean_t */ map_disallow_new_exec:1, /* Disallow new executable code */
/* boolean_t */ jit_entry_exists:1,
/* boolean_t */ has_corpse_footprint:1,
- /* reserved */ pad:20;
+ /* boolean_t */ terminated:1,
+ /* reserved */ pad:19;
unsigned int timestamp; /* Version number */
};
vm_prot_t max_protection,
vm_inherit_t inheritance);
+extern kern_return_t vm_map_terminate(
+ vm_map_t map);
+
#endif /* !XNU_KERNEL_PRIVATE */
/* Deallocate a region */
assert(shared_region->sr_ref_count > 1);
sr_handle = shared_region->sr_mem_entry;
- sr_mem_entry = (vm_named_entry_t) sr_handle->ip_kobject;
+ sr_mem_entry = (vm_named_entry_t) ip_get_kobject(sr_handle);
sr_map = sr_mem_entry->backing.map;
assert(sr_mem_entry->is_sub_map);
assert(!shared_region->sr_persists);
assert(!shared_region->sr_slid);
- mem_entry = (vm_named_entry_t) shared_region->sr_mem_entry->ip_kobject;
+ mem_entry = (vm_named_entry_t) ip_get_kobject(shared_region->sr_mem_entry);
assert(mem_entry->is_sub_map);
assert(!mem_entry->internal);
assert(!mem_entry->is_copy);
/* no need to lock because this data is never modified... */
sr_handle = shared_region->sr_mem_entry;
- sr_mem_entry = (vm_named_entry_t) sr_handle->ip_kobject;
+ sr_mem_entry = (vm_named_entry_t) ip_get_kobject(sr_handle);
sr_map = sr_mem_entry->backing.map;
sr_base_address = shared_region->sr_base_address;
}
/* no need to lock because this data is never modified... */
sr_handle = shared_region->sr_mem_entry;
- sr_mem_entry = (vm_named_entry_t) sr_handle->ip_kobject;
+ sr_mem_entry = (vm_named_entry_t) ip_get_kobject(sr_handle);
sr_map = sr_mem_entry->backing.map;
sr_base_address = shared_region->sr_base_address;
}
sr_handle = shared_region->sr_mem_entry;
- sr_mem_entry = (vm_named_entry_t) sr_handle->ip_kobject;
+ sr_mem_entry = (vm_named_entry_t) ip_get_kobject(sr_handle);
sr_map = sr_mem_entry->backing.map;
/* Trim the pmap if possible. */
/* create the 32 bit comm text page */
unsigned int offset = (random() % _PFZ32_SLIDE_RANGE) << PAGE_SHIFT; /* restricting to 32bMAX-2PAGE */
_vm_commpage_init(&commpage_text32_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
- commpage_text32_entry = (vm_named_entry_t) commpage_text32_handle->ip_kobject;
+ commpage_text32_entry = (vm_named_entry_t) ip_get_kobject(commpage_text32_handle);
commpage_text32_map = commpage_text32_entry->backing.map;
commpage_text32_location = (user32_addr_t) (_COMM_PAGE32_TEXT_START + offset);
/* XXX if (cpu_is_64bit_capable()) ? */
/* create the 64-bit comm page */
offset = (random() % _PFZ64_SLIDE_RANGE) << PAGE_SHIFT; /* restricting sliding upto 2Mb range */
_vm_commpage_init(&commpage_text64_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
- commpage_text64_entry = (vm_named_entry_t) commpage_text64_handle->ip_kobject;
+ commpage_text64_entry = (vm_named_entry_t) ip_get_kobject(commpage_text64_handle);
commpage_text64_map = commpage_text64_entry->backing.map;
commpage_text64_location = (user64_addr_t) (_COMM_PAGE64_TEXT_START + offset);
#if defined(__i386__) || defined(__x86_64__)
/* create the 32-bit comm page */
_vm_commpage_init(&commpage32_handle, _COMM_PAGE32_AREA_LENGTH);
- commpage32_entry = (vm_named_entry_t) commpage32_handle->ip_kobject;
+ commpage32_entry = (vm_named_entry_t) ip_get_kobject(commpage32_handle);
commpage32_map = commpage32_entry->backing.map;
/* XXX if (cpu_is_64bit_capable()) ? */
/* create the 64-bit comm page */
_vm_commpage_init(&commpage64_handle, _COMM_PAGE64_AREA_LENGTH);
- commpage64_entry = (vm_named_entry_t) commpage64_handle->ip_kobject;
+ commpage64_entry = (vm_named_entry_t) ip_get_kobject(commpage64_handle);
commpage64_map = commpage64_entry->backing.map;
#endif /* __i386__ || __x86_64__ */
if (IP_VALID(parent_handle) &&
ip_kotype(parent_handle) == IKOT_NAMED_ENTRY) {
- parent_entry = (vm_named_entry_t) parent_handle->ip_kobject;
+ parent_entry = (vm_named_entry_t) ip_get_kobject(parent_handle);
} else {
parent_entry = NULL;
}
return KERN_INVALID_ARGUMENT;
}
- mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
+ mem_entry = (vm_named_entry_t) ip_get_kobject(entry_port);
named_entry_lock(mem_entry);
return KERN_INVALID_ARGUMENT;
}
- mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
+ mem_entry = (vm_named_entry_t) ip_get_kobject(entry_port);
named_entry_lock(mem_entry);
ip_kotype(entry_port) != IKOT_NAMED_ENTRY) {
return KERN_INVALID_ARGUMENT;
}
- mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
+ mem_entry = (vm_named_entry_t) ip_get_kobject(entry_port);
named_entry_lock(mem_entry);
return KERN_INVALID_ARGUMENT;
}
- mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
+ mem_entry = (vm_named_entry_t) ip_get_kobject(entry_port);
named_entry_lock(mem_entry);
#if MACH_ASSERT
assert(ip_kotype(port) == IKOT_NAMED_ENTRY);
#endif /* MACH_ASSERT */
- named_entry = (vm_named_entry_t)port->ip_kobject;
+ named_entry = (vm_named_entry_t) ip_get_kobject(port);
named_entry_lock(named_entry);
named_entry->ref_count -= 1;
lck_mtx_unlock(&vm_named_entry_list_lock_data);
#endif /* VM_NAMED_ENTRY_LIST */
- kfree(port->ip_kobject,
- sizeof(struct vm_named_entry));
+ kfree(named_entry, sizeof(struct vm_named_entry));
} else {
named_entry_unlock(named_entry);
}
return KERN_INVALID_ARGUMENT;
}
- mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
+ mem_entry = (vm_named_entry_t) ip_get_kobject(entry_port);
named_entry_lock(mem_entry);
return KERN_INVALID_ARGUMENT;
}
- mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
+ mem_entry = (vm_named_entry_t) ip_get_kobject(entry_port);
named_entry_lock(mem_entry);
#include <sys/errno.h>
#include <sys/monotonic.h>
#include <x86_64/monotonic.h>
+#include <kern/kpc.h>
/*
* Sanity check the compiler.
enable_counters(void)
{
wrmsr64(FIXED_CTR_CTRL, FIXED_CTR_CTRL_INIT | FIXED_CTR_CTRL_ENABLE);
- wrmsr64(GLOBAL_CTRL, GLOBAL_CTRL_FIXED_EN);
+
+ uint64_t global_en = GLOBAL_CTRL_FIXED_EN;
+ if (kpc_get_running() & KPC_CLASS_CONFIGURABLE_MASK) {
+ global_en |= kpc_get_configurable_pmc_mask(KPC_CLASS_CONFIGURABLE_MASK);
+ }
+
+ wrmsr64(GLOBAL_CTRL, global_en);
}
static void
return 0;
}
+int mac_mount_check_snapshot_mount(vfs_context_t ctx, struct vnode *rvp, struct vnode *vp, struct componentname *cnp,
+ const char *name, const char *vfc_name);
+int
+mac_mount_check_snapshot_mount(vfs_context_t ctx __unused, struct vnode *rvp __unused, struct vnode *vp __unused,
+ struct componentname *cnp __unused, const char *name __unused, const char *vfc_name __unused)
+{
+ return 0;
+}
+
int mac_vnode_check_trigger_resolve(vfs_context_t ctx __unused, struct vnode *dvp __unused, struct componentname *cnp __unused);
int
mac_vnode_check_trigger_resolve(vfs_context_t ctx __unused, struct vnode *dvp __unused, struct componentname *cnp __unused)
const char *name);
int mac_mount_check_snapshot_delete(vfs_context_t ctx, struct mount *mp,
const char *name);
+int mac_mount_check_snapshot_mount(vfs_context_t ctx, struct vnode *rvp,
+ struct vnode *vp, struct componentname *cnp, const char *name,
+ const char *vfc_name);
int mac_mount_check_snapshot_revert(vfs_context_t ctx, struct mount *mp,
const char *name);
int mac_mount_check_remount(vfs_context_t ctx, struct mount *mp);
struct mount *mp,
const char *name
);
+/**
+ * @brief Access control check for fs_snapshot_mount
+ * @param cred Subject credential
+ * @param rvp Vnode of either the root directory of the
+ * filesystem to mount a snapshot of, or the device from
+ * which to mount the snapshot.
+ * @param vp Vnode that is to be the mount point
+ * @param cnp Component name for vp
+ * @param name Name of snapshot to mount
+ * @param vfc_name Filesystem type name
+ *
+ * Determine whether the subject identified by the credential can
+ * mount the named snapshot from the filesystem at the given
+ * directory.
+ *
+ * @return Return 0 if access is granted, otherwise an appropriate value
+ * for errno should be returned.
+ */
+typedef int mpo_mount_check_snapshot_mount_t(
+ kauth_cred_t cred,
+ struct vnode *rvp,
+ struct vnode *vp,
+ struct componentname *cnp,
+ const char *name,
+ const char *vfc_name
+ );
/**
* @brief Access control check for fs_snapshot_revert
* @param cred Subject credential
* Please note that this should be kept in sync with the check assumptions
* policy in bsd/kern/policy_check.c (policy_ops struct).
*/
-#define MAC_POLICY_OPS_VERSION 59 /* inc when new reserved slots are taken */
+#define MAC_POLICY_OPS_VERSION 62 /* inc when new reserved slots are taken */
struct mac_policy_ops {
mpo_audit_check_postselect_t *mpo_audit_check_postselect;
mpo_audit_check_preselect_t *mpo_audit_check_preselect;
mpo_vnode_check_trigger_resolve_t *mpo_vnode_check_trigger_resolve;
mpo_mount_check_mount_late_t *mpo_mount_check_mount_late;
- mpo_reserved_hook_t *mpo_reserved1;
+ mpo_mount_check_snapshot_mount_t *mpo_mount_check_snapshot_mount;
mpo_reserved_hook_t *mpo_reserved2;
mpo_skywalk_flow_check_connect_t *mpo_skywalk_flow_check_connect;
mpo_skywalk_flow_check_listen_t *mpo_skywalk_flow_check_listen;
* KDBG_EVENTID(DBG_FSYSTEM, DBG_VFS, dcode) global event id, see bsd/sys/kdebug.h.
* Note that dcode is multiplied by 4 and ORed as part of the construction. See bsd/kern/trace_codes
* for list of system-wide {global event id, name} pairs. Currently DBG_VFS event ids are in range
- * [0x3130000, 0x313016C].
+ * [0x3130000, 0x3130170].
*/
//#define VFS_TRACE_POLICY_OPS
return error;
}
+int
+mac_mount_check_snapshot_mount(vfs_context_t ctx, struct vnode *rvp, struct vnode *vp, struct componentname *cnp,
+ const char *name, const char *vfc_name)
+{
+ kauth_cred_t cred;
+ int error;
+
+#if SECURITY_MAC_CHECK_ENFORCE
+ /* 21167099 - only check if we allow write */
+ if (!mac_vnode_enforce) {
+ return 0;
+ }
+#endif
+ cred = vfs_context_ucred(ctx);
+ if (!mac_cred_check_enforce(cred)) {
+ return 0;
+ }
+ VFS_KERNEL_DEBUG_START1(92, vp);
+ MAC_CHECK(mount_check_snapshot_mount, cred, rvp, vp, cnp, name, vfc_name);
+ VFS_KERNEL_DEBUG_END1(92, vp);
+ return error;
+}
+
int
mac_mount_check_snapshot_revert(vfs_context_t ctx, struct mount *mp,
const char *name)
kdebug: INVALID_ARCHS = i386
kdebug: OTHER_LDFLAGS = -framework ktrace -ldarwintest_utils -framework kperf
-EXCLUDED_SOURCES += drop_priv.c kperf_helpers.c xnu_quick_test_helpers.c memorystatus_assertion_helpers.c
+EXCLUDED_SOURCES += drop_priv.c kperf_helpers.c xnu_quick_test_helpers.c memorystatus_assertion_helpers.c bpflib.c in_cksum.c
ifneq ($(PLATFORM),iPhoneOS)
EXCLUDED_SOURCES += jumbo_va_spaces_28530648.c perf_compressor.c memorystatus_freeze_test.c
memorystatus_zone_test: OTHER_LDFLAGS += -framework ktrace
memorystatus_zone_test: OTHER_LDFLAGS += -ldarwintest_utils
-kpc: OTHER_LDFLAGS += -framework kperf
+kpc: OTHER_LDFLAGS += -framework kperf -framework ktrace
+kpc: INVALID_ARCHS = i386
kperf: INVALID_ARCHS = i386
kperf: OTHER_CFLAGS += kperf_helpers.c
net_tuntests: CODE_SIGN_ENTITLEMENTS = network_entitlements.plist
+net_bridge: OTHER_CFLAGS += bpflib.c in_cksum.c
+net_bridge: OTHER_LDFLAGS += -ldarwintest_utils
+
ifneq (osx,$(TARGET_NAME))
EXCLUDED_SOURCES += no32exec_35914211.c no32exec_35914211_helper.c
else # target = osx
prng: OTHER_LDFLAGS += -ldarwintest_utils
+OTHER_TEST_TARGETS += io_catalog_send_data
+
+io_catalog_send_data: INVALID_ARCHS = i386
+io_catalog_send_data: OTHER_CFLAGS += -DTEST_UNENTITLED -framework IOKit -framework CoreFoundation -framework Foundation
+io_catalog_send_data: iokit/io_catalog_send_data.m
+ $(CC) $(DT_CFLAGS) $(OTHER_CFLAGS) $(CFLAGS) $(DT_LDFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) $< -o $(SYMROOT)/$@
+
+task_create_suid_cred: CODE_SIGN_ENTITLEMENTS = ./task_create_suid_cred_entitlement.plist
+
+OTHER_TEST_TARGETS += task_create_suid_cred_unentitled
+task_create_suid_cred_unentitled: OTHER_CFLAGS += -DUNENTITLED
+task_create_suid_cred_unentitled: task_create_suid_cred.c
+ $(CC) $(DT_CFLAGS) $(OTHER_CFLAGS) $(CFLAGS) $(DT_LDFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) $< -o $(SYMROOT)/$@
+
include $(DEVELOPER_DIR)/AppleInternal/Makefiles/darwintest/Makefile.targets
--- /dev/null
+/*
+ * Copyright (c) 2000-2018 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <net/bpf.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <errno.h>
+#include <net/if.h>
+#include <stdbool.h>
+#define PRIVATE_EXTERN __private_extern__
+
+#include "bpflib.h"
+
+#ifdef TESTING
+#include "util.h"
+#endif /* TESTING */
+
+PRIVATE_EXTERN int
+bpf_set_timeout(int fd, struct timeval * tv_p)
+{
+ return ioctl(fd, BIOCSRTIMEOUT, tv_p);
+}
+
+PRIVATE_EXTERN int
+bpf_get_blen(int fd, int * blen)
+{
+ return ioctl(fd, BIOCGBLEN, blen);
+}
+
+PRIVATE_EXTERN int
+bpf_set_header_complete(int fd, u_int header_complete)
+{
+ return ioctl(fd, BIOCSHDRCMPLT, &header_complete);
+}
+
+PRIVATE_EXTERN int
+bpf_set_see_sent(int fd, u_int see_sent)
+{
+ return ioctl(fd, BIOCSSEESENT, &see_sent);
+}
+
+PRIVATE_EXTERN int
+bpf_dispose(int bpf_fd)
+{
+ if (bpf_fd >= 0) {
+ return close(bpf_fd);
+ }
+ return 0;
+}
+
+PRIVATE_EXTERN int
+bpf_new(void)
+{
+ char bpfdev[256];
+ int i;
+ int fd = -1;
+
+ for (i = 0; true; i++) {
+ snprintf(bpfdev, sizeof(bpfdev), "/dev/bpf%d", i);
+ fd = open(bpfdev, O_RDWR, 0);
+ if (fd >= 0) {
+#ifdef SO_TC_CTL
+ int tc = SO_TC_CTL;
+ (void) ioctl(fd, BIOCSETTC, &tc);
+#endif /* SO_TC_CTL */
+ break;
+ }
+ if (errno != EBUSY) {
+ break;
+ }
+ }
+ return fd;
+}
+
+PRIVATE_EXTERN int
+bpf_setif(int fd, const char * en_name)
+{
+ struct ifreq ifr;
+
+ strlcpy(ifr.ifr_name, en_name, sizeof(ifr.ifr_name));
+ return ioctl(fd, BIOCSETIF, &ifr);
+}
+
+PRIVATE_EXTERN int
+bpf_set_immediate(int fd, u_int value)
+{
+ return ioctl(fd, BIOCIMMEDIATE, &value);
+}
+
+PRIVATE_EXTERN int
+bpf_filter_receive_none(int fd)
+{
+ struct bpf_insn insns[] = {
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ };
+ struct bpf_program prog;
+
+ prog.bf_len = sizeof(insns) / sizeof(struct bpf_insn);
+ prog.bf_insns = insns;
+ return ioctl(fd, BIOCSETF, &prog);
+}
+
+PRIVATE_EXTERN int
+bpf_arp_filter(int fd, int type_offset, int type, u_int pkt_size)
+{
+ struct bpf_insn insns[] = {
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, type_offset),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, type, 0, 1),
+ BPF_STMT(BPF_RET + BPF_K, pkt_size),
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ };
+ struct bpf_program prog;
+
+ prog.bf_len = sizeof(insns) / sizeof(struct bpf_insn);
+ prog.bf_insns = insns;
+ return ioctl(fd, BIOCSETF, &prog);
+}
+
+#ifdef TESTING
+#include <net/if_arp.h>
+#include <net/ethernet.h>
+#include <netinet/if_ether.h>
+
+
+void
+bpf_read_continuously(int fd, u_int blen)
+{
+ int n;
+ char * rxbuf = malloc(blen);
+
+ printf("rx buf len is %d\n", blen);
+ while (1) {
+ n = read(fd, rxbuf, blen);
+ if (n < 0) {
+ perror("bpf_read_continuously");
+ return;
+ }
+ if (n == 0) {
+ continue;
+ }
+ print_data(rxbuf, n);
+ }
+}
+
+int
+main(int argc, char * argv[])
+{
+ int fd = bpf_new();
+ char * en_name = "en0";
+ u_int bpf_blen = 0;
+
+ if (fd < 0) {
+ perror("no bpf devices");
+ exit(1);
+ }
+
+ if (argc > 1) {
+ en_name = argv[1];
+ }
+ (void)bpf_set_immediate(fd, 1);
+ if (bpf_arp_filter(fd, 12, ETHERTYPE_ARP,
+ sizeof(struct ether_arp) + sizeof(struct ether_header))
+ < 0) {
+ perror("bpf_arp_filter");
+ }
+ if (bpf_setif(fd, en_name) < 0) {
+ perror("bpf_attach");
+ exit(1);
+ }
+
+ if (bpf_get_blen(fd, &bpf_blen) < 0) {
+ perror("bpf_get_blen");
+ exit(1);
+ }
+ bpf_read_continuously(fd, bpf_blen);
+ exit(0);
+ return 0;
+}
+#endif /* TESTING */
--- /dev/null
+/*
+ * Copyright (c) 2000 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+
+#ifndef _S_BPFLIB_H
+#define _S_BPFLIB_H
+
+int bpf_get_blen(int fd, int * blen);
+int bpf_new(void);
+int bpf_dispose(int fd);
+int bpf_setif(int fd, const char * en_name);
+int bpf_set_immediate(int fd, u_int value);
+int bpf_filter_receive_none(int fd);
+int bpf_arp_filter(int fd, int type_offset, int type, u_int packet_size);
+int bpf_set_timeout(int fd, struct timeval * tv_p);
+int bpf_set_header_complete(int fd, u_int header_complete);
+int bpf_set_see_sent(int fd, u_int see_send);
+
+#endif /* _S_BPFLIB_H */
--- /dev/null
+#include <darwintest.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <darwintest_utils.h>
+#include <mach/vm_page_size.h>
+
+/** Verify that F_ADDSIGS does not page fault off the end of the user blob
+ * 1. Find VA space for 3 pages
+ * 2. Unmap the last page
+ * 3. Start fs_blob_start at PAGE_SIZE + 1 bytes away from the end of the
+ * VA region (such that any read of more than PAGE_SIZE + 1 bytes will fault)
+ * 4. Call fcntl with the arguments and verify the output is not EFAULT
+ */
+T_DECL(fcntl_addsig, "Verify that fcntl(F_ADDSIGS) doesn't EFAULT", T_META_NAMESPACE("xnu.vfs")) {
+ void* blob_space = mmap(NULL, vm_page_size * 3, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ T_ASSERT_NE(blob_space, MAP_FAILED, "Blob Region: %p [%zd]", blob_space, vm_page_size);
+
+ T_ASSERT_POSIX_SUCCESS(munmap((char*)blob_space + (vm_page_size * 2), vm_page_size), NULL);
+
+ size_t blob_size = vm_page_size + 1;
+ char* blob_start = ((char*)blob_space) + (vm_page_size * 2) - blob_size;
+ fsignatures_t args = { .fs_file_start = 0, .fs_blob_start = blob_start, .fs_blob_size = blob_size};
+
+ // Create test file to operate on
+ const char * tmp_dir = dt_tmpdir();
+ char tmp_file_name[PATH_MAX];
+ sprintf(tmp_file_name, "%s/foo", tmp_dir);
+ FILE* tmp_file = fopen(tmp_file_name, "wx");
+ fprintf(tmp_file, "Just some random content");
+ fclose(tmp_file);
+
+ int fd = open(tmp_file_name, O_RDONLY);
+ T_ASSERT_POSIX_SUCCESS(fd, "tmp file: %s", tmp_file_name);
+
+ // This command will fail, but should not fail with EFAULT
+ int result = fcntl(fd, F_ADDSIGS, &args);
+ int error = errno;
+ T_QUIET; T_EXPECT_EQ(result, -1, NULL);
+ // EBADEXEC is expected, but not required for success of this test
+ T_EXPECT_NE(error, EFAULT, "fcntl: %d (%d:%s)", result, error, strerror(error));
+}
--- /dev/null
+/*
+ * Copyright (c) 2000-2018 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Copyright (c) 1988, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include "in_cksum.h"
+
+typedef union {
+ char c[2];
+ u_short s;
+} short_union_t;
+
+typedef union {
+ u_short s[2];
+ long l;
+} long_union_t;
+
+static __inline__ void
+reduce(int * sum)
+{
+ long_union_t l_util;
+
+ l_util.l = *sum;
+ *sum = l_util.s[0] + l_util.s[1];
+ if (*sum > 65535) {
+ *sum -= 65535;
+ }
+ return;
+}
+
+
+#include <stdio.h>
+
+unsigned short
+in_cksum(void * pkt, int len)
+{
+ u_short * w;
+ int sum = 0;
+
+ w = (u_short *)pkt;
+ while ((len -= 32) >= 0) {
+ sum += w[0]; sum += w[1];
+ sum += w[2]; sum += w[3];
+ sum += w[4]; sum += w[5];
+ sum += w[6]; sum += w[7];
+ sum += w[8]; sum += w[9];
+ sum += w[10]; sum += w[11];
+ sum += w[12]; sum += w[13];
+ sum += w[14]; sum += w[15];
+ w += 16;
+ }
+ len += 32;
+ while ((len -= 8) >= 0) {
+ sum += w[0]; sum += w[1];
+ sum += w[2]; sum += w[3];
+ w += 4;
+ }
+ len += 8;
+ if (len) {
+ reduce(&sum);
+ while ((len -= 2) >= 0) {
+ sum += *w++;
+ }
+ }
+ if (len == -1) { /* odd-length packet */
+ short_union_t s_util;
+
+ s_util.s = 0;
+ s_util.c[0] = *((char *)w);
+ s_util.c[1] = 0;
+ sum += s_util.s;
+ }
+ reduce(&sum);
+ return ~sum & 0xffff;
+}
--- /dev/null
+#ifndef _S_IN_CKSUM_H
+#define _S_IN_CKSUM_H
+/*
+ * Copyright (c) 2000 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+extern unsigned short in_cksum(void * pkt, int len);
+
+#endif /* _S_IN_CKSUM_H */
--- /dev/null
+/*
+ * io_catalog_send_data.m
+ *
+ * A regression test that builds an IORegistry entry with a mismatching
+ * IOService and IOUserClientClass via IOCatalogueSendData, to verify
+ * whether the exploit risk in IOCatalogueSendData still exists.
+ *
+ */
+#include <darwintest.h>
+
+#include <Foundation/Foundation.h>
+#include <IOKit/IOCFSerialize.h>
+#include <IOKit/IOKitLib.h>
+
+#define kIOClassKey @"IOClass"
+#define kIOProviderClassKey @"IOProviderClass"
+#define kIOMatchCategoryKey @"IOMatchCategory"
+#define kIOUserClientClassKey @"IOUserClientClass"
+#define vIOProviderClassValue @"IOResources"
+
+T_GLOBAL_META(T_META_NAMESPACE("xnu.iokit"),
+ T_META_RUN_CONCURRENTLY(true));
+
+kern_return_t
+build_ioregistry_by_catalog_send_data(const char *match_name,
+ const char *userclient_name, const char *service_name)
+{
+ kern_return_t kret;
+
+ NSArray *rootCatalogueArray = @[@{
+ kIOProviderClassKey: vIOProviderClassValue,
+ kIOClassKey: @(service_name),
+ kIOUserClientClassKey: @(userclient_name),
+ kIOMatchCategoryKey: @(match_name)
+ }];
+
+ CFDataRef cfData = IOCFSerialize((__bridge CFTypeRef)rootCatalogueArray,
+ kIOCFSerializeToBinary);
+
+ kret = IOCatalogueSendData(MACH_PORT_NULL, 1, CFDataGetBytePtr(cfData),
+ CFDataGetLength(cfData));
+
+ if (cfData) {
+ CFRelease(cfData);
+ }
+
+ return kret;
+}
+
+bool
+test_open_ioregistry(const char *match_name, const char *service_name,
+ bool exploit)
+{
+ kern_return_t kret;
+ bool ioreg_found = false;
+ CFStringRef cfstrMatchName = NULL;
+ io_connect_t conn = IO_OBJECT_NULL;
+ io_iterator_t iter = IO_OBJECT_NULL, obj = IO_OBJECT_NULL;
+ CFMutableDictionaryRef service_info = NULL, properties = NULL;
+
+ service_info = IOServiceMatching(service_name);
+ kret = IOServiceGetMatchingServices(kIOMasterPortDefault, service_info, &iter);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "IOServiceGetMatchingServices");
+ cfstrMatchName = CFStringCreateWithCString(kCFAllocatorDefault,
+ match_name, kCFStringEncodingUTF8);
+
+ while ((obj = IOIteratorNext(iter))) {
+ kret = IORegistryEntryCreateCFProperties(obj, &properties,
+ kCFAllocatorDefault, kNilOptions);
+ if (kret != KERN_SUCCESS) {
+ T_LOG("IORegistryEntryCreateCFProperties fails, 0x%08X",
+ (uint32_t)kret);
+ IOObjectRelease(obj);
+ continue;
+ }
+
+ CFStringRef value = CFDictionaryGetValue(properties, CFSTR("IOMatchCategory"));
+ if (value && CFGetTypeID(value) == CFStringGetTypeID() &&
+ CFEqual(value, cfstrMatchName)) {
+ ioreg_found = true;
+ } else {
+ IOObjectRelease(obj);
+ continue;
+ }
+
+ if (!exploit) {
+ goto bail;
+ }
+
+ T_LOG("try to exploit by opening io service, possibly panic?");
+ IOServiceOpen(obj, mach_task_self(), 0, &conn);
+ IOObjectRelease(obj);
+
+ break;
+ }
+
+bail:
+ if (cfstrMatchName) {
+ CFRelease(cfstrMatchName);
+ }
+
+ if (properties) {
+ CFRelease(properties);
+ }
+
+ if (iter != IO_OBJECT_NULL) {
+ IOObjectRelease(iter);
+ }
+
+ if (conn != IO_OBJECT_NULL) {
+ IOServiceClose(conn);
+ }
+
+ return ioreg_found;
+}
+
+T_DECL(io_catalog_send_data_test, "regression test to build an IORegistry entry"
+ " with mismatching IOService and IOUserClientClass by IOCatalogueSendData, "
+ "to verify if exploit risk still exists in IOCatalogueSendData for "
+ "potential DoS - <rdar://problem/31558871>")
+{
+ kern_return_t kret;
+
+ kret = build_ioregistry_by_catalog_send_data("fooBar",
+ "IOSurfaceRootUserClient", "IOReportHub");
+#if (TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR)
+ /* this trick to build an entry by io_catalog_send_data should fail */
+ T_EXPECT_EQ(kret, kIOReturnNotPrivileged, "build an entry with"
+ " mismatch IOService and IOUserClientClass by IOCatalogueSendData "
+ "should fail as kIOReturnNotPrivileged");
+#else
+ T_EXPECT_EQ(kret, KERN_SUCCESS, "IOCatalogueSendData should return success with kextd");
+#endif
+ T_EXPECT_FALSE(test_open_ioregistry("fooBar", "IOReportHub", false),
+ "Mismatched entry built by IOCatalogueSendData should not be opened");
+}
-/* Copyright (c) 2018 Apple Inc. All rights reserved. */
+// Copyright (c) 2018-2020 Apple Inc. All rights reserved.
#include <darwintest.h>
+#include <ktrace/config.h>
+#include <ktrace/session.h>
#include <inttypes.h>
+#include <libproc.h>
+#include <pthread.h>
#include <stdint.h>
+#include <sys/resource.h>
#include <sys/sysctl.h>
#include <kperf/kpc.h>
+#include <kperf/kperf.h>
+
+#include "ktrace_helpers.h"
+#include "kperf_helpers.h"
T_GLOBAL_META(
T_META_NAMESPACE("xnu.ktrace"),
T_META_ASROOT(true),
T_META_CHECK_LEAKS(false));
-T_DECL(fixed_thread_counters,
- "test that fixed thread counters return monotonically increasing values")
+struct machine {
+ unsigned int ncpus;
+ unsigned int nfixed;
+ unsigned int nconfig;
+};
+
+static void
+skip_if_unsupported(void)
+{
+ int r;
+ int supported = 0;
+ size_t supported_size = sizeof(supported);
+
+ r = sysctlbyname("kern.monotonic.supported", &supported, &supported_size,
+ NULL, 0);
+ if (r < 0) {
+ T_WITH_ERRNO;
+ T_SKIP("could not find \"kern.monotonic.supported\" sysctl");
+ }
+
+ if (!supported) {
+ T_SKIP("PMCs are not supported on this platform");
+ }
+}
+
+static struct rusage_info_v4 pre_ru = {};
+
+static void
+start_kpc(void)
+{
+ T_SETUPBEGIN;
+
+ kpc_classmask_t classes = KPC_CLASS_FIXED_MASK |
+ KPC_CLASS_CONFIGURABLE_MASK;
+ int ret = kpc_set_counting(classes);
+ T_ASSERT_POSIX_SUCCESS(ret, "started counting");
+
+ ret = proc_pid_rusage(getpid(), RUSAGE_INFO_V4, (rusage_info_t *)&pre_ru);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "got rusage information");
+
+ kpc_classmask_t classes_on = kpc_get_counting();
+ T_QUIET;
+ T_ASSERT_EQ(classes, classes_on, "classes counting is correct");
+
+ T_SETUPEND;
+}
+
+static void kpc_reset_atend(void);
+
+#if defined(__arm__) || defined(__arm64__)
+#define CYCLES_EVENT 0x02
+#else // defined(__arm__) || defined(__arm64__)
+#define CYCLES_EVENT (0x10000 | 0x20000 | 0x3c)
+#endif // !defined(__arm__) && !defined(__arm64__)
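+
+// Reading of the encodings above: 0x02 is the CORE_CYCLE event that the
+// whitelist test below also relies on, and on x86 0x3c selects unhalted core
+// cycles with 0x10000/0x20000 enabling user- and kernel-mode counting in the
+// PERFEVTSEL-style config word kpc expects.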
+
+static void
+prepare_kpc(struct machine *mch, bool config, bool reset)
{
+ T_SETUPBEGIN;
+
+ if (!reset) {
+ T_ATEND(kpc_reset_atend);
+ }
+
+ size_t ncpus_sz = sizeof(mch->ncpus);
+ int ret = sysctlbyname("hw.logicalcpu_max", &mch->ncpus, &ncpus_sz,
+ NULL, 0);
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(ret, "sysctlbyname(hw.logicalcpu_max)");
+ T_QUIET;
+ T_ASSERT_GT(mch->ncpus, 0, "must have some number of CPUs");
+
+ ret = kpc_force_all_ctrs_set(1);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_force_all_ctrs_set(1)");
+
+ int forcing = 0;
+ ret = kpc_force_all_ctrs_get(&forcing);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_force_all_ctrs_get");
+ T_QUIET; T_ASSERT_EQ(forcing, 1, "counters must be forced");
+
+ mch->nfixed = kpc_get_counter_count(KPC_CLASS_FIXED_MASK);
+ mch->nconfig = kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK);
+
+ T_LOG("machine: ncpus = %d, nfixed = %d, nconfig = %d", mch->ncpus,
+ mch->nfixed, mch->nconfig);
+
+ if (config) {
+ uint32_t nconfigs = kpc_get_config_count(
+ KPC_CLASS_CONFIGURABLE_MASK);
+ uint64_t *configs = calloc(nconfigs, sizeof(*configs));
+ T_QUIET; T_ASSERT_NOTNULL(configs, "allocated config words");
+
+ for (unsigned int i = 0; i < nconfigs; i++) {
+ configs[i] = reset ? 0 : CYCLES_EVENT;
+ }
+
+ ret = kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, configs);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_config");
+ free(configs);
+ }
+
+ T_SETUPEND;
+}
+
+static void
+kpc_reset_atend(void)
+{
+ struct machine mch = {};
+ prepare_kpc(&mch, true, true);
+ uint64_t *periods = calloc(mch.nconfig, sizeof(*periods));
+ T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(periods, "allocate periods array");
+
+ int ret = kpc_set_period(KPC_CLASS_CONFIGURABLE_MASK, periods);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_period");
+ free(periods);
+}
+
+static void *
+spin(void *arg)
+{
+ while (*(volatile int *)arg == 0) {
+ ;
+ }
+
+ return NULL;
+}
+
+static pthread_t *
+start_threads(const struct machine *mch, void *(*func)(void *), void *arg)
+{
+ T_SETUPBEGIN;
+
+ pthread_t *threads = calloc((unsigned int)mch->ncpus,
+ sizeof(*threads));
+ T_QUIET; T_ASSERT_NOTNULL(threads, "allocated array of threads");
+ for (unsigned int i = 0; i < mch->ncpus; i++) {
+ int error = pthread_create(&threads[i], NULL, func, arg);
+ T_QUIET; T_ASSERT_POSIX_ZERO(error, "pthread_create");
+ }
+
+ T_SETUPEND;
+
+ return threads;
+}
+
+static void
+end_threads(const struct machine *mch, pthread_t *threads)
+{
+ for (unsigned int i = 0; i < mch->ncpus; i++) {
+ int error = pthread_join(threads[i], NULL);
+ T_QUIET; T_ASSERT_POSIX_ZERO(error, "joined thread %d", i);
+ }
+ free(threads);
+}
+
+struct tally {
+ uint64_t firstvalue; // counter value seen on the first check
+ uint64_t lastvalue; // counter value seen on the most recent check
+ uint64_t nchecks; // number of times the counter has been read
+ uint64_t nzero; // reads that returned zero
+ uint64_t nstuck; // reads that matched the previous value
+ uint64_t ndecrease; // reads that went backwards
+};
+
+static void
+check_counters(unsigned int ncpus, unsigned int nctrs, struct tally *tallies,
+ uint64_t *counts)
+{
+ for (unsigned int i = 0; i < ncpus; i++) {
+ for (unsigned int j = 0; j < nctrs; j++) {
+ unsigned int ctr = i * nctrs + j;
+ struct tally *tly = &tallies[ctr];
+ uint64_t count = counts[ctr];
+
+ if (counts[ctr] == 0) {
+ tly->nzero++;
+ }
+ if (tly->lastvalue == count) {
+ tly->nstuck++;
+ }
+ if (tly->lastvalue > count) {
+ tly->ndecrease++;
+ }
+ tly->lastvalue = count;
+ if (tly->nchecks == 0) {
+ tly->firstvalue = count;
+ }
+ tly->nchecks++;
+ }
+ }
+}
+static void
+check_tally(const char *name, unsigned int ncpus, unsigned int nctrs,
+ struct tally *tallies)
+{
+ for (unsigned int i = 0; i < ncpus; i++) {
+ for (unsigned int j = 0; j < nctrs; j++) {
+ unsigned int ctr = i * nctrs + j;
+ struct tally *tly = &tallies[ctr];
+
+ T_LOG("CPU %2u PMC %u: nchecks = %llu, last value = %llx, "
+ "delta = %llu, nstuck = %llu", i, j,
+ tly->nchecks, tly->lastvalue, tly->lastvalue - tly->firstvalue,
+ tly->nstuck);
+ T_QUIET; T_EXPECT_GT(tly->nchecks, 0ULL,
+ "checked that CPU %d %s counter %d values", i, name, j);
+ T_QUIET; T_EXPECT_EQ(tly->nzero, 0ULL,
+ "CPU %d %s counter %d value was zero", i, name, j);
+ T_QUIET; T_EXPECT_EQ(tly->nstuck, 0ULL,
+ "CPU %d %s counter %d value was stuck", i, name, j);
+ T_QUIET; T_EXPECT_EQ(tly->ndecrease, 0ULL,
+ "CPU %d %s counter %d value decreased", i, name, j);
+ }
+ }
+}
+
+#define TESTDUR_NS (5 * NSEC_PER_SEC)
+
+T_DECL(kpc_cpu_direct_configurable,
+ "test that configurable counters return monotonically increasing values")
+{
+ skip_if_unsupported();
+
+ struct machine mch = {};
+ prepare_kpc(&mch, true, false);
+
+ int until = 0;
+ pthread_t *threads = start_threads(&mch, spin, &until);
+ start_kpc();
+
+ T_SETUPBEGIN;
+
+ uint64_t startns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
+ uint64_t *counts = kpc_counterbuf_alloc();
+ T_QUIET; T_ASSERT_NOTNULL(counts, "allocated space for counter values");
+ memset(counts, 0, sizeof(*counts) * mch.ncpus * (mch.nfixed + mch.nconfig));
+ struct tally *tly = calloc(mch.ncpus * mch.nconfig, sizeof(*tly));
+ T_QUIET; T_ASSERT_NOTNULL(tly, "allocated space for tallies");
+
+ T_SETUPEND;
+
+ int n = 0;
+ while (clock_gettime_nsec_np(CLOCK_MONOTONIC) - startns < TESTDUR_NS) {
+ int ret = kpc_get_cpu_counters(true,
+ KPC_CLASS_CONFIGURABLE_MASK, NULL, counts);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_get_cpu_counters");
+
+ check_counters(mch.ncpus, mch.nconfig, tly, counts);
+
+ usleep(10000);
+ n++;
+ if (n % 100 == 0) {
+ T_LOG("checked 100 times");
+ }
+ }
+
+ check_tally("config", mch.ncpus, mch.nconfig, tly);
+
+ until = 1;
+ end_threads(&mch, threads);
+}
+
+T_DECL(kpc_thread_direct_instrs_cycles,
+ "test that fixed thread counters return monotonically increasing values")
+{
int err;
uint32_t ctrs_cnt;
uint64_t *ctrs_a;
uint64_t *ctrs_b;
+ skip_if_unsupported();
+
T_SETUPBEGIN;
ctrs_cnt = kpc_get_counter_count(KPC_CLASS_FIXED_MASK);
free(ctrs_b);
}
+#define PMI_TEST_DURATION_NS (15 * NSEC_PER_SEC)
+#define PERIODIC_CPU_COUNT_MS (250)
+#define NTIMESLICES (72)
+#define PMI_PERIOD (50ULL * 1000 * 1000)
+#define END_EVENT KDBG_EVENTID(0xfe, 0xfe, 0)
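+
+// Reading of the constants above: with PMI_PERIOD at 50 million, the PMI
+// handler runs once per 50 million occurrences of the configured event
+// (cycles here); END_EVENT is an otherwise-unused tracepoint emitted only to
+// mark the end of the trace window.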
+
+struct cpu {
+ uint64_t prev_count, max_skid;
+ unsigned int timeslices[NTIMESLICES];
+};
+
+T_DECL(kpc_pmi_configurable,
+ "test that PMIs don't interfere with sampling counters in kperf")
+{
+ skip_if_unsupported();
+
+ start_controlling_ktrace();
+ struct machine mch = {};
+ prepare_kpc(&mch, true, false);
+
+ T_SETUPBEGIN;
+
+ uint64_t *periods = calloc(mch.nconfig, sizeof(*periods));
+ T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(periods, "allocate periods array");
+ periods[0] = PMI_PERIOD;
+
+ int ret = kpc_set_period(KPC_CLASS_CONFIGURABLE_MASK, periods);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_period");
+ free(periods);
+
+ int32_t *actions = calloc(mch.nconfig, sizeof(*actions));
+ T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(actions, "allocate actions array");
+ actions[0] = 1;
+ ret = kpc_set_actionid(KPC_CLASS_CONFIGURABLE_MASK, actions);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_actionid");
+ free(actions);
+
+ (void)kperf_action_count_set(1);
+ ret = kperf_action_samplers_set(1, KPERF_SAMPLER_TINFO);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kperf_action_samplers_set");
+
+ ktrace_config_t ktconfig = ktrace_config_create_current();
+ T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(ktconfig, "create current config");
+ ret = ktrace_config_print_description(ktconfig, stdout);
+ T_QUIET; T_ASSERT_POSIX_ZERO(ret, "print config description");
+
+ struct cpu *cpus = calloc(mch.ncpus, sizeof(*cpus));
+ T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(cpus, "allocate CPUs array");
+
+ __block unsigned int nsamples = 0;
+ __block uint64_t first_ns = 0;
+ __block uint64_t last_ns = 0;
+
+ ktrace_session_t sess = ktrace_session_create();
+ T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(sess, "ktrace_session_create");
+
+ ktrace_events_single(sess, PERF_KPC_PMI, ^(struct trace_point *tp) {
+ if (tp->debugid & DBG_FUNC_END) {
+ return;
+ }
+
+ uint64_t cur_ns = 0;
+ int cret = ktrace_convert_timestamp_to_nanoseconds(sess,
+ tp->timestamp, &cur_ns);
+ T_QUIET; T_ASSERT_POSIX_ZERO(cret, "convert timestamp");
+
+ uint64_t count = tp->arg2;
+ if (first_ns == 0) {
+ first_ns = cur_ns;
+ }
+ struct cpu *cpu = &cpus[tp->cpuid];
+
+ if (cpu->prev_count != 0) {
+ uint64_t delta = count - cpu->prev_count;
+ T_QUIET; T_EXPECT_GT(delta, PMI_PERIOD,
+ "counter delta should be greater than PMI period");
+ uint64_t skid = delta - PMI_PERIOD;
+ if (skid > cpu->max_skid) {
+ cpu->max_skid = skid;
+ }
+ }
+ cpu->prev_count = count;
+
+ double slice = (double)(cur_ns - first_ns) / PMI_TEST_DURATION_NS *
+ NTIMESLICES;
+ if (slice < NTIMESLICES) {
+ cpu->timeslices[(unsigned int)slice] += 1;
+ }
+
+ nsamples++;
+ });
+
+ ktrace_events_single(sess, END_EVENT, ^(struct trace_point *tp) {
+ int cret = ktrace_convert_timestamp_to_nanoseconds(sess,
+ tp->timestamp, &last_ns);
+ T_QUIET; T_ASSERT_POSIX_ZERO(cret, "convert timestamp");
+
+ ktrace_end(sess, 1);
+ });
+
+ uint64_t *counts = kpc_counterbuf_alloc();
+ T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(counts,
+ "allocated counter values array");
+ memset(counts, 0, sizeof(*counts) * mch.ncpus * (mch.nfixed + mch.nconfig));
+ struct tally *tly = calloc(mch.ncpus * (mch.nconfig + mch.nfixed),
+ sizeof(*tly));
+ T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(tly, "allocated tallies array");
+
+ dispatch_source_t cpu_count_timer = dispatch_source_create(
+ DISPATCH_SOURCE_TYPE_TIMER, 0, 0, dispatch_get_main_queue());
+ dispatch_source_set_timer(cpu_count_timer, dispatch_time(DISPATCH_TIME_NOW,
+ PERIODIC_CPU_COUNT_MS * NSEC_PER_MSEC),
+ PERIODIC_CPU_COUNT_MS * NSEC_PER_MSEC, 0);
+ dispatch_source_set_cancel_handler(cpu_count_timer, ^{
+ dispatch_release(cpu_count_timer);
+ });
+
+ __block uint64_t first_check_ns = 0;
+ __block uint64_t last_check_ns = 0;
+
+ dispatch_source_set_event_handler(cpu_count_timer, ^{
+ int cret = kpc_get_cpu_counters(true,
+ KPC_CLASS_FIXED_MASK | KPC_CLASS_CONFIGURABLE_MASK, NULL, counts);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(cret, "kpc_get_cpu_counters");
+
+ if (!first_check_ns) {
+ first_check_ns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
+ } else {
+ last_check_ns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
+ }
+ check_counters(mch.ncpus, mch.nfixed + mch.nconfig, tly, counts);
+ });
+
+ int stop = 0;
+ (void)start_threads(&mch, spin, &stop);
+
+ ktrace_set_completion_handler(sess, ^{
+ dispatch_cancel(cpu_count_timer);
+
+ check_tally("config", mch.ncpus, mch.nfixed + mch.nconfig, tly);
+
+ struct rusage_info_v4 post_ru = {};
+ int ruret = proc_pid_rusage(getpid(), RUSAGE_INFO_V4,
+ (rusage_info_t *)&post_ru);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ruret, "got rusage information");
+
+ T_LOG("saw %llu cycles in process", post_ru.ri_cycles - pre_ru.ri_cycles);
+ uint64_t total = 0;
+
+ unsigned int nsamplecpus = 0;
+ char sample_slices[NTIMESLICES + 1];
+ sample_slices[NTIMESLICES] = '\0';
+ for (unsigned int i = 0; i < mch.ncpus; i++) {
+ memset(sample_slices, '.', sizeof(sample_slices) - 1);
+
+ struct cpu *cpu = &cpus[i];
+ unsigned int nsampleslices = 0, ncpusamples = 0,
+ last_contiguous = 0;
+ bool seen_empty = false;
+ for (unsigned int j = 0; j < NTIMESLICES; j++) {
+ unsigned int nslice = cpu->timeslices[j];
+ nsamples += nslice;
+ ncpusamples += nslice;
+ if (nslice > 0) {
+ nsampleslices++;
+ sample_slices[j] = '*';
+ } else {
+ seen_empty = true;
+ }
+ if (!seen_empty) {
+ last_contiguous = j;
+ }
+ }
+ unsigned int ctr = i * (mch.nfixed + mch.nconfig) + mch.nfixed;
+ uint64_t delta = tly[ctr].lastvalue - tly[ctr].firstvalue;
+ T_LOG("%g GHz", (double)delta / (last_check_ns - first_check_ns));
+ total += delta;
+ T_LOG("CPU %2u: %4u/%u, %6u/%llu, max skid = %llu (%.1f%%), "
+ "last contiguous = %u", i,
+ nsampleslices, NTIMESLICES, ncpusamples, delta / PMI_PERIOD,
+ cpu->max_skid, (double)cpu->max_skid / PMI_PERIOD * 100,
+ last_contiguous);
+ T_LOG("%s", sample_slices);
+ if (nsampleslices > 0) {
+ nsamplecpus++;
+ }
+ T_EXPECT_EQ(last_contiguous, NTIMESLICES - 1,
+ "CPU %2u: saw samples in each time slice", i);
+ }
+ T_LOG("kpc reported %llu total cycles", total);
+ T_LOG("saw %u sample events, across %u/%u cpus", nsamples, nsamplecpus,
+ mch.ncpus);
+ T_END;
+ });
+
+ int dbglvl = 3;
+ ret = sysctlbyname("kperf.debug_level", NULL, NULL, &dbglvl,
+ sizeof(dbglvl));
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "set kperf debug level");
+ ret = kperf_sample_set(1);
+ T_ASSERT_POSIX_SUCCESS(ret, "kperf_sample_set");
+
+ start_kpc();
+
+ int error = ktrace_start(sess, dispatch_get_main_queue());
+ T_ASSERT_POSIX_ZERO(error, "started tracing");
+
+ dispatch_after(dispatch_time(DISPATCH_TIME_NOW, PMI_TEST_DURATION_NS),
+ dispatch_get_main_queue(), ^{
+ T_LOG("ending tracing after timeout");
+ kdebug_trace(END_EVENT, 0, 0, 0, 0);
+ });
+
+ dispatch_activate(cpu_count_timer);
+
+ T_SETUPEND;
+
+ dispatch_main();
+}
+
#if defined(__arm64__)
-/*
- * This policy only applies to arm64 devices.
- */
+// This policy only applies to arm64 devices.
static int g_prev_disablewl = 0;
T_DECL(whitelist, "ensure kpc's whitelist is filled out")
{
- /* Start enforcing the whitelist. */
+ // Start enforcing the whitelist.
int set = 0;
size_t getsz = sizeof(g_prev_disablewl);
int ret = sysctlbyname("kpc.disable_whitelist", &g_prev_disablewl, &getsz,
uint32_t nconfigs = kpc_get_config_count(KPC_CLASS_CONFIGURABLE_MASK);
uint64_t *config = calloc(nconfigs, sizeof(*config));
- /*
- * Check that events in the whitelist are allowed. CORE_CYCLE (0x2) is
- * always present in the whitelist.
- */
+ // Check that events in the whitelist are allowed. CORE_CYCLE (0x2) is
+ // always present in the whitelist.
config[0] = 0x02;
ret = kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, config);
T_ASSERT_POSIX_SUCCESS(ret, "configured kpc to count cycles");
- /* Check that non-event bits are ignored by the whitelist. */
+ // Check that non-event bits are ignored by the whitelist.
config[0] = 0x102;
ret = kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, config);
T_ASSERT_POSIX_SUCCESS(ret,
"configured kpc to count cycles with non-event bits set");
- /* Check that configurations of non-whitelisted events fail. */
+ // Check that configurations of non-whitelisted events fail.
config[0] = 0xfe;
ret = kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, config);
T_ASSERT_POSIX_FAILURE(ret, EPERM,
"shouldn't allow arbitrary events with whitelist enabled");
- /* Clean up the configuration. */
+ // Clean up the configuration.
config[0] = 0;
(void)kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, config);
free(config);
}
-#endif /* defined(__arm64__) */
+#endif // defined(__arm64__)
#define PERF_KPC_REG KDBG_EVENTID(DBG_PERF, 6, 5)
#define PERF_KPC_REG32 KDBG_EVENTID(DBG_PERF, 6, 7)
#define PERF_INSTR_DATA KDBG_EVENTID(DBG_PERF, 1, 17)
+#define PERF_EVENT KDBG_EVENTID(DBG_PERF, 0, 0)
#define SCHED_HANDOFF KDBG_EVENTID(DBG_MACH, DBG_MACH_SCHED, \
MACH_STACK_HANDOFF)
void configure_kperf_stacks_timer(pid_t pid, unsigned int period_ms);
+#define PERF_SAMPLE KDBG_EVENTID(DBG_PERF, 0, 0)
+#define PERF_KPC_PMI KDBG_EVENTID(DBG_PERF, 6, 0)
+
#endif /* !defined(KPERF_HELPERS_H) */
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>Label</key>
+ <string>com.apple.xnu.test.task_create_suid_cred</string>
+ <key>MachServices</key>
+ <dict>
+ <key>com.apple.xnu.test.task_create_suid_cred</key>
+ <true/>
+ </dict>
+ <key>ThrottleInterval</key>
+ <integer>1</integer>
+ <key>UserName</key>
+ <string>root</string>
+ <key>ProcessType</key>
+ <string>Adaptive</string>
+ <key>EnvironmentVariables</key>
+ <dict>
+ <key>MallocNanoZone</key>
+ <string>1</string>
+ </dict>
+</dict>
+</plist>
#include <signal.h>
#include <sys/sysctl.h>
#include <sys/kern_memorystatus.h>
+#include <time.h>
#include <mach-o/dyld.h>
#include <mach/mach_vm.h>
#include <mach/vm_page_size.h> /* Needed for vm_region info */
X(MEMORYSTATUS_CONTROL_FAILED) \
X(IS_FREEZABLE_NOT_AS_EXPECTED) \
X(MEMSTAT_PRIORITY_CHANGE_FAILED) \
- X(INVALID_ALLOCATE_PAGES_ARGUMENTS) \
+ X(INVALID_ALLOCATE_PAGES_ARGUMENTS) \
X(EXIT_CODE_MAX)
#define EXIT_CODES_ENUM(VAR) VAR,
/* these values will remain fixed during testing */
int active_limit_mb = 15; /* arbitrary */
int inactive_limit_mb = 7; /* arbitrary */
+ int demote_value = 1;
/* Launch the child process, and elevate its priority */
int requestedpriority;
dispatch_source_t ds_signal, ds_exit;
/* Freeze the process, trigger aggressive demotion, and check that it hasn't been demoted. */
freeze_process(child_pid);
/* Aggressive demotion */
- sysctl_ret = sysctlbyname("kern.memorystatus_demote_frozen_processes", NULL, NULL, NULL, 0);
- T_ASSERT_POSIX_SUCCESS(sysctl_ret, "sysctl kern.memorystatus_demote_frozen_processes failed");
+ sysctl_ret = sysctlbyname("kern.memorystatus_demote_frozen_processes", NULL, NULL, &demote_value, sizeof(demote_value));
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(sysctl_ret, "sysctl kern.memorystatus_demote_frozen_processes succeeded");
/* Check */
(void)check_properties(child_pid, requestedpriority, inactive_limit_mb, 0x0, ASSERTION_STATE_IS_SET, "Priority was set");
T_LOG("Relinquishing our assertion.");
relinquish_assertion_priority(child_pid, 0x0);
(void)check_properties(child_pid, JETSAM_PRIORITY_AGING_BAND2, inactive_limit_mb, 0x0, ASSERTION_STATE_IS_RELINQUISHED, "Assertion was relinquished.");
/* Kill the child */
- T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(child_pid, SIGKILL), "Unable to kill child process");
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(child_pid, SIGKILL), "Killed child process");
T_END;
});
T_DECL(assertion_test_demote_frozen, "demoted frozen process goes to asserted priority.", T_META_ASROOT(true)) {
memorystatus_assertion_test_demote_frozen();
}
+
+T_DECL(budget_replenishment, "budget replenishes properly") {
+ size_t length;
+ int ret;
+ static unsigned int kTestIntervalSecs = 60 * 60 * 32; // 32 Hours
+ unsigned int memorystatus_freeze_daily_mb_max, memorystatus_freeze_daily_pages_max;
+ static unsigned int kFixedPointFactor = 100;
+ static unsigned int kNumSecondsInDay = 60 * 60 * 24;
+ unsigned int new_budget, expected_new_budget_pages;
+ size_t new_budget_ln;
+ unsigned int page_size = (unsigned int) get_vmpage_size();
+
+ /*
+ * Calculate a new budget as if the previous interval expired kTestIntervalSecs
+ * ago and we used up its entire budget.
+ */
+ length = sizeof(kTestIntervalSecs);
+ new_budget_ln = sizeof(new_budget);
+ ret = sysctlbyname("vm.memorystatus_freeze_calculate_new_budget", &new_budget, &new_budget_ln, &kTestIntervalSecs, length);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "vm.memorystatus_freeze_calculate_new_budget");
+
+ // Grab the daily budget.
+ length = sizeof(memorystatus_freeze_daily_mb_max);
+ ret = sysctlbyname("kern.memorystatus_freeze_daily_mb_max", &memorystatus_freeze_daily_mb_max, &length, NULL, 0);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kern.memorystatus_freeze_daily_mb_max");
+
+ memorystatus_freeze_daily_pages_max = memorystatus_freeze_daily_mb_max * 1024 * 1024 / page_size;
+
+ /*
+ * We're kTestIntervalSecs past the start of a new interval, so in addition
+ * to the new daily budget we are owed kTestIntervalSecs seconds' worth of
+ * budget, pro-rated at the daily rate.
+ */
+ expected_new_budget_pages = memorystatus_freeze_daily_pages_max;
+ expected_new_budget_pages += ((kTestIntervalSecs * kFixedPointFactor) / (kNumSecondsInDay)
+ * memorystatus_freeze_daily_pages_max) / kFixedPointFactor;
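+
+ /*
+ * Worked example (illustrative): for the 32-hour interval above this is
+ * daily_pages_max + (115200 * 100 / 86400) * daily_pages_max / 100, i.e.
+ * roughly 2.33x the daily page budget, using the same fixed-point factor
+ * of 100 as the expression above.
+ */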
+
+ T_QUIET; T_ASSERT_EQ(new_budget, expected_new_budget_pages, "Calculate new budget behaves correctly.");
+}
--- /dev/null
+/*
+ * Copyright (c) 2019 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * net_bridge.c
+ * - test if_bridge.c functionality
+ */
+
+#include <darwintest.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sys/event.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <netinet/bootp.h>
+#include <netinet/tcp.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <net/if_arp.h>
+#include <net/bpf.h>
+#include <net/if_bridgevar.h>
+#include <net/if_fake_var.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <errno.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <TargetConditionals.h>
+#include <darwintest_utils.h>
+#include "bpflib.h"
+#include "in_cksum.h"
+
+static bool S_debug;
+static bool S_cleaning_up;
+
+#define ALL_ADDRS (uint32_t)(-1)
+
+#define DHCP_PAYLOAD_MIN sizeof(struct bootp)
+#define DHCP_FLAGS_BROADCAST ((u_short)0x8000)
+
+typedef union {
+ char bytes[DHCP_PAYLOAD_MIN];
+ /* force 4-byte alignment */
+ uint32_t words[DHCP_PAYLOAD_MIN / sizeof(uint32_t)];
+} dhcp_min_payload, *dhcp_min_payload_t;
+
+#define ETHER_PKT_LEN (ETHER_HDR_LEN + ETHERMTU)
+typedef union {
+ char bytes[ETHER_PKT_LEN];
+ /* force 4-byte alignment */
+ uint32_t words[ETHER_PKT_LEN / sizeof(uint32_t)];
+} ether_packet, *ether_packet_t;
+
+typedef struct {
+ struct ip ip;
+ struct udphdr udp;
+} ip_udp_header_t;
+
+typedef struct {
+ struct in_addr src_ip;
+ struct in_addr dst_ip;
+ char zero;
+ char proto;
+ unsigned short length;
+} udp_pseudo_hdr_t;
+
+typedef struct {
+ struct ip ip;
+ struct tcphdr tcp;
+} ip_tcp_header_t;
+
+typedef union {
+ ip_udp_header_t udp;
+ ip_tcp_header_t tcp;
+} ip_udp_tcp_header_u;
+
+typedef struct {
+ struct in_addr src_ip;
+ struct in_addr dst_ip;
+ char zero;
+ char proto;
+ unsigned short length;
+} tcp_pseudo_hdr_t;
+
+typedef struct {
+ struct ip6_hdr ip6;
+ struct udphdr udp;
+} ip6_udp_header_t;
+
+typedef struct {
+ struct in6_addr src_ip;
+ struct in6_addr dst_ip;
+ char zero;
+ char proto;
+ unsigned short length;
+} udp6_pseudo_hdr_t;
+
+typedef struct {
+ char ifname[IFNAMSIZ];
+ char member_ifname[IFNAMSIZ]; /* member of bridge */
+ ether_addr_t member_mac;
+ int fd;
+ u_int unit;
+ u_int num_addrs;
+ void * rx_buf;
+ int rx_buf_size;
+ bool mac_nat;
+
+ u_int test_count;
+ u_int test_address_count;
+ uint64_t test_address_present;
+} switch_port, *switch_port_t;
+
+typedef struct {
+ u_int size;
+ u_int count;
+ bool mac_nat;
+ switch_port list[1];
+} switch_port_list, * switch_port_list_t;
+
+static struct ifbareq *
+bridge_rt_table_copy(u_int * ret_count);
+
+static void
+bridge_rt_table_log(struct ifbareq *rt_table, u_int count);
+
+static struct ifbrmne *
+bridge_mac_nat_entries_copy(u_int * ret_count);
+
+static void
+bridge_mac_nat_entries_log(struct ifbrmne * entries, u_int count);
+
+static void
+system_cmd(const char *cmd, bool fail_on_error);
+
+static int
+inet_dgram_socket(void)
+{
+ int s;
+
+ s = socket(AF_INET, SOCK_DGRAM, 0);
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(s, "socket(AF_INET, SOCK_DGRAM, 0)");
+ return s;
+}
+
+
+/**
+** Packet creation/display
+**/
+#define BOOTP_SERVER_PORT 67
+#define BOOTP_CLIENT_PORT 68
+
+#define TEST_SOURCE_PORT 14
+#define TEST_DEST_PORT 15
+
+#define EA_UNIT_INDEX 4
+#define EA_ADDR_INDEX 5
+
+static void
+set_ethernet_address(ether_addr_t *eaddr, u_int unit, u_int addr_index)
+{
+ u_char *a = eaddr->octet;
+
+ a[0] = 0x02;
+ a[2] = 0x00;
+ a[3] = 0x00;
+ a[1] = 0x00;
+ a[EA_UNIT_INDEX] = (u_char)unit;
+ a[EA_ADDR_INDEX] = (u_char)addr_index;
+}
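+
+/* e.g. unit 2, addr_index 3 yields 02:00:00:00:02:03 (illustrative) */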
+
+#define TEN_NET 0x0a000000
+#define TEN_1_NET (TEN_NET | 0x010000)
+
+static void
+get_ipv4_address(u_int unit, u_int addr_index, struct in_addr *ip)
+{
+ /* up to 255 units, 255 addresses */
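+ /* e.g. unit 2, addr_index 3 maps to 10.1.2.3 (illustrative) */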
+ ip->s_addr = htonl(TEN_1_NET | (unit << 8) | addr_index);
+ return;
+}
+
+#define IN6ADDR_ULA_INIT \
+ {{{ 0xfd, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}}
+
+static struct in6_addr ula_address = IN6ADDR_ULA_INIT;
+
+#define ULA_UNIT_INDEX 14
+#define ULA_ADDR_INDEX 15
+
+static void
+get_ipv6_address(u_int unit, u_int addr_index, struct in6_addr *ip)
+{
+ *ip = ula_address;
+ /* up to 255 units, 255 addresses */
+ ip->s6_addr[ULA_UNIT_INDEX] = (uint8_t)unit;
+ ip->s6_addr[ULA_ADDR_INDEX] = (uint8_t)addr_index;
+}
+
+
+static void
+get_ip_address(uint8_t af, u_int unit, u_int addr_index, union ifbrip *ip)
+{
+ switch (af) {
+ case AF_INET:
+ get_ipv4_address(unit, addr_index, &ip->ifbrip_addr);
+ break;
+ case AF_INET6:
+ get_ipv6_address(unit, addr_index, &ip->ifbrip_addr6);
+ break;
+ default:
+ T_FAIL("unrecognized address family %u", af);
+ break;
+ }
+}
+
+static bool
+ip_addresses_are_equal(uint8_t af, union ifbrip * ip1, union ifbrip * ip2)
+{
+ bool equal;
+
+ switch (af) {
+ case AF_INET:
+ equal = (ip1->ifbrip_addr.s_addr == ip2->ifbrip_addr.s_addr);
+ break;
+ case AF_INET6:
+ equal = IN6_ARE_ADDR_EQUAL(&ip1->ifbrip_addr6,
+ &ip2->ifbrip_addr6);
+ break;
+ default:
+ T_FAIL("unrecognized address family %u", af);
+ equal = false;
+ break;
+ }
+ return equal;
+}
+
+static ether_addr_t ether_broadcast = {
+ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }
+};
+
+static ether_addr_t ether_external = {
+ { 0x80, 0x00, 0x00, 0x00, 0x00, 0x01 }
+};
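+
+/*
+ * Interpretation of the constants above: ether_external appears to stand in
+ * for a host outside the bridge; its 0x80 leading octet keeps it disjoint
+ * from the 0x02-prefixed addresses set_ethernet_address() generates.
+ */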
+
+static inline struct in_addr
+get_external_ipv4_address(void)
+{
+ struct in_addr ip;
+
+ /* IP 10.1.255.1 */
+ ip.s_addr = htonl(TEN_1_NET | 0xff01);
+ return ip;
+}
+
+static inline void
+get_external_ip_address(uint8_t af, union ifbrip * ip)
+{
+ switch (af) {
+ case AF_INET:
+ /* IP 10.1.255.1 */
+ ip->ifbrip_addr = get_external_ipv4_address();
+ break;
+ case AF_INET6:
+ /* fd80::1 */
+ ip->ifbrip_addr6 = ula_address;
+ ip->ifbrip_addr6.s6_addr[1] = 0x80;
+ ip->ifbrip_addr6.s6_addr[15] = 0x01;
+ break;
+ default:
+ T_FAIL("unrecognized address family %u", af);
+ break;
+ }
+}
+
+static inline void
+get_broadcast_ip_address(uint8_t af, union ifbrip * ip)
+{
+ switch (af) {
+ case AF_INET:
+ ip->ifbrip_addr.s_addr = INADDR_BROADCAST;
+ break;
+ case AF_INET6:
+ /* ff02::0 link-local scope multicast */
+ ip->ifbrip_addr6 = in6addr_any;
+ ip->ifbrip_addr6.s6_addr[0] = 0xff;
+ ip->ifbrip_addr6.s6_addr[1] = __IPV6_ADDR_SCOPE_LINKLOCAL;
+ break;
+ default:
+ T_FAIL("unrecognized address family %u", af);
+ break;
+ }
+}
+
+
+#define ETHER_NTOA_BUFSIZE (ETHER_ADDR_LEN * 3)
+static const char *
+ether_ntoa_buf(const ether_addr_t *n, char * buf, int buf_size)
+{
+ char * str;
+
+ str = ether_ntoa(n);
+ strlcpy(buf, str, buf_size);
+ return buf;
+}
+
+static const char *
+inet_ptrtop(int af, const void * ptr, char * buf, socklen_t buf_size)
+{
+ union {
+ struct in_addr ip;
+ struct in6_addr ip6;
+ } u;
+
+ switch (af) {
+ case AF_INET:
+ bcopy(ptr, &u.ip, sizeof(u.ip));
+ break;
+ case AF_INET6:
+ bcopy(ptr, &u.ip6, sizeof(u.ip6));
+ break;
+ default:
+ return NULL;
+ }
+ return inet_ntop(af, &u, buf, buf_size);
+}
+
+static __inline__ char *
+arpop_name(u_int16_t op)
+{
+ switch (op) {
+ case ARPOP_REQUEST:
+ return "ARP REQUEST";
+ case ARPOP_REPLY:
+ return "ARP REPLY";
+ case ARPOP_REVREQUEST:
+ return "REVARP REQUEST";
+ case ARPOP_REVREPLY:
+ return "REVARP REPLY";
+ default:
+ break;
+ }
+ return "<unknown>";
+}
+
+static void
+arp_frame_validate(const struct ether_arp * earp, u_int len, bool dump)
+{
+ const struct arphdr * arp_p;
+ int arphrd;
+ char buf_sender_ether[ETHER_NTOA_BUFSIZE];
+ char buf_sender_ip[INET_ADDRSTRLEN];
+ char buf_target_ether[ETHER_NTOA_BUFSIZE];
+ char buf_target_ip[INET_ADDRSTRLEN];
+
+ T_QUIET;
+ T_ASSERT_GE(len, (u_int)sizeof(*earp),
+ "%s ARP packet size %u need %u",
+ __func__, len, (u_int)sizeof(*earp));
+ if (!dump) {
+ return;
+ }
+ arp_p = &earp->ea_hdr;
+ arphrd = ntohs(arp_p->ar_hrd);
+ T_LOG("%s type=0x%x proto=0x%x", arpop_name(ntohs(arp_p->ar_op)),
+ arphrd, ntohs(arp_p->ar_pro));
+ if (arp_p->ar_hln == sizeof(earp->arp_sha)) {
+ ether_ntoa_buf((const ether_addr_t *)earp->arp_sha,
+ buf_sender_ether,
+ sizeof(buf_sender_ether));
+ ether_ntoa_buf((const ether_addr_t *)earp->arp_tha,
+ buf_target_ether,
+ sizeof(buf_target_ether));
+ T_LOG("Sender H/W\t%s", buf_sender_ether);
+ T_LOG("Target H/W\t%s", buf_target_ether);
+ }
+ inet_ptrtop(AF_INET, earp->arp_spa,
+ buf_sender_ip, sizeof(buf_sender_ip));
+ inet_ptrtop(AF_INET, earp->arp_tpa,
+ buf_target_ip, sizeof(buf_target_ip));
+ T_LOG("Sender IP\t%s", buf_sender_ip);
+ T_LOG("Target IP\t%s", buf_target_ip);
+ return;
+}
+
+static void
+ip_frame_validate(const void * buf, u_int buf_len, bool dump)
+{
+ char buf_dst[INET_ADDRSTRLEN];
+ char buf_src[INET_ADDRSTRLEN];
+ const ip_udp_header_t * ip_udp;
+ u_int ip_len;
+
+ T_QUIET;
+ T_ASSERT_GE(buf_len, (u_int)sizeof(struct ip), NULL);
+ ip_udp = (const ip_udp_header_t *)buf;
+ ip_len = ntohs(ip_udp->ip.ip_len);
+ inet_ptrtop(AF_INET, &ip_udp->ip.ip_src,
+ buf_src, sizeof(buf_src));
+ inet_ptrtop(AF_INET, &ip_udp->ip.ip_dst,
+ buf_dst, sizeof(buf_dst));
+ if (dump) {
+ T_LOG("ip src %s dst %s len %u id %d",
+ buf_src, buf_dst, ip_len,
+ ntohs(ip_udp->ip.ip_id));
+ }
+ T_QUIET;
+ T_ASSERT_GE(buf_len, ip_len, NULL);
+ T_QUIET;
+ T_ASSERT_EQ(ip_udp->ip.ip_v, IPVERSION, NULL);
+ T_QUIET;
+ T_ASSERT_EQ((u_int)(ip_udp->ip.ip_hl << 2),
+ (u_int)sizeof(struct ip), NULL);
+ if (ip_udp->ip.ip_p == IPPROTO_UDP) {
+ u_int udp_len;
+ u_int data_len;
+
+ T_QUIET;
+ T_ASSERT_GE(buf_len, (u_int)sizeof(*ip_udp), NULL);
+ udp_len = ntohs(ip_udp->udp.uh_ulen);
+ T_QUIET;
+ T_ASSERT_GE(udp_len, (u_int)sizeof(ip_udp->udp), NULL);
+ data_len = udp_len - (u_int)sizeof(ip_udp->udp);
+ if (dump) {
+ T_LOG("udp src 0x%x dst 0x%x len %u"
+ " csum 0x%x datalen %u",
+ ntohs(ip_udp->udp.uh_sport),
+ ntohs(ip_udp->udp.uh_dport),
+ udp_len,
+ ntohs(ip_udp->udp.uh_sum),
+ data_len);
+ }
+ }
+}
+
+static void
+ip6_frame_validate(const void * buf, u_int buf_len, bool dump)
+{
+ char buf_dst[INET6_ADDRSTRLEN];
+ char buf_src[INET6_ADDRSTRLEN];
+ const struct ip6_hdr * ip6;
+ u_int ip6_len;
+
+ T_QUIET;
+ T_ASSERT_GE(buf_len, (u_int)sizeof(struct ip6_hdr), NULL);
+ ip6 = (const struct ip6_hdr *)buf;
+ ip6_len = ntohs(ip6->ip6_plen);
+ inet_ptrtop(AF_INET6, &ip6->ip6_src, buf_src, sizeof(buf_src));
+ inet_ptrtop(AF_INET6, &ip6->ip6_dst, buf_dst, sizeof(buf_dst));
+ if (dump) {
+ T_LOG("ip6 src %s dst %s len %u", buf_src, buf_dst, ip6_len);
+ }
+ T_QUIET;
+ T_ASSERT_GE(buf_len, ip6_len + (u_int)sizeof(struct ip6_hdr), NULL);
+ T_QUIET;
+ T_ASSERT_EQ((ip6->ip6_vfc & IPV6_VERSION_MASK),
+ IPV6_VERSION, NULL);
+ T_QUIET;
+ switch (ip6->ip6_nxt) {
+ case IPPROTO_UDP: {
+ u_int data_len;
+ const ip6_udp_header_t *ip6_udp;
+ u_int udp_len;
+
+ ip6_udp = (const ip6_udp_header_t *)buf;
+ T_QUIET;
+ T_ASSERT_GE(buf_len, (u_int)sizeof(*ip6_udp), NULL);
+ udp_len = ntohs(ip6_udp->udp.uh_ulen);
+ T_QUIET;
+ T_ASSERT_GE(udp_len, (u_int)sizeof(ip6_udp->udp), NULL);
+ data_len = udp_len - (u_int)sizeof(ip6_udp->udp);
+ if (dump) {
+ T_LOG("udp src 0x%x dst 0x%x len %u"
+ " csum 0x%x datalen %u",
+ ntohs(ip6_udp->udp.uh_sport),
+ ntohs(ip6_udp->udp.uh_dport),
+ udp_len,
+ ntohs(ip6_udp->udp.uh_sum),
+ data_len);
+ }
+ break;
+ }
+ case IPPROTO_ICMPV6: {
+ const struct icmp6_hdr *icmp6;
+ u_int icmp6_len;
+
+ icmp6_len = buf_len - sizeof(*ip6);
+ T_QUIET;
+ T_ASSERT_GE(buf_len, icmp6_len, NULL);
+ icmp6 = (const struct icmp6_hdr *)(ip6 + 1);
+ switch (icmp6->icmp6_type) {
+ case ND_NEIGHBOR_SOLICIT:
+ if (dump) {
+ T_LOG("neighbor solicit");
+ }
+ break;
+ case ND_NEIGHBOR_ADVERT:
+ if (dump) {
+ T_LOG("neighbor advert");
+ }
+ break;
+ case ND_ROUTER_SOLICIT:
+ if (dump) {
+ T_LOG("router solicit");
+ }
+ break;
+ default:
+ if (dump) {
+ T_LOG("icmp6 code 0x%x", icmp6->icmp6_type);
+ }
+ break;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+static void
+ethernet_frame_validate(const void * buf, u_int buf_len, bool dump)
+{
+ char ether_dst[ETHER_NTOA_BUFSIZE];
+ char ether_src[ETHER_NTOA_BUFSIZE];
+ uint16_t ether_type;
+ const ether_header_t * eh_p;
+
+ T_QUIET;
+ T_ASSERT_GE(buf_len, (u_int)sizeof(*eh_p), NULL);
+ eh_p = (const ether_header_t *)buf;
+ ether_type = ntohs(eh_p->ether_type);
+ ether_ntoa_buf((const ether_addr_t *)&eh_p->ether_dhost,
+ ether_dst, sizeof(ether_dst));
+ ether_ntoa_buf((const ether_addr_t *)&eh_p->ether_shost,
+ ether_src, sizeof(ether_src));
+ if (dump) {
+ T_LOG("ether dst %s src %s type 0x%x",
+ ether_dst, ether_src, ether_type);
+ }
+ switch (ether_type) {
+ case ETHERTYPE_IP:
+ ip_frame_validate(eh_p + 1, (u_int)(buf_len - sizeof(*eh_p)),
+ dump);
+ break;
+ case ETHERTYPE_ARP:
+ arp_frame_validate((const struct ether_arp *)(eh_p + 1),
+ (u_int)(buf_len - sizeof(*eh_p)),
+ dump);
+ break;
+ case ETHERTYPE_IPV6:
+ ip6_frame_validate(eh_p + 1, (u_int)(buf_len - sizeof(*eh_p)),
+ dump);
+ break;
+ default:
+ T_FAIL("unrecognized ethertype 0x%x", ether_type);
+ break;
+ }
+}
+
+static u_int
+ethernet_udp4_frame_populate(void * buf, size_t buf_len,
+ const ether_addr_t * src,
+ struct in_addr src_ip,
+ uint16_t src_port,
+ const ether_addr_t * dst,
+ struct in_addr dst_ip,
+ uint16_t dst_port,
+ const void * data, u_int data_len)
+{
+ ether_header_t * eh_p;
+ u_int frame_length;
+ static int ip_id;
+ ip_udp_header_t * ip_udp;
+ char * payload;
+ udp_pseudo_hdr_t * udp_pseudo;
+
+ frame_length = (u_int)(sizeof(*eh_p) + sizeof(*ip_udp)) + data_len;
+ if (buf_len < frame_length) {
+ return 0;
+ }
+
+ /* determine frame offsets */
+ eh_p = (ether_header_t *)buf;
+ ip_udp = (ip_udp_header_t *)(void *)(eh_p + 1);
+ udp_pseudo = (udp_pseudo_hdr_t *)(void *)
+ (((char *)&ip_udp->udp) - sizeof(*udp_pseudo));
+ payload = (char *)(eh_p + 1) + sizeof(*ip_udp);
+
+ /* ethernet_header */
+ bcopy(src, eh_p->ether_shost, ETHER_ADDR_LEN);
+ bcopy(dst, eh_p->ether_dhost, ETHER_ADDR_LEN);
+ eh_p->ether_type = htons(ETHERTYPE_IP);
+
+ /* copy the data */
+ bcopy(data, payload, data_len);
+
+ /* fill in UDP pseudo header (gets overwritten by IP header below) */
+ bcopy(&src_ip, &udp_pseudo->src_ip, sizeof(src_ip));
+ bcopy(&dst_ip, &udp_pseudo->dst_ip, sizeof(dst_ip));
+ udp_pseudo->zero = 0;
+ udp_pseudo->proto = IPPROTO_UDP;
+ udp_pseudo->length = htons(sizeof(ip_udp->udp) + data_len);
+
+ /* fill in UDP header */
+ ip_udp->udp.uh_sport = htons(src_port);
+ ip_udp->udp.uh_dport = htons(dst_port);
+ ip_udp->udp.uh_ulen = htons(sizeof(ip_udp->udp) + data_len);
+ ip_udp->udp.uh_sum = 0;
+ ip_udp->udp.uh_sum = in_cksum(udp_pseudo, (int)(sizeof(*udp_pseudo)
+ + sizeof(ip_udp->udp) + data_len));
+
+ /* fill in IP header */
+ bzero(ip_udp, sizeof(ip_udp->ip));
+ ip_udp->ip.ip_v = IPVERSION;
+ ip_udp->ip.ip_hl = sizeof(struct ip) >> 2;
+ ip_udp->ip.ip_ttl = MAXTTL;
+ ip_udp->ip.ip_p = IPPROTO_UDP;
+ bcopy(&src_ip, &ip_udp->ip.ip_src, sizeof(src_ip));
+ bcopy(&dst_ip, &ip_udp->ip.ip_dst, sizeof(dst_ip));
+ ip_udp->ip.ip_len = htons(sizeof(*ip_udp) + data_len);
+ ip_udp->ip.ip_id = htons(ip_id++);
+
+ /* compute the IP checksum */
+ ip_udp->ip.ip_sum = 0; /* needs to be zero for checksum */
+ ip_udp->ip.ip_sum = in_cksum(&ip_udp->ip, sizeof(ip_udp->ip));
+
+ return frame_length;
+}
+
+static u_int
+ethernet_udp6_frame_populate(void * buf, size_t buf_len,
+ const ether_addr_t * src,
+ struct in6_addr *src_ip,
+ uint16_t src_port,
+ const ether_addr_t * dst,
+ struct in6_addr * dst_ip,
+ uint16_t dst_port,
+ const void * data, u_int data_len)
+{
+ ether_header_t * eh_p;
+ u_int frame_length;
+ ip6_udp_header_t * ip6_udp;
+ char * payload;
+ udp6_pseudo_hdr_t * udp6_pseudo;
+
+ frame_length = (u_int)(sizeof(*eh_p) + sizeof(*ip6_udp)) + data_len;
+ if (buf_len < frame_length) {
+ return 0;
+ }
+
+ /* determine frame offsets */
+ eh_p = (ether_header_t *)buf;
+ ip6_udp = (ip6_udp_header_t *)(void *)(eh_p + 1);
+ udp6_pseudo = (udp6_pseudo_hdr_t *)(void *)
+ (((char *)&ip6_udp->udp) - sizeof(*udp6_pseudo));
+ payload = (char *)(eh_p + 1) + sizeof(*ip6_udp);
+
+ /* ethernet_header */
+ bcopy(src, eh_p->ether_shost, ETHER_ADDR_LEN);
+ bcopy(dst, eh_p->ether_dhost, ETHER_ADDR_LEN);
+ eh_p->ether_type = htons(ETHERTYPE_IPV6);
+
+ /* copy the data */
+ bcopy(data, payload, data_len);
+
+ /* fill in UDP pseudo header (gets overwritten by IP header below) */
+ bcopy(src_ip, &udp6_pseudo->src_ip, sizeof(*src_ip));
+ bcopy(dst_ip, &udp6_pseudo->dst_ip, sizeof(*dst_ip));
+ udp6_pseudo->zero = 0;
+ udp6_pseudo->proto = IPPROTO_UDP;
+ udp6_pseudo->length = htons(sizeof(ip6_udp->udp) + data_len);
+
+ /* fill in UDP header */
+ ip6_udp->udp.uh_sport = htons(src_port);
+ ip6_udp->udp.uh_dport = htons(dst_port);
+ ip6_udp->udp.uh_ulen = htons(sizeof(ip6_udp->udp) + data_len);
+ ip6_udp->udp.uh_sum = 0;
+ ip6_udp->udp.uh_sum = in_cksum(udp6_pseudo, (int)(sizeof(*udp6_pseudo)
+ + sizeof(ip6_udp->udp) + data_len));
+
+ /* fill in IP header */
+ bzero(&ip6_udp->ip6, sizeof(ip6_udp->ip6));
+ ip6_udp->ip6.ip6_vfc = IPV6_VERSION;
+ ip6_udp->ip6.ip6_nxt = IPPROTO_UDP;
+ bcopy(src_ip, &ip6_udp->ip6.ip6_src, sizeof(*src_ip));
+ bcopy(dst_ip, &ip6_udp->ip6.ip6_dst, sizeof(*dst_ip));
+ ip6_udp->ip6.ip6_plen = htons(sizeof(struct udphdr) + data_len);
+ /* ip6_udp->ip6.ip6_flow = ? */
+ return frame_length;
+}
+
+static u_int
+ethernet_udp_frame_populate(void * buf, size_t buf_len,
+ uint8_t af,
+ const ether_addr_t * src,
+ union ifbrip * src_ip,
+ uint16_t src_port,
+ const ether_addr_t * dst,
+ union ifbrip * dst_ip,
+ uint16_t dst_port,
+ const void * data, u_int data_len)
+{
+ u_int len;
+
+ switch (af) {
+ case AF_INET:
+ len = ethernet_udp4_frame_populate(buf, buf_len,
+ src,
+ src_ip->ifbrip_addr,
+ src_port,
+ dst,
+ dst_ip->ifbrip_addr,
+ dst_port,
+ data, data_len);
+ break;
+ case AF_INET6:
+ len = ethernet_udp6_frame_populate(buf, buf_len,
+ src,
+ &src_ip->ifbrip_addr6,
+ src_port,
+ dst,
+ &dst_ip->ifbrip_addr6,
+ dst_port,
+ data, data_len);
+ break;
+ default:
+ T_FAIL("unrecognized address family %u", af);
+ len = 0;
+ break;
+ }
+ return len;
+}
+
+static u_int
+ethernet_arp_frame_populate(void * buf, u_int buf_len,
+ uint16_t op,
+ const ether_addr_t * sender_hw,
+ struct in_addr sender_ip,
+ const ether_addr_t * target_hw,
+ struct in_addr target_ip)
+{
+ ether_header_t * eh_p;
+ struct ether_arp * earp;
+ struct arphdr * arp_p;
+ u_int frame_length;
+
+ frame_length = sizeof(*earp) + sizeof(*eh_p);
+ T_QUIET;
+ T_ASSERT_GE(buf_len, frame_length,
+ "%s buffer size %u needed %u",
+ __func__, buf_len, frame_length);
+
+ /* ethernet_header */
+ eh_p = (ether_header_t *)buf;
+ bcopy(sender_hw, eh_p->ether_shost, ETHER_ADDR_LEN);
+ if (target_hw != NULL) {
+ bcopy(target_hw, eh_p->ether_dhost,
+ sizeof(eh_p->ether_dhost));
+ } else {
+ bcopy(&ether_broadcast, eh_p->ether_dhost,
+ sizeof(eh_p->ether_dhost));
+ }
+ eh_p->ether_type = htons(ETHERTYPE_ARP);
+
+ /* ARP payload */
+ earp = (struct ether_arp *)(void *)(eh_p + 1);
+ arp_p = &earp->ea_hdr;
+ arp_p->ar_hrd = htons(ARPHRD_ETHER);
+ arp_p->ar_pro = htons(ETHERTYPE_IP);
+ arp_p->ar_hln = sizeof(earp->arp_sha);
+ arp_p->ar_pln = sizeof(struct in_addr);
+ arp_p->ar_op = htons(op);
+ bcopy(sender_hw, earp->arp_sha, sizeof(earp->arp_sha));
+ bcopy(&sender_ip, earp->arp_spa, sizeof(earp->arp_spa));
+ if (target_hw != NULL) {
+ bcopy(target_hw, earp->arp_tha, sizeof(earp->arp_tha));
+ } else {
+ bzero(earp->arp_tha, sizeof(earp->arp_tha));
+ }
+ bcopy(&target_ip, earp->arp_tpa, sizeof(earp->arp_tpa));
+ return frame_length;
+}
+
+static uint32_t G_generation;
+
+static uint32_t
+next_generation(void)
+{
+ return G_generation++;
+}
+
+static const void *
+ethernet_frame_get_udp4_payload(void * buf, u_int buf_len,
+ u_int * ret_payload_length)
+{
+ ether_header_t * eh_p;
+ uint16_t ether_type;
+ ip_udp_header_t * ip_udp;
+ u_int ip_len;
+ u_int left;
+ const void * payload = NULL;
+ u_int payload_length = 0;
+ u_int udp_len;
+
+ T_QUIET;
+ T_ASSERT_GE(buf_len, (u_int)(sizeof(*eh_p) + sizeof(*ip_udp)), NULL);
+ left = buf_len;
+ eh_p = (ether_header_t *)buf;
+ ether_type = ntohs(eh_p->ether_type);
+ T_QUIET;
+ T_ASSERT_EQ((int)ether_type, ETHERTYPE_IP, NULL);
+ ip_udp = (ip_udp_header_t *)(void *)(eh_p + 1);
+ left -= sizeof(*eh_p);
+ ip_len = ntohs(ip_udp->ip.ip_len);
+ T_QUIET;
+ T_ASSERT_GE(left, ip_len, NULL);
+ T_QUIET;
+ T_ASSERT_EQ((int)ip_udp->ip.ip_v, IPVERSION, NULL);
+ T_QUIET;
+ T_ASSERT_EQ((u_int)ip_udp->ip.ip_hl << 2, (u_int)sizeof(struct ip),
+ NULL);
+ T_QUIET;
+ T_ASSERT_EQ((int)ip_udp->ip.ip_p, IPPROTO_UDP, NULL);
+ T_QUIET;
+ T_ASSERT_GE(buf_len, (u_int)sizeof(*ip_udp), NULL);
+ udp_len = ntohs(ip_udp->udp.uh_ulen);
+ T_QUIET;
+ T_ASSERT_GE(udp_len, (u_int)sizeof(ip_udp->udp), NULL);
+ payload_length = udp_len - (int)sizeof(ip_udp->udp);
+ if (payload_length > 0) {
+ payload = (ip_udp + 1);
+ }
+ if (payload == NULL) {
+ payload_length = 0;
+ }
+ *ret_payload_length = payload_length;
+ return payload;
+}
+
+static const void *
+ethernet_frame_get_udp6_payload(void * buf, u_int buf_len,
+ u_int * ret_payload_length)
+{
+ ether_header_t * eh_p;
+ uint16_t ether_type;
+ ip6_udp_header_t * ip6_udp;
+ u_int ip6_len;
+ u_int left;
+ const void * payload = NULL;
+ u_int payload_length = 0;
+ u_int udp_len;
+
+ T_QUIET;
+ T_ASSERT_GE(buf_len, (u_int)(sizeof(*eh_p) + sizeof(*ip6_udp)), NULL);
+ left = buf_len;
+ eh_p = (ether_header_t *)buf;
+ ether_type = ntohs(eh_p->ether_type);
+ T_QUIET;
+ T_ASSERT_EQ((int)ether_type, ETHERTYPE_IPV6, NULL);
+ ip6_udp = (ip6_udp_header_t *)(void *)(eh_p + 1);
+ left -= sizeof(*eh_p);
+ ip6_len = ntohs(ip6_udp->ip6.ip6_plen);
+ T_QUIET;
+ T_ASSERT_GE(left, ip6_len + (u_int)sizeof(struct ip6_hdr), NULL);
+ T_QUIET;
+ T_ASSERT_EQ((int)(ip6_udp->ip6.ip6_vfc & IPV6_VERSION_MASK),
+ IPV6_VERSION, NULL);
+ T_QUIET;
+ T_ASSERT_EQ((int)ip6_udp->ip6.ip6_nxt, IPPROTO_UDP, NULL);
+ T_QUIET;
+ T_ASSERT_GE(buf_len, (u_int)sizeof(*ip6_udp), NULL);
+ udp_len = ntohs(ip6_udp->udp.uh_ulen);
+ T_QUIET;
+ T_ASSERT_GE(udp_len, (u_int)sizeof(ip6_udp->udp), NULL);
+ payload_length = udp_len - (int)sizeof(ip6_udp->udp);
+ if (payload_length > 0) {
+ payload = (ip6_udp + 1);
+ }
+ if (payload == NULL) {
+ payload_length = 0;
+ }
+ *ret_payload_length = payload_length;
+ return payload;
+}
+
+static const void *
+ethernet_frame_get_udp_payload(uint8_t af, void * buf, u_int buf_len,
+ u_int * ret_payload_length)
+{
+ const void * payload;
+
+ switch (af) {
+ case AF_INET:
+ payload = ethernet_frame_get_udp4_payload(buf, buf_len,
+ ret_payload_length);
+ break;
+ case AF_INET6:
+ payload = ethernet_frame_get_udp6_payload(buf, buf_len,
+ ret_payload_length);
+ break;
+ default:
+ T_FAIL("unrecognized address family %u", af);
+ payload = NULL;
+ break;
+ }
+ return payload;
+}
+
+#define MIN_ICMP6_LEN ((u_int)(sizeof(ether_header_t) + \
+ sizeof(struct ip6_hdr) + \
+ sizeof(struct icmp6_hdr)))
+#define ALIGNED_ND_OPT_LEN 8
+#define SET_ND_OPT_LEN(a) (u_int)((a) >> 3)
+#define GET_ND_OPT_LEN(a) (u_int)((a) << 3)
+#define ALIGN_ND_OPT(a) (u_int)roundup(a, ALIGNED_ND_OPT_LEN)
+#define LINKADDR_OPT_LEN (ALIGN_ND_OPT(sizeof(struct nd_opt_hdr) + \
+ sizeof(ether_addr_t)))
+#define ETHER_IPV6_LEN (sizeof(*eh_p) + sizeof(*ip6))
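+
+/*
+ * Note on the ND macros above: neighbor-discovery option lengths are carried
+ * in units of 8 bytes, so SET_ND_OPT_LEN converts a byte count into that unit
+ * and LINKADDR_OPT_LEN rounds the 2-byte option header plus the 6-byte MAC
+ * address up to a single 8-byte unit.
+ */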
+
+
+
+static u_int
+ethernet_nd6_frame_populate(void * buf, u_int buf_len,
+ uint8_t type,
+ const ether_addr_t * sender_hw,
+ struct in6_addr * sender_ip,
+ const ether_addr_t * dest_ether,
+ const ether_addr_t * target_hw,
+ struct in6_addr * target_ip)
+{
+ u_int data_len = 0;
+ ether_header_t * eh_p;
+ u_int frame_length;
+ struct icmp6_hdr * icmp6;
+ struct ip6_hdr * ip6;
+ struct nd_opt_hdr * nd_opt;
+
+ switch (type) {
+ case ND_ROUTER_SOLICIT:
+ case ND_NEIGHBOR_ADVERT:
+ case ND_NEIGHBOR_SOLICIT:
+ break;
+ default:
+ T_FAIL("%s: unsupported type %u", __func__, type);
+ return 0;
+ }
+
+ T_QUIET;
+ T_ASSERT_GE(buf_len, MIN_ICMP6_LEN, NULL);
+
+ eh_p = (ether_header_t *)buf;
+ ip6 = (struct ip6_hdr *)(void *)(eh_p + 1);
+ icmp6 = (struct icmp6_hdr *)(void *)(ip6 + 1);
+ frame_length = sizeof(*eh_p) + sizeof(*ip6);
+ switch (type) {
+ case ND_NEIGHBOR_SOLICIT: {
+ struct nd_neighbor_solicit * nd_ns;
+ bool sender_is_specified;
+
+ sender_is_specified = !IN6_IS_ADDR_UNSPECIFIED(sender_ip);
+ data_len = sizeof(*nd_ns);
+ if (sender_is_specified) {
+ data_len += LINKADDR_OPT_LEN;
+ }
+ frame_length += data_len;
+ T_QUIET;
+ T_ASSERT_GE(buf_len, frame_length, NULL);
+ nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
+ if (sender_is_specified) {
+ /* add the source lladdr option */
+ nd_opt = (struct nd_opt_hdr *)(nd_ns + 1);
+ nd_opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR;
+ nd_opt->nd_opt_len = SET_ND_OPT_LEN(LINKADDR_OPT_LEN);
+ bcopy(sender_hw, (nd_opt + 1), sizeof(*sender_hw));
+ }
+ bcopy(target_ip, &nd_ns->nd_ns_target,
+ sizeof(nd_ns->nd_ns_target));
+ break;
+ }
+ case ND_NEIGHBOR_ADVERT: {
+ struct nd_neighbor_advert * nd_na;
+
+ data_len = sizeof(*nd_na) + LINKADDR_OPT_LEN;
+ frame_length += data_len;
+ T_QUIET;
+ T_ASSERT_GE(buf_len, frame_length, NULL);
+
+ nd_na = (struct nd_neighbor_advert *)(void *)icmp6;
+ bcopy(target_ip, &nd_na->nd_na_target,
+ sizeof(nd_na->nd_na_target));
+ /* add the target lladdr option */
+ nd_opt = (struct nd_opt_hdr *)(nd_na + 1);
+ nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
+ nd_opt->nd_opt_len = SET_ND_OPT_LEN(LINKADDR_OPT_LEN);
+ bcopy(target_hw, (nd_opt + 1), sizeof(*target_hw));
+ break;
+ }
+ case ND_ROUTER_SOLICIT: {
+ struct nd_router_solicit * nd_rs;
+
+ data_len = sizeof(*nd_rs) + LINKADDR_OPT_LEN;
+ frame_length += data_len;
+ T_QUIET;
+ T_ASSERT_GE(buf_len, frame_length, NULL);
+
+ nd_rs = (struct nd_router_solicit *)(void *)icmp6;
+
+ /* add the source lladdr option */
+ nd_opt = (struct nd_opt_hdr *)(nd_rs + 1);
+ nd_opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR;
+ nd_opt->nd_opt_len = SET_ND_OPT_LEN(LINKADDR_OPT_LEN);
+ bcopy(sender_hw, (nd_opt + 1), sizeof(*sender_hw));
+ break;
+ }
+ default:
+ T_FAIL("%s: unsupported type %u", __func__, type);
+ return 0;
+ }
+ /* icmp6 header */
+ icmp6->icmp6_type = type;
+ icmp6->icmp6_code = 0;
+ icmp6->icmp6_cksum = 0;
+ icmp6->icmp6_data32[0] = 0;
+
+ /* ethernet_header */
+ bcopy(sender_hw, eh_p->ether_shost, ETHER_ADDR_LEN);
+ if (dest_ether != NULL) {
+ bcopy(dest_ether, eh_p->ether_dhost,
+ sizeof(eh_p->ether_dhost));
+ } else {
+ /* XXX ether_dhost should be multicast */
+ bcopy(&ether_broadcast, eh_p->ether_dhost,
+ sizeof(eh_p->ether_dhost));
+ }
+ eh_p->ether_type = htons(ETHERTYPE_IPV6);
+
+ /* IPv6 header */
+ bzero(ip6, sizeof(*ip6));
+ ip6->ip6_nxt = IPPROTO_ICMPV6;
+ ip6->ip6_vfc = IPV6_VERSION;
+ bcopy(sender_ip, &ip6->ip6_src, sizeof(ip6->ip6_src));
+ /* XXX ip6_dst should be specific multicast */
+ bcopy(&in6addr_linklocal_allnodes, &ip6->ip6_dst, sizeof(ip6->ip6_dst));
+ ip6->ip6_plen = htons(data_len);
+
+ return frame_length;
+}
+
+/**
+** Switch port
+**/
+static void
+switch_port_check_tx(switch_port_t port)
+{
+ int error;
+ struct kevent kev;
+ int kq;
+ struct timespec ts = { .tv_sec = 0, .tv_nsec = 1000 * 1000};
+
+ kq = kqueue();
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(kq, "kqueue check_tx");
+ EV_SET(&kev, port->fd, EVFILT_WRITE, EV_ADD | EV_ENABLE, 0, 0, NULL);
+ error = kevent(kq, &kev, 1, &kev, 1, &ts);
+ T_QUIET;
+ T_ASSERT_EQ(error, 1, "kevent");
+ T_QUIET;
+ T_ASSERT_EQ((int)kev.filter, EVFILT_WRITE, NULL);
+ T_QUIET;
+ T_ASSERT_EQ((int)kev.ident, port->fd, NULL);
+ T_QUIET;
+ T_ASSERT_NULL(kev.udata, NULL);
+ close(kq);
+ return;
+}
+
+static void
+switch_port_send_arp(switch_port_t port,
+ uint16_t op,
+ const ether_addr_t * sender_hw,
+ struct in_addr sender_ip,
+ const ether_addr_t * target_hw,
+ struct in_addr target_ip)
+{
+ u_int frame_length;
+ ether_packet pkt;
+ ssize_t n;
+
+ /* make sure we can send */
+ switch_port_check_tx(port);
+ frame_length = ethernet_arp_frame_populate(&pkt, sizeof(pkt),
+ op,
+ sender_hw,
+ sender_ip,
+ target_hw,
+ target_ip);
+ T_QUIET;
+ T_ASSERT_GT(frame_length, 0, "%s: frame_length %u",
+ __func__, frame_length);
+ if (S_debug) {
+ T_LOG("Port %s -> %s transmitting %u bytes",
+ port->ifname, port->member_ifname, frame_length);
+ }
+ ethernet_frame_validate(&pkt, frame_length, S_debug);
+ n = write(port->fd, &pkt, frame_length);
+ if (n < 0) {
+ T_ASSERT_POSIX_SUCCESS(n, "%s write fd %d failed %ld",
+ port->ifname, port->fd, n);
+ }
+ T_QUIET;
+ T_ASSERT_EQ((u_int)n, frame_length,
+ "%s fd %d wrote %ld",
+ port->ifname, port->fd, n);
+}
+
+
+static void
+switch_port_send_nd6(switch_port_t port,
+ uint8_t type,
+ const ether_addr_t * sender_hw,
+ struct in6_addr * sender_ip,
+ const ether_addr_t * dest_ether,
+ const ether_addr_t * target_hw,
+ struct in6_addr * target_ip)
+{
+ u_int frame_length;
+ ether_packet pkt;
+ ssize_t n;
+
+ /* make sure we can send */
+ switch_port_check_tx(port);
+ frame_length = ethernet_nd6_frame_populate(&pkt, sizeof(pkt),
+ type,
+ sender_hw,
+ sender_ip,
+ dest_ether,
+ target_hw,
+ target_ip);
+ T_QUIET;
+ T_ASSERT_GT(frame_length, 0, "%s: frame_length %u",
+ __func__, frame_length);
+ if (S_debug) {
+ T_LOG("Port %s -> %s transmitting %u bytes",
+ port->ifname, port->member_ifname, frame_length);
+ }
+ ethernet_frame_validate(&pkt, frame_length, S_debug);
+ n = write(port->fd, &pkt, frame_length);
+ if (n < 0) {
+ T_ASSERT_POSIX_SUCCESS(n, "%s write fd %d failed %ld",
+ port->ifname, port->fd, n);
+ }
+ T_QUIET;
+ T_ASSERT_EQ((u_int)n, frame_length,
+ "%s fd %d wrote %ld",
+ port->ifname, port->fd, n);
+}
+
+
+static void
+switch_port_send_udp(switch_port_t port,
+ uint8_t af,
+ const ether_addr_t * src_eaddr,
+ union ifbrip * src_ip,
+ uint16_t src_port,
+ const ether_addr_t * dst_eaddr,
+ union ifbrip * dst_ip,
+ uint16_t dst_port,
+ const void * payload, u_int payload_length)
+{
+ u_int frame_length;
+ ether_packet pkt;
+ ssize_t n;
+
+ /* make sure we can send */
+ switch_port_check_tx(port);
+
+ /* generate the packet */
+ frame_length
+ = ethernet_udp_frame_populate((void *)&pkt,
+ (u_int)sizeof(pkt),
+ af,
+ src_eaddr,
+ src_ip,
+ src_port,
+ dst_eaddr,
+ dst_ip,
+ dst_port,
+ payload,
+ payload_length);
+ T_QUIET;
+ T_ASSERT_GT(frame_length, 0, NULL);
+ if (S_debug) {
+ T_LOG("Port %s transmitting %u bytes",
+ port->ifname, frame_length);
+ }
+ ethernet_frame_validate(&pkt, frame_length, S_debug);
+ n = write(port->fd, &pkt, frame_length);
+ if (n < 0) {
+ T_ASSERT_POSIX_SUCCESS(n, "%s write fd %d failed %ld",
+ port->ifname, port->fd, n);
+ }
+ T_QUIET;
+ T_ASSERT_EQ((u_int)n, frame_length,
+ "%s fd %d wrote %ld",
+ port->ifname, port->fd, n);
+}
+
+
+
+static void
+switch_port_send_udp_addr_index(switch_port_t port,
+ uint8_t af,
+ u_int addr_index,
+ const ether_addr_t * dst_eaddr,
+ union ifbrip * dst_ip,
+ const void * payload, u_int payload_length)
+{
+ ether_addr_t eaddr;
+ union ifbrip ip;
+
+ /* generate traffic for the unit and address */
+ set_ethernet_address(&eaddr, port->unit, addr_index);
+ get_ip_address(af, port->unit, addr_index, &ip);
+ switch_port_send_udp(port, af,
+ &eaddr, &ip, TEST_SOURCE_PORT,
+ dst_eaddr, dst_ip, TEST_DEST_PORT,
+ payload, payload_length);
+}
+
+typedef void
+(packet_validator)(switch_port_t port, const ether_header_t * eh_p,
+ u_int pkt_len, void * context);
+typedef packet_validator * packet_validator_t;
+
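+/*
+ * Drain packets queued on the port's BPF fd: walk each bpf_hdr-framed
+ * packet, run the caller's validator, and, when a payload is supplied,
+ * check that it matches the received UDP payload.
+ */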
+static void
+switch_port_receive(switch_port_t port,
+ uint8_t af,
+ const void * payload, u_int payload_length,
+ packet_validator_t validator,
+ void * context)
+{
+ ether_header_t * eh_p;
+ ssize_t n;
+ char * offset;
+
+ n = read(port->fd, port->rx_buf, (unsigned)port->rx_buf_size);
+ if (n < 0) {
+ if (errno == EAGAIN) {
+ return;
+ }
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(n, "read %s port %d fd %d",
+ port->ifname, port->unit, port->fd);
+ return;
+ }
+ for (offset = port->rx_buf; n > 0;) {
+ struct bpf_hdr * bpf = (struct bpf_hdr *)(void *)offset;
+ u_int pkt_len;
+ char * pkt;
+ u_int skip;
+
+ pkt = offset + bpf->bh_hdrlen;
+ pkt_len = bpf->bh_caplen;
+
+ eh_p = (ether_header_t *)(void *)pkt;
+ T_QUIET;
+ T_ASSERT_GE(pkt_len, (u_int)sizeof(*eh_p),
+ "short packet %ld", n);
+
+ /* source shouldn't be broadcast/multicast */
+ T_QUIET;
+ T_ASSERT_EQ(eh_p->ether_shost[0] & 0x01, 0,
+ "broadcast/multicast source");
+
+ if (S_debug) {
+ T_LOG("Port %s [unit %d] [fd %d] Received %u bytes",
+ port->ifname, port->unit, port->fd, pkt_len);
+ }
+ ethernet_frame_validate(pkt, pkt_len, S_debug);
+
+ /* call the validation function */
+ (*validator)(port, eh_p, pkt_len, context);
+
+ if (payload != NULL) {
+ const void * p;
+ u_int p_len;
+
+ p = ethernet_frame_get_udp_payload(af, pkt, pkt_len,
+ &p_len);
+ T_QUIET;
+ T_ASSERT_NOTNULL(p, "ethernet_frame_get_udp_payload");
+ T_QUIET;
+ T_ASSERT_EQ(p_len, payload_length,
+ "payload length %u < expected %u",
+ p_len, payload_length);
+ T_QUIET;
+ T_ASSERT_EQ(bcmp(payload, p, payload_length), 0,
+ "unexpected payload");
+ }
+ skip = BPF_WORDALIGN(pkt_len + bpf->bh_hdrlen);
+ if (skip == 0) {
+ break;
+ }
+ offset += skip;
+ n -= skip;
+ }
+ return;
+}
+
+static void
+switch_port_log(switch_port_t port)
+{
+ T_LOG("%s [unit %d] [member %s]%s bpf fd %d bufsize %d\n",
+ port->ifname, port->unit,
+ port->member_ifname,
+ port->mac_nat ? " [mac-nat]" : "",
+ port->fd, port->rx_buf_size);
+}
+
+#define switch_port_list_size(port_count) \
+ offsetof(switch_port_list, list[port_count])
+
+static switch_port_list_t
+switch_port_list_alloc(u_int port_count, bool mac_nat)
+{
+ switch_port_list_t list;
+
+ list = (switch_port_list_t)
+ calloc(1, switch_port_list_size(port_count));
+ list->size = port_count;
+ list->mac_nat = mac_nat;
+ return list;
+}
+
+static void
+switch_port_list_dealloc(switch_port_list_t list)
+{
+ u_int i;
+ switch_port_t port;
+
+ for (i = 0, port = list->list; i < list->count; i++, port++) {
+ close(port->fd);
+ free(port->rx_buf);
+ }
+ free(list);
+ return;
+}
+
+static errno_t
+switch_port_list_add_port(switch_port_list_t port_list, u_int unit,
+ const char * ifname, const char * member_ifname,
+ ether_addr_t * member_mac,
+ u_int num_addrs, bool mac_nat)
+{
+ int buf_size;
+ errno_t err = EINVAL;
+ int fd = -1;
+ int opt;
+ switch_port_t p;
+
+ if (port_list->count >= port_list->size) {
+ T_LOG("Internal error: port_list count %u >= size %u\n",
+ port_list->count, port_list->size);
+ goto failed;
+ }
+ fd = bpf_new();
+ if (fd < 0) {
+ err = errno;
+ T_LOG("bpf_new");
+ goto failed;
+ }
+ opt = 1;
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(ioctl(fd, FIONBIO, &opt), NULL);
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(bpf_set_immediate(fd, 1), NULL);
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(bpf_setif(fd, ifname), "bpf set if %s",
+ ifname);
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(bpf_set_see_sent(fd, 0), NULL);
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(bpf_set_header_complete(fd, 1), NULL);
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(bpf_get_blen(fd, &buf_size), NULL);
+ if (S_debug) {
+ T_LOG("%s [unit %d] [member %s] bpf fd %d bufsize %d\n",
+ ifname, unit,
+ member_ifname, fd, buf_size);
+ }
+ p = port_list->list + port_list->count++;
+ p->fd = fd;
+ p->unit = unit;
+ strlcpy(p->ifname, ifname, sizeof(p->ifname));
+ strlcpy(p->member_ifname, member_ifname, sizeof(p->member_ifname));
+ p->num_addrs = num_addrs;
+ p->rx_buf_size = buf_size;
+ p->rx_buf = malloc((unsigned)buf_size);
+ p->mac_nat = mac_nat;
+ p->member_mac = *member_mac;
+ return 0;
+
+failed:
+ if (fd >= 0) {
+ close(fd);
+ }
+ return err;
+}
+
+static switch_port_t
+switch_port_list_find_fd(switch_port_list_t ports, int fd)
+{
+ u_int i;
+ switch_port_t port;
+
+ for (i = 0, port = ports->list; i < ports->count; i++, port++) {
+ if (port->fd == fd) {
+ return port;
+ }
+ }
+ return NULL;
+}
+
+static void
+switch_port_list_log(switch_port_list_t port_list)
+{
+ u_int i;
+ switch_port_t port;
+
+ for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+ switch_port_log(port);
+ }
+ return;
+}
+
+static switch_port_t
+switch_port_list_find_member(switch_port_list_t ports, const char * member_ifname)
+{
+ u_int i;
+ switch_port_t port;
+
+ for (i = 0, port = ports->list; i < ports->count; i++, port++) {
+ if (strcmp(port->member_ifname, member_ifname) == 0) {
+ return port;
+ }
+ }
+ return NULL;
+}
+
+static void
+switch_port_list_check_receive(switch_port_list_t ports, uint8_t af,
+ const void * payload, u_int payload_length,
+ packet_validator_t validator,
+ void * context)
+{
+ int i;
+ int n_events;
+ struct kevent kev[ports->count];
+ int kq;
+ switch_port_t port;
+ struct timespec ts = { .tv_sec = 0, .tv_nsec = 10 * 1000 * 1000};
+ u_int u;
+
+ kq = kqueue();
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(kq, "kqueue check_receive");
+ for (u = 0, port = ports->list; u < ports->count; u++, port++) {
+ port->test_count = 0;
+ EV_SET(kev + u, port->fd,
+ EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, NULL);
+ }
+
+ do {
+ n_events = kevent(kq, kev, (int)ports->count, kev,
+ (int)ports->count, &ts);
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(n_events, "kevent receive %d", n_events);
+ for (i = 0; i < n_events; i++) {
+ T_QUIET;
+ T_ASSERT_EQ((int)kev[i].filter, EVFILT_READ, NULL);
+ T_QUIET;
+ T_ASSERT_NULL(kev[i].udata, NULL);
+ port = switch_port_list_find_fd(ports,
+ (int)kev[i].ident);
+ T_QUIET;
+ T_ASSERT_NE(port, NULL,
+ "port %p fd %d", (void *)port,
+ (int)kev[i].ident);
+ switch_port_receive(port, af, payload, payload_length,
+ validator, context);
+ }
+ } while (n_events != 0);
+ close(kq);
+}
+
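+/*
+ * Copy the bridge's learned address table and check that each port's
+ * generated MAC addresses were learned on the expected member interface;
+ * returns true only if every expected address is present.
+ */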
+static bool
+switch_port_list_verify_rt_table(switch_port_list_t port_list, bool log)
+{
+ bool all_present = true;
+ u_int i;
+ u_int count;
+ struct ifbareq *ifba;
+ struct ifbareq *rt_table;
+ switch_port_t port;
+
+ /* clear out current notion of how many addresses are present */
+ for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+ port->test_address_count = 0;
+ port->test_address_present = 0;
+ }
+ rt_table = bridge_rt_table_copy(&count);
+ if (rt_table == NULL) {
+ return false;
+ }
+ if (log) {
+ bridge_rt_table_log(rt_table, count);
+ }
+ for (i = 0, ifba = rt_table; i < count; i++, ifba++) {
+ uint64_t addr_bit;
+ u_int addr_index;
+ u_int unit_index;
+ u_char * ea;
+ ether_addr_t * eaddr;
+
+ eaddr = (ether_addr_t *)&ifba->ifba_dst;
+ ea = eaddr->octet;
+ addr_index = ea[EA_ADDR_INDEX];
+ unit_index = ea[EA_UNIT_INDEX];
+ port = switch_port_list_find_member(port_list,
+ ifba->ifba_ifsname);
+ T_QUIET;
+ T_ASSERT_NOTNULL(port, "switch_port_list_find_member %s",
+ ifba->ifba_ifsname);
+ if (!S_cleaning_up) {
+ T_QUIET;
+ T_ASSERT_EQ(unit_index, port->unit, NULL);
+ addr_bit = 1ULL << addr_index;
+ T_QUIET;
+ T_ASSERT_BITS_NOTSET(port->test_address_present,
+ addr_bit, "%s address %u",
+ ifba->ifba_ifsname, addr_index);
+ port->test_address_present |= addr_bit;
+ port->test_address_count++;
+ }
+ }
+ for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+ if (S_debug) {
+ T_LOG("%s unit %d [member %s] %u expect %u",
+ port->ifname, port->unit, port->member_ifname,
+ port->test_address_count, port->num_addrs);
+ }
+ if (port->test_address_count != port->num_addrs) {
+ all_present = false;
+ }
+ }
+
+ free(rt_table);
+ return all_present;
+}
+
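+/*
+ * Copy the bridge's MAC-NAT entry list and check that every internal
+ * port's addresses have an entry with the expected IP address; the
+ * MAC-NAT member itself must have no entries.
+ */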
+static bool
+switch_port_list_verify_mac_nat(switch_port_list_t port_list, bool log)
+{
+ bool all_present = true;
+ u_int i;
+ u_int count;
+ struct ifbrmne * entries;
+ switch_port_t port;
+ struct ifbrmne * scan;
+
+
+ /* clear out current notion of how many addresses are present */
+ for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+ port->test_address_count = 0;
+ port->test_address_present = 0;
+ }
+ entries = bridge_mac_nat_entries_copy(&count);
+ if (entries == NULL) {
+ return false;
+ }
+ if (log) {
+ bridge_mac_nat_entries_log(entries, count);
+ }
+ for (i = 0, scan = entries; i < count; i++, scan++) {
+ uint8_t af;
+ uint64_t addr_bit;
+ u_int addr_index;
+ char buf_ip1[INET6_ADDRSTRLEN];
+ char buf_ip2[INET6_ADDRSTRLEN];
+ u_char * ea;
+ ether_addr_t * eaddr;
+ union ifbrip ip;
+ u_int unit_index;
+
+ eaddr = (ether_addr_t *)&scan->ifbmne_mac;
+ ea = eaddr->octet;
+ addr_index = ea[EA_ADDR_INDEX];
+ unit_index = ea[EA_UNIT_INDEX];
+ port = switch_port_list_find_member(port_list,
+ scan->ifbmne_ifname);
+ T_QUIET;
+ T_ASSERT_NOTNULL(port,
+ "switch_port_list_find_member %s",
+ scan->ifbmne_ifname);
+ T_QUIET;
+ T_ASSERT_EQ(unit_index, port->unit, NULL);
+ af = scan->ifbmne_af;
+ get_ip_address(af, port->unit, addr_index, &ip);
+ addr_bit = 1ULL << addr_index;
+ T_QUIET;
+ T_ASSERT_TRUE(ip_addresses_are_equal(af, &ip, &scan->ifbmne_ip),
+ "mac nat entry IP address %s expected %s",
+ inet_ntop(af, &scan->ifbmne_ip_addr,
+ buf_ip1, sizeof(buf_ip1)),
+ inet_ntop(af, &ip,
+ buf_ip2, sizeof(buf_ip2)));
+ T_QUIET;
+ T_ASSERT_BITS_NOTSET(port->test_address_present,
+ addr_bit, "%s address %u",
+ scan->ifbmne_ifname, addr_index);
+ port->test_address_present |= addr_bit;
+ port->test_address_count++;
+ }
+ for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+ if (port->mac_nat) {
+ /* MAC-NAT interface should have no entries */
+ T_QUIET;
+ T_ASSERT_EQ(port->test_address_count, 0,
+ "mac nat interface %s has %u entries",
+ port->member_ifname,
+ port->test_address_count);
+ } else {
+ if (S_debug) {
+ T_LOG("%s unit %d [member %s] %u expect %u",
+ port->ifname, port->unit,
+ port->member_ifname,
+ port->test_address_count, port->num_addrs);
+ }
+ if (port->test_address_count != port->num_addrs) {
+ all_present = false;
+ }
+ }
+ }
+
+ free(entries);
+
+ return all_present;
+}
+
+/**
+** Basic Bridge Tests
+**/
+static void
+send_generation(switch_port_t port, uint8_t af, u_int addr_index,
+ const ether_addr_t * dst_eaddr, union ifbrip * dst_ip,
+ uint32_t generation)
+{
+ uint32_t payload;
+
+ payload = htonl(generation);
+ switch_port_send_udp_addr_index(port, af, addr_index, dst_eaddr, dst_ip,
+ &payload, sizeof(payload));
+}
+
+static void
+check_receive_generation(switch_port_list_t ports, uint8_t af,
+ uint32_t generation, packet_validator_t validator,
+ void * context)
+{
+ uint32_t payload;
+
+ payload = htonl(generation);
+ switch_port_list_check_receive(ports, af, &payload, sizeof(payload),
+ validator, context);
+}
+
+static void
+validate_source_ether_mismatch(switch_port_t port, const ether_header_t * eh_p)
+{
+ /* source shouldn't be our own MAC addresses */
+ T_QUIET;
+ T_ASSERT_NE(eh_p->ether_shost[EA_UNIT_INDEX], port->unit,
+ "ether source matches unit %d", port->unit);
+}
+
+static void
+validate_not_present_dhost(switch_port_t port, const ether_header_t * eh_p,
+ __unused u_int pkt_len,
+ __unused void * context)
+{
+ validate_source_ether_mismatch(port, eh_p);
+ T_QUIET;
+ T_ASSERT_EQ(bcmp(eh_p->ether_dhost, &ether_external,
+ sizeof(eh_p->ether_dhost)), 0,
+ "%s", __func__);
+ port->test_count++;
+}
+
+static void
+validate_broadcast_dhost(switch_port_t port, const ether_header_t * eh_p,
+ __unused u_int pkt_len,
+ __unused void * context)
+{
+ validate_source_ether_mismatch(port, eh_p);
+ T_QUIET;
+ T_ASSERT_NE((eh_p->ether_dhost[0] & 0x01), 0,
+ "%s", __func__);
+ port->test_count++;
+}
+
+static void
+validate_port_dhost(switch_port_t port, const ether_header_t * eh_p,
+ __unused u_int pkt_len,
+ __unused void * context)
+{
+ validate_source_ether_mismatch(port, eh_p);
+ T_QUIET;
+ T_ASSERT_EQ(eh_p->ether_dhost[EA_UNIT_INDEX], port->unit,
+ "wrong dhost unit %d != %d",
+ eh_p->ether_dhost[EA_UNIT_INDEX], port->unit);
+ port->test_count++;
+}
+
+
+static void
+check_received_count(switch_port_list_t port_list,
+ switch_port_t port, uint32_t expected_packets)
+{
+ u_int i;
+ switch_port_t scan;
+
+ for (i = 0, scan = port_list->list; i < port_list->count; i++, scan++) {
+ if (scan == port) {
+ T_QUIET;
+ T_ASSERT_EQ(port->test_count, 0,
+ "unexpected receive on port %d",
+ port->unit);
+ } else if (expected_packets == ALL_ADDRS) {
+ T_QUIET;
+ T_ASSERT_EQ(scan->test_count, scan->num_addrs,
+ "didn't receive on all addrs");
+ } else {
+ T_QUIET;
+ T_ASSERT_EQ(scan->test_count, expected_packets,
+ "wrong receive count on port %s", scan->member_ifname);
+ }
+ }
+}
+
+static void
+unicast_send_all(switch_port_list_t port_list, uint8_t af, switch_port_t port)
+{
+ u_int i;
+ switch_port_t scan;
+
+ for (i = 0, scan = port_list->list; i < port_list->count; i++, scan++) {
+ if (S_debug) {
+ T_LOG("Unicast send on %s", port->ifname);
+ }
+ for (u_int j = 0; j < scan->num_addrs; j++) {
+ ether_addr_t eaddr;
+ union ifbrip ip;
+
+ set_ethernet_address(&eaddr, scan->unit, j);
+ get_ip_address(af, scan->unit, j, &ip);
+ switch_port_send_udp_addr_index(port, af, 0, &eaddr, &ip,
+ NULL, 0);
+ }
+ }
+}
+
+
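+/*
+ * One pass of the learning test: send a generation-tagged UDP packet from
+ * every (port, address) pair to dst_eaddr and verify that every other
+ * port receives exactly one copy.
+ */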
+static void
+bridge_learning_test_once(switch_port_list_t port_list,
+ uint8_t af,
+ packet_validator_t validator,
+ void * context,
+ const ether_addr_t * dst_eaddr,
+ bool retry)
+{
+ u_int i;
+ union ifbrip dst_ip;
+ switch_port_t port;
+
+ get_broadcast_ip_address(af, &dst_ip);
+ for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+ if (port->test_address_count == port->num_addrs) {
+ /* already populated */
+ continue;
+ }
+ if (S_debug) {
+ T_LOG("Sending on %s", port->ifname);
+ }
+ for (u_int j = 0; j < port->num_addrs; j++) {
+ uint32_t generation;
+
+ if (retry) {
+ uint64_t addr_bit;
+
+ addr_bit = 1ULL << j;
+ if ((port->test_address_present & addr_bit)
+ != 0) {
+ /* already present */
+ continue;
+ }
+ T_LOG("Retry port %s unit %u address %u",
+ port->ifname, port->unit, j);
+ }
+ generation = next_generation();
+ send_generation(port,
+ af,
+ j,
+ dst_eaddr,
+ &dst_ip,
+ generation);
+
+ /* receive across all ports */
+ check_receive_generation(port_list,
+ af,
+ generation,
+ validator,
+ context);
+
+ /* ensure that every port saw the packet */
+ check_received_count(port_list, port, 1);
+ }
+ }
+ return;
+}
+
+static inline const char *
+af_get_str(uint8_t af)
+{
+ return (af == AF_INET) ? "IPv4" : "IPv6";
+}
+
+static void
+bridge_learning_test(switch_port_list_t port_list,
+ uint8_t af,
+ packet_validator_t validator,
+ void * context,
+ const ether_addr_t * dst_eaddr)
+{
+ char ntoabuf[ETHER_NTOA_BUFSIZE];
+ u_int i;
+ switch_port_t port;
+ bool verified = false;
+
+ ether_ntoa_buf(dst_eaddr, ntoabuf, sizeof(ntoabuf));
+
+ /*
+ * Send a broadcast frame from every port in the list so that the bridge
+ * learns our MAC address.
+ */
+#define BROADCAST_MAX_TRIES 20
+ for (int try = 1; try < BROADCAST_MAX_TRIES; try++) {
+ bool retry = (try > 1);
+
+ if (!retry) {
+ T_LOG("%s: %s #ports %u #addrs %u dest %s",
+ __func__,
+ af_get_str(af),
+ port_list->count, port_list->list->num_addrs,
+ ntoabuf);
+ } else {
+ T_LOG("%s: %s #ports %u #addrs %u dest %s (TRY=%d)",
+ __func__,
+ af_get_str(af),
+ port_list->count, port_list->list->num_addrs,
+ ntoabuf, try);
+ }
+ bridge_learning_test_once(port_list, af, validator, context,
+ dst_eaddr, retry);
+ /*
+ * In the event of a memory allocation failure, it's possible
+ * that the address was not learned. Figure out whether
+ * all addresses are present, and if not, we'll retry on
+ * those that are not present.
+ */
+ verified = switch_port_list_verify_rt_table(port_list, false);
+ if (verified) {
+ break;
+ }
+ /* wait a short time to allow the system to recover */
+ usleep(100 * 1000);
+ }
+ T_QUIET;
+ T_ASSERT_TRUE(verified, "All addresses present");
+
+ /*
+ * Since we just broadcast on every port in the switch, the bridge knows
+ * the port's MAC addresses. The bridge should not need to broadcast the
+ * packet to learn, which means the unicast traffic should only arrive
+ * on the intended port.
+ */
+ for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+ /* send unicast packets to every other port's MAC addresses */
+ unicast_send_all(port_list, af, port);
+
+ /* receive all of that generated traffic */
+ switch_port_list_check_receive(port_list, af, NULL, 0,
+ validate_port_dhost, NULL);
+ /* check that we saw all of the unicast packets */
+ check_received_count(port_list, port, ALL_ADDRS);
+ }
+ T_PASS("%s", __func__);
+}
+
+/**
+** MAC-NAT tests
+**/
+static void
+mac_nat_check_received_count(switch_port_list_t port_list, switch_port_t port)
+{
+ u_int i;
+ switch_port_t scan;
+
+ for (i = 0, scan = port_list->list; i < port_list->count; i++, scan++) {
+ u_int expected = 0;
+
+ if (scan == port) {
+ expected = scan->num_addrs;
+ }
+ T_QUIET;
+ T_ASSERT_EQ(scan->test_count, expected,
+ "%s [member %s]%s expected %u actual %u",
+ scan->ifname, scan->member_ifname,
+ scan->mac_nat ? " [mac-nat]" : "",
+ expected, scan->test_count);
+ }
+}
+
+static void
+validate_mac_nat(switch_port_t port, const ether_header_t * eh_p,
+ __unused u_int pkt_len,
+ __unused void * context)
+{
+ if (port->mac_nat) {
+ bool equal;
+
+ /* source must match MAC-NAT interface */
+ equal = (bcmp(eh_p->ether_shost, &port->member_mac,
+ sizeof(port->member_mac)) == 0);
+ if (!equal) {
+ ethernet_frame_validate(eh_p, pkt_len, true);
+ }
+ T_QUIET;
+ T_ASSERT_TRUE(equal, "source address match");
+ port->test_count++;
+ } else {
+ validate_not_present_dhost(port, eh_p, pkt_len, NULL);
+ }
+}
+
+static void
+validate_mac_nat_in(switch_port_t port, const ether_header_t * eh_p,
+ u_int pkt_len, __unused void * context)
+{
+ if (S_debug) {
+ T_LOG("%s received %u bytes", port->member_ifname, pkt_len);
+ ethernet_frame_validate(eh_p, pkt_len, true);
+ }
+ T_QUIET;
+ T_ASSERT_EQ(eh_p->ether_dhost[EA_UNIT_INDEX], port->unit,
+ "dhost unit %u expected %u",
+ eh_p->ether_dhost[EA_UNIT_INDEX], port->unit);
+ port->test_count++;
+}
+
+static void
+validate_mac_nat_arp_out(switch_port_t port, const ether_header_t * eh_p,
+ u_int pkt_len, void * context)
+{
+ const struct ether_arp * earp;
+ switch_port_t send_port = (switch_port_t)context;
+
+ if (S_debug) {
+ T_LOG("%s received %u bytes", port->member_ifname, pkt_len);
+ ethernet_frame_validate(eh_p, pkt_len, true);
+ }
+ T_QUIET;
+ T_ASSERT_EQ((int)ntohs(eh_p->ether_type), (int)ETHERTYPE_ARP, NULL);
+ earp = (const struct ether_arp *)(const void *)(eh_p + 1);
+ T_QUIET;
+ T_ASSERT_GE(pkt_len, (u_int)(sizeof(*eh_p) + sizeof(*earp)), NULL);
+ if (port->mac_nat) {
+ bool equal;
+
+ /* source ethernet must match MAC-NAT interface */
+ equal = (bcmp(eh_p->ether_shost, &port->member_mac,
+ sizeof(port->member_mac)) == 0);
+ if (!equal) {
+ ethernet_frame_validate(eh_p, pkt_len, true);
+ }
+ T_QUIET;
+ T_ASSERT_TRUE(equal, "%s -> %s source address translated",
+ send_port->member_ifname,
+ port->member_ifname);
+ /* sender hw must match MAC-NAT interface */
+ equal = (bcmp(earp->arp_sha, &port->member_mac,
+ sizeof(port->member_mac)) == 0);
+ if (!equal) {
+ ethernet_frame_validate(eh_p, pkt_len, true);
+ }
+ T_QUIET;
+ T_ASSERT_TRUE(equal, "%s -> %s sender hardware translated",
+ send_port->member_ifname,
+ port->member_ifname);
+ } else {
+ /* source ethernet must match the sender */
+ T_QUIET;
+ T_ASSERT_EQ(eh_p->ether_shost[EA_UNIT_INDEX], send_port->unit,
+ "%s -> %s unit %u expected %u",
+ send_port->member_ifname,
+ port->member_ifname,
+ eh_p->ether_shost[EA_UNIT_INDEX], send_port->unit);
+ /* source hw must match the sender */
+ T_QUIET;
+ T_ASSERT_EQ(earp->arp_sha[EA_UNIT_INDEX], send_port->unit,
+ "%s -> %s unit %u expected %u",
+ send_port->member_ifname,
+ port->member_ifname,
+ earp->arp_sha[EA_UNIT_INDEX], send_port->unit);
+ }
+ port->test_count++;
+}
+
+static void
+validate_mac_nat_arp_in(switch_port_t port, const ether_header_t * eh_p,
+ u_int pkt_len, void * context)
+{
+ const struct ether_arp * earp;
+ switch_port_t send_port = (switch_port_t)context;
+
+ if (S_debug) {
+ T_LOG("%s received %u bytes", port->member_ifname, pkt_len);
+ ethernet_frame_validate(eh_p, pkt_len, true);
+ }
+ earp = (const struct ether_arp *)(const void *)(eh_p + 1);
+ T_QUIET;
+ T_ASSERT_EQ((int)ntohs(eh_p->ether_type), (int)ETHERTYPE_ARP, NULL);
+ T_QUIET;
+ T_ASSERT_GE(pkt_len, (u_int)(sizeof(*eh_p) + sizeof(*earp)), NULL);
+ T_QUIET;
+ T_ASSERT_FALSE(port->mac_nat, NULL);
+
+ /* destination ethernet must match the unit */
+ T_QUIET;
+ T_ASSERT_EQ(eh_p->ether_dhost[EA_UNIT_INDEX], port->unit,
+ "%s -> %s unit %u expected %u",
+ send_port->member_ifname,
+ port->member_ifname,
+ eh_p->ether_dhost[EA_UNIT_INDEX], port->unit);
+ /* source hw must match the sender */
+ T_QUIET;
+ T_ASSERT_EQ(earp->arp_tha[EA_UNIT_INDEX], port->unit,
+ "%s -> %s unit %u expected %u",
+ send_port->member_ifname,
+ port->member_ifname,
+ earp->arp_tha[EA_UNIT_INDEX], port->unit);
+ port->test_count++;
+}
+
+static void
+mac_nat_test_arp_out(switch_port_list_t port_list)
+{
+ u_int i;
+ struct in_addr ip_dst;
+ switch_port_t port;
+
+ ip_dst = get_external_ipv4_address();
+ for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+ if (port->mac_nat) {
+ continue;
+ }
+ for (u_int j = 0; j < port->num_addrs; j++) {
+ ether_addr_t eaddr;
+ struct in_addr ip_src;
+
+ set_ethernet_address(&eaddr, port->unit, j);
+ get_ipv4_address(port->unit, j, &ip_src);
+ switch_port_send_arp(port,
+ ARPOP_REQUEST,
+ &eaddr,
+ ip_src,
+ NULL,
+ ip_dst);
+ switch_port_list_check_receive(port_list, AF_INET,
+ NULL, 0,
+ validate_mac_nat_arp_out,
+ port);
+ check_received_count(port_list, port, 1);
+ }
+ }
+ T_PASS("%s", __func__);
+}
+
+static void
+mac_nat_send_arp_response(switch_port_t ext_port, switch_port_t port)
+{
+ struct in_addr ip_src;
+
+ T_QUIET;
+ T_ASSERT_TRUE(ext_port->mac_nat, "%s is MAC-NAT interface",
+ ext_port->member_ifname);
+ ip_src = get_external_ipv4_address();
+ for (u_int j = 0; j < port->num_addrs; j++) {
+ struct in_addr ip_dst;
+
+ get_ipv4_address(port->unit, j, &ip_dst);
+ if (S_debug) {
+ T_LOG("Generating ARP destined to %s %s",
+ port->ifname, inet_ntoa(ip_dst));
+ }
+ switch_port_send_arp(ext_port,
+ ARPOP_REPLY,
+ &ether_external,
+ ip_src,
+ &ext_port->member_mac,
+ ip_dst);
+ }
+}
+
+static void
+mac_nat_test_arp_in(switch_port_list_t port_list)
+{
+ u_int i;
+ struct in_addr ip_src;
+ switch_port_t port;
+
+ ip_src = get_external_ipv4_address();
+ for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+ if (port->mac_nat) {
+ continue;
+ }
+ mac_nat_send_arp_response(port_list->list, port);
+
+ /* receive the generated traffic */
+ switch_port_list_check_receive(port_list, AF_INET, NULL, 0,
+ validate_mac_nat_arp_in,
+ port_list->list);
+
+ /* verify that only the single port got the packet */
+ mac_nat_check_received_count(port_list, port);
+ }
+ T_PASS("%s", __func__);
+}
+
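+/*
+ * DHCP validator: on the MAC-NAT member the broadcast flag must have been
+ * set and the source MAC translated; on internal members the flag must be
+ * clear and the source MAC left unchanged.
+ */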
+static void
+validate_mac_nat_dhcp(switch_port_t port, const ether_header_t * eh_p,
+ u_int pkt_len, void * context)
+{
+ u_int dp_flags;
+ const struct bootp_packet * pkt;
+ switch_port_t send_port = (switch_port_t)context;
+
+
+ T_QUIET;
+ T_ASSERT_GE(pkt_len, (u_int)sizeof(*pkt), NULL);
+ T_QUIET;
+ T_ASSERT_EQ((int)ntohs(eh_p->ether_type), (int)ETHERTYPE_IP, NULL);
+ pkt = (const struct bootp_packet *)(const void *)(eh_p + 1);
+
+ dp_flags = ntohs(pkt->bp_bootp.bp_unused);
+ if (port->mac_nat) {
+ bool equal;
+
+ /* Broadcast bit must be set */
+ T_QUIET;
+ T_ASSERT_BITS_SET(dp_flags, (u_int)DHCP_FLAGS_BROADCAST,
+ "%s -> %s: flags 0x%x must have 0x%x",
+ send_port->member_ifname,
+ port->member_ifname,
+ dp_flags, DHCP_FLAGS_BROADCAST);
+
+ /* source must match MAC-NAT interface */
+ equal = (bcmp(eh_p->ether_shost, &port->member_mac,
+ sizeof(port->member_mac)) == 0);
+ if (!equal) {
+ ethernet_frame_validate(eh_p, pkt_len, true);
+ }
+ T_QUIET;
+ T_ASSERT_TRUE(equal, "%s -> %s source address translated",
+ send_port->member_ifname,
+ port->member_ifname);
+ } else {
+ /* Broadcast bit must not be set */
+ T_QUIET;
+ T_ASSERT_BITS_NOTSET(dp_flags, DHCP_FLAGS_BROADCAST,
+ "%s -> %s flags 0x%x must not have 0x%x",
+ send_port->member_ifname,
+ port->member_ifname,
+ dp_flags, DHCP_FLAGS_BROADCAST);
+ T_QUIET;
+ T_ASSERT_EQ(eh_p->ether_shost[EA_UNIT_INDEX], send_port->unit,
+ "%s -> %s unit %u expected %u",
+ send_port->member_ifname,
+ port->member_ifname,
+ eh_p->ether_shost[EA_UNIT_INDEX], send_port->unit);
+ }
+ port->test_count++;
+}
+
+static u_int
+make_dhcp_payload(dhcp_min_payload_t payload, ether_addr_t *eaddr)
+{
+ struct bootp * dhcp;
+ u_int payload_length;
+
+ /* create a minimal BOOTP packet */
+ payload_length = sizeof(*payload);
+ dhcp = (struct bootp *)payload;
+ bzero(dhcp, payload_length);
+ dhcp->bp_op = BOOTREQUEST;
+ dhcp->bp_htype = ARPHRD_ETHER;
+ dhcp->bp_hlen = sizeof(*eaddr);
+ bcopy(eaddr->octet, dhcp->bp_chaddr, sizeof(eaddr->octet));
+ return payload_length;
+}
+
+static void
+mac_nat_test_dhcp(switch_port_list_t port_list)
+{
+ u_int i;
+ struct in_addr ip_dst = { INADDR_BROADCAST };
+ struct in_addr ip_src = { INADDR_ANY };
+ switch_port_t port;
+
+ for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+ ether_addr_t eaddr;
+ dhcp_min_payload payload;
+ u_int payload_len;
+
+ if (port->mac_nat) {
+ continue;
+ }
+ set_ethernet_address(&eaddr, port->unit, 0);
+ payload_len = make_dhcp_payload(&payload, &eaddr);
+ if (S_debug) {
+ T_LOG("%s: transmit DHCP packet (member %s)",
+ port->ifname, port->member_ifname);
+ }
+ switch_port_send_udp(port,
+ AF_INET,
+ &eaddr,
+ (union ifbrip *)&ip_src,
+ BOOTP_CLIENT_PORT,
+ &ether_broadcast,
+ (union ifbrip *)&ip_dst,
+ BOOTP_SERVER_PORT,
+ &payload,
+ payload_len);
+
+ switch_port_list_check_receive(port_list, AF_INET, NULL, 0,
+ validate_mac_nat_dhcp,
+ port);
+
+ check_received_count(port_list, port, 1);
+ }
+ T_PASS("%s", __func__);
+}
+
+
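+/*
+ * Verify the ND link-layer address option (if present): on the MAC-NAT
+ * member it must carry the member interface's MAC; on internal members it
+ * must still carry the sending port's generated MAC.
+ */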
+static void
+validate_mac_nat_nd6(switch_port_t port,
+ const struct icmp6_hdr * icmp6,
+ u_int icmp6_len,
+ uint8_t opt_type,
+ u_int nd_hdr_size,
+ switch_port_t send_port)
+{
+ const uint8_t * linkaddr;
+ const uint8_t * ptr;
+ const struct nd_opt_hdr * nd_opt;
+ u_int nd_size;
+
+ ptr = (const uint8_t *)icmp6;
+ nd_size = nd_hdr_size + LINKADDR_OPT_LEN;
+ if (icmp6_len < nd_size) {
+ /* no LINKADDR option */
+ return;
+ }
+ nd_opt = (const struct nd_opt_hdr *)(const void *)(ptr + nd_hdr_size);
+ T_QUIET;
+ T_ASSERT_EQ(nd_opt->nd_opt_type, opt_type, NULL);
+ T_QUIET;
+ T_ASSERT_EQ(GET_ND_OPT_LEN(nd_opt->nd_opt_len), LINKADDR_OPT_LEN, NULL);
+ linkaddr = (const uint8_t *)(nd_opt + 1);
+ if (port->mac_nat) {
+ bool equal;
+
+ equal = (bcmp(linkaddr, &port->member_mac,
+ sizeof(port->member_mac)) == 0);
+ T_QUIET;
+ T_ASSERT_TRUE(equal, "%s -> %s sender hardware translated",
+ send_port->member_ifname,
+ port->member_ifname);
+ } else {
+ /* source hw must match the sender */
+ T_QUIET;
+ T_ASSERT_EQ(linkaddr[EA_UNIT_INDEX], send_port->unit,
+ "%s -> %s unit %u expected %u",
+ send_port->member_ifname,
+ port->member_ifname,
+ linkaddr[EA_UNIT_INDEX], send_port->unit);
+ }
+}
+
+static void
+validate_mac_nat_icmp6_out(switch_port_t port, const struct icmp6_hdr * icmp6,
+ u_int icmp6_len, switch_port_t send_port)
+{
+ switch (icmp6->icmp6_type) {
+ case ND_NEIGHBOR_ADVERT:
+ validate_mac_nat_nd6(port, icmp6, icmp6_len,
+ ND_OPT_TARGET_LINKADDR,
+ sizeof(struct nd_neighbor_advert),
+ send_port);
+ break;
+ case ND_NEIGHBOR_SOLICIT:
+ validate_mac_nat_nd6(port, icmp6, icmp6_len,
+ ND_OPT_SOURCE_LINKADDR,
+ sizeof(struct nd_neighbor_solicit),
+ send_port);
+ break;
+ case ND_ROUTER_SOLICIT:
+ validate_mac_nat_nd6(port, icmp6, icmp6_len,
+ ND_OPT_SOURCE_LINKADDR,
+ sizeof(struct nd_router_solicit),
+ send_port);
+ break;
+ default:
+ T_FAIL("Unsupported icmp6 type %d", icmp6->icmp6_type);
+ break;
+ }
+}
+
+static void
+validate_mac_nat_nd6_out(switch_port_t port, const ether_header_t * eh_p,
+ u_int pkt_len, void * context)
+{
+ const struct icmp6_hdr * icmp6;
+ const struct ip6_hdr * ip6;
+ switch_port_t send_port = (switch_port_t)context;
+
+ if (S_debug) {
+ T_LOG("%s received %u bytes", port->member_ifname, pkt_len);
+ ethernet_frame_validate(eh_p, pkt_len, true);
+ }
+ T_QUIET;
+ T_ASSERT_EQ(ntohs(eh_p->ether_type), (u_short)ETHERTYPE_IPV6, NULL);
+ ip6 = (const struct ip6_hdr *)(const void *)(eh_p + 1);
+ icmp6 = (const struct icmp6_hdr *)(const void *)(ip6 + 1);
+ T_QUIET;
+ T_ASSERT_GE(pkt_len, (u_int)MIN_ICMP6_LEN, NULL);
+ T_QUIET;
+ T_ASSERT_EQ(ip6->ip6_nxt, IPPROTO_ICMPV6, NULL);
+
+ /* validate the ethernet header */
+ if (port->mac_nat) {
+ bool equal;
+
+ /* source ethernet must match MAC-NAT interface */
+ equal = (bcmp(eh_p->ether_shost, &port->member_mac,
+ sizeof(port->member_mac)) == 0);
+ if (!equal) {
+ ethernet_frame_validate(eh_p, pkt_len, true);
+ }
+ T_QUIET;
+ T_ASSERT_TRUE(equal, "%s -> %s source address translated",
+ send_port->member_ifname,
+ port->member_ifname);
+ } else {
+ /* source ethernet must match the sender */
+ T_QUIET;
+ T_ASSERT_EQ(eh_p->ether_shost[EA_UNIT_INDEX], send_port->unit,
+ "%s -> %s unit %u expected %u",
+ send_port->member_ifname,
+ port->member_ifname,
+ eh_p->ether_shost[EA_UNIT_INDEX], send_port->unit);
+ }
+ /* validate the icmp6 payload */
+ validate_mac_nat_icmp6_out(port, icmp6,
+ pkt_len - ETHER_IPV6_LEN,
+ send_port);
+ port->test_count++;
+}
+
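+/*
+ * Send neighbor solicit/advert and router solicit frames from every
+ * internal address; the MAC-NAT uplink must see a translated ethernet
+ * source and ND link-layer option, while internal members see the
+ * original sender's addresses.
+ */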
+static void
+mac_nat_test_nd6_out(switch_port_list_t port_list)
+{
+ ether_addr_t * ext_mac;
+ switch_port_t ext_port;
+ u_int i;
+ union ifbrip ip_dst;
+ switch_port_t port;
+
+ get_external_ip_address(AF_INET6, &ip_dst);
+ ext_port = port_list->list;
+ T_QUIET;
+ T_ASSERT_TRUE(ext_port->mac_nat, NULL);
+ ext_mac = &ext_port->member_mac;
+ for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+ if (port->mac_nat) {
+ continue;
+ }
+ /* neighbor solicit */
+ for (u_int j = 0; j < port->num_addrs; j++) {
+ ether_addr_t eaddr;
+ union ifbrip ip_src;
+
+ set_ethernet_address(&eaddr, port->unit, j);
+ get_ip_address(AF_INET6, port->unit, j, &ip_src);
+ switch_port_send_nd6(port,
+ ND_NEIGHBOR_SOLICIT,
+ &eaddr,
+ &ip_src.ifbrip_addr6,
+ NULL,
+ NULL,
+ &ip_dst.ifbrip_addr6);
+ switch_port_list_check_receive(port_list, AF_INET,
+ NULL, 0,
+ validate_mac_nat_nd6_out,
+ port);
+ check_received_count(port_list, port, 1);
+ }
+ /* neighbor advert */
+ for (u_int j = 0; j < port->num_addrs; j++) {
+ ether_addr_t eaddr;
+ union ifbrip ip_src;
+
+ set_ethernet_address(&eaddr, port->unit, j);
+ get_ip_address(AF_INET6, port->unit, j, &ip_src);
+ switch_port_send_nd6(port,
+ ND_NEIGHBOR_ADVERT,
+ &eaddr,
+ &ip_src.ifbrip_addr6,
+ NULL,
+ &eaddr,
+ &ip_src.ifbrip_addr6);
+ switch_port_list_check_receive(port_list, AF_INET,
+ NULL, 0,
+ validate_mac_nat_nd6_out,
+ port);
+ check_received_count(port_list, port, 1);
+ }
+ /* router solicit */
+ for (u_int j = 0; j < port->num_addrs; j++) {
+ ether_addr_t eaddr;
+ union ifbrip ip_src;
+
+ set_ethernet_address(&eaddr, port->unit, j);
+ get_ip_address(AF_INET6, port->unit, j, &ip_src);
+ //get_ipv6ll_address(port->unit, j, &ip_src.ifbrip_addr6);
+ switch_port_send_nd6(port,
+ ND_ROUTER_SOLICIT,
+ &eaddr,
+ &ip_src.ifbrip_addr6,
+ NULL,
+ NULL,
+ NULL);
+ switch_port_list_check_receive(port_list, AF_INET,
+ NULL, 0,
+ validate_mac_nat_nd6_out,
+ port);
+ check_received_count(port_list, port, 1);
+ }
+ }
+ T_PASS("%s", __func__);
+}
+
+static void
+mac_nat_send_response(switch_port_t ext_port, uint8_t af, switch_port_t port)
+{
+ union ifbrip src_ip;
+
+ T_QUIET;
+ T_ASSERT_TRUE(ext_port->mac_nat, "%s is MAC-NAT interface",
+ ext_port->member_ifname);
+ if (S_debug) {
+ T_LOG("Generating UDP traffic destined to %s", port->ifname);
+ }
+ get_external_ip_address(af, &src_ip);
+ for (u_int j = 0; j < port->num_addrs; j++) {
+ union ifbrip ip;
+
+ get_ip_address(af, port->unit, j, &ip);
+ switch_port_send_udp(ext_port,
+ af,
+ &ether_external,
+ &src_ip,
+ TEST_DEST_PORT,
+ &ext_port->member_mac,
+ &ip,
+ TEST_SOURCE_PORT,
+ NULL, 0);
+ }
+}
+
+
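+/*
+ * One pass of the MAC-NAT IP test: send a generation-tagged UDP packet
+ * from each internal (port, address) pair toward the external host and
+ * verify that the copy forwarded on the MAC-NAT uplink carries the
+ * uplink's source MAC.
+ */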
+static void
+mac_nat_test_ip_once(switch_port_list_t port_list, uint8_t af, bool retry)
+{
+ union ifbrip dst_ip;
+ u_int i;
+ switch_port_t port;
+
+ get_external_ip_address(af, &dst_ip);
+ for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+ if (port->test_address_count == port->num_addrs) {
+ /* already populated */
+ continue;
+ }
+ if (S_debug) {
+ T_LOG("Sending on %s", port->ifname);
+ }
+ for (u_int j = 0; j < port->num_addrs; j++) {
+ uint32_t generation;
+
+ if (retry) {
+ uint64_t addr_bit;
+
+ addr_bit = 1ULL << j;
+ if ((port->test_address_present & addr_bit)
+ != 0) {
+ /* already present */
+ continue;
+ }
+ T_LOG("Retry port %s unit %u address %u",
+ port->ifname, port->unit, j);
+ }
+
+ generation = next_generation();
+ send_generation(port,
+ af,
+ j,
+ &ether_external,
+ &dst_ip,
+ generation);
+
+ /* receive across all ports */
+ check_receive_generation(port_list,
+ af,
+ generation,
+ validate_mac_nat,
+ NULL);
+
+ /* ensure that every port saw the packet */
+ check_received_count(port_list, port, 1);
+ }
+ }
+ return;
+}
+
+static void
+mac_nat_test_ip(switch_port_list_t port_list, uint8_t af)
+{
+ u_int i;
+ switch_port_t port;
+ bool verified = false;
+
+ /*
+ * Send a packet from every port in the list so that the bridge
+ * learns the MAC addresses and IP addresses.
+ */
+#define MAC_NAT_MAX_TRIES 20
+ for (int try = 1; try < MAC_NAT_MAX_TRIES; try++) {
+ bool retry = (try > 1);
+
+ if (!retry) {
+ T_LOG("%s: #ports %u #addrs %u",
+ __func__,
+ port_list->count, port_list->list->num_addrs);
+ } else {
+ T_LOG("%s: #ports %u #addrs %u destination (TRY=%d)",
+ __func__,
+ port_list->count, port_list->list->num_addrs,
+ try);
+ }
+ mac_nat_test_ip_once(port_list, af, retry);
+ /*
+ * In the event of a memory allocation failure, it's possible
+ * that the address was not learned. Figure out whether
+ * all addresses are present, and if not, we'll retry on
+ * those that are not present.
+ */
+ verified = switch_port_list_verify_mac_nat(port_list, false);
+ if (verified) {
+ break;
+ }
+ /* wait a short time to allow the system to recover */
+ usleep(100 * 1000);
+ }
+ T_QUIET;
+ T_ASSERT_TRUE(verified, "All addresses present");
+
+ /*
+ * The bridge now has an IP address <-> MAC address binding for every
+ * address on each internal interface.
+ *
+ * Generate an inbound packet on the MAC-NAT interface targeting
+ * each interface address. Verify that the packet appears on
+ * the appropriate internal address with appropriate translation.
+ */
+ for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+ if (port->mac_nat) {
+ continue;
+ }
+ mac_nat_send_response(port_list->list, af, port);
+
+ /* receive the generated traffic */
+ switch_port_list_check_receive(port_list, AF_INET, NULL, 0,
+ validate_mac_nat_in,
+ NULL);
+
+ /* verify that only the single port got the packet */
+ mac_nat_check_received_count(port_list, port);
+ }
+ T_PASS("%s", __func__);
+}
+
+/**
+** interface management
+**/
+
+static int
+ifnet_get_lladdr(int s, const char * ifname, ether_addr_t * eaddr)
+{
+ int err;
+ struct ifreq ifr;
+
+ bzero(&ifr, sizeof(ifr));
+ strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+ ifr.ifr_addr.sa_family = AF_LINK;
+ ifr.ifr_addr.sa_len = ETHER_ADDR_LEN;
+ err = ioctl(s, SIOCGIFLLADDR, &ifr);
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(err, "SIOCGIFLLADDR %s", ifname);
+ bcopy(ifr.ifr_addr.sa_data, eaddr->octet, ETHER_ADDR_LEN);
+ return err;
+}
+
+
+static int
+ifnet_attach_ip(int s, char * name)
+{
+ int err;
+ struct ifreq ifr;
+
+ bzero(&ifr, sizeof(ifr));
+ strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
+ err = ioctl(s, SIOCPROTOATTACH, &ifr);
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(err, "SIOCPROTOATTACH %s", ifr.ifr_name);
+ return err;
+}
+
+#if 0
+static int
+ifnet_detach_ip(int s, char * name)
+{
+ int err;
+ struct ifreq ifr;
+
+ bzero(&ifr, sizeof(ifr));
+ strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
+ err = ioctl(s, SIOCPROTODETACH, &ifr);
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(err, "SIOCPROTODETACH %s", ifr.ifr_name);
+ return err;
+}
+#endif
+
+static int
+ifnet_destroy(int s, const char * ifname, bool fail_on_error)
+{
+ int err;
+ struct ifreq ifr;
+
+ bzero(&ifr, sizeof(ifr));
+ strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+ err = ioctl(s, SIOCIFDESTROY, &ifr);
+ if (fail_on_error) {
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(err, "SIOCSIFDESTROY %s", ifr.ifr_name);
+ }
+ if (err < 0) {
+ T_LOG("SIOCSIFDESTROY %s", ifr.ifr_name);
+ }
+ return err;
+}
+
+static int
+ifnet_set_flags(int s, const char * ifname,
+ uint16_t flags_set, uint16_t flags_clear)
+{
+ uint16_t flags_after;
+ uint16_t flags_before;
+ struct ifreq ifr;
+ int ret;
+
+ bzero(&ifr, sizeof(ifr));
+ strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+ ret = ioctl(s, SIOCGIFFLAGS, (caddr_t)&ifr);
+ if (ret != 0) {
+ T_LOG("SIOCGIFFLAGS %s", ifr.ifr_name);
+ return ret;
+ }
+ flags_before = (uint16_t)ifr.ifr_flags;
+ ifr.ifr_flags |= flags_set;
+ ifr.ifr_flags &= ~(flags_clear);
+ flags_after = (uint16_t)ifr.ifr_flags;
+ if (flags_before == flags_after) {
+ /* nothing to do */
+ ret = 0;
+ } else {
+ /* issue the ioctl */
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(ioctl(s, SIOCSIFFLAGS, &ifr),
+ "SIOCSIFFLAGS %s 0x%x",
+ ifr.ifr_name, (uint16_t)ifr.ifr_flags);
+ if (S_debug) {
+ T_LOG("setflags(%s set 0x%x clear 0x%x) 0x%x => 0x%x",
+ ifr.ifr_name, flags_set, flags_clear,
+ flags_before, flags_after);
+ }
+ }
+ return ret;
+}
+
+#define BRIDGE_NAME "bridge"
+#define BRIDGE200 BRIDGE_NAME "200"
+
+#define FETH_NAME "feth"
+
+/* On some platforms with DEBUG kernel, we need to wait a while */
+#define SIFCREATE_RETRY 600
+
+static int
+ifnet_create(int s, const char * ifname)
+{
+ int error = 0;
+ struct ifreq ifr;
+
+ bzero(&ifr, sizeof(ifr));
+ strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+
+ for (int i = 0; i < SIFCREATE_RETRY; i++) {
+ if (ioctl(s, SIOCIFCREATE, &ifr) < 0) {
+ error = errno;
+ T_LOG("SIOCSIFCREATE %s: %s", ifname,
+ strerror(error));
+ if (error == EBUSY) {
+ /* interface is tearing down, try again */
+ usleep(10000);
+ } else if (error == EEXIST) {
+ /* interface exists, try destroying it */
+ (void)ifnet_destroy(s, ifname, false);
+ } else {
+ /* unexpected failure */
+ break;
+ }
+ } else {
+ error = 0;
+ break;
+ }
+ }
+ if (error == 0) {
+ error = ifnet_set_flags(s, ifname, IFF_UP, 0);
+ }
+ return error;
+}
+
+static int
+siocdrvspec(int s, const char * ifname,
+ u_long op, void *arg, size_t argsize, bool set)
+{
+ struct ifdrv ifd;
+
+ memset(&ifd, 0, sizeof(ifd));
+ strlcpy(ifd.ifd_name, ifname, sizeof(ifd.ifd_name));
+ ifd.ifd_cmd = op;
+ ifd.ifd_len = argsize;
+ ifd.ifd_data = arg;
+ return ioctl(s, set ? SIOCSDRVSPEC : SIOCGDRVSPEC, &ifd);
+}
+
+
+static int
+fake_set_peer(int s, const char * feth, const char * feth_peer)
+{
+ struct if_fake_request iffr;
+ int ret;
+
+ bzero((char *)&iffr, sizeof(iffr));
+ if (feth_peer != NULL) {
+ strlcpy(iffr.iffr_peer_name, feth_peer,
+ sizeof(iffr.iffr_peer_name));
+ }
+ ret = siocdrvspec(s, feth, IF_FAKE_S_CMD_SET_PEER,
+ &iffr, sizeof(iffr), true);
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(ret,
+ "SIOCDRVSPEC(%s, IF_FAKE_S_CMD_SET_PEER, %s)",
+ feth, (feth_peer != NULL) ? feth_peer : "<none>");
+ return ret;
+}
+
+static int
+bridge_add_member(int s, const char * bridge, const char * member)
+{
+ struct ifbreq req;
+ int ret;
+
+ memset(&req, 0, sizeof(req));
+ strlcpy(req.ifbr_ifsname, member, sizeof(req.ifbr_ifsname));
+ ret = siocdrvspec(s, bridge, BRDGADD, &req, sizeof(req), true);
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(ret, "%s %s %s", __func__, bridge, member);
+ return ret;
+}
+
+
+static int
+bridge_set_mac_nat(int s, const char * bridge, const char * member, bool enable)
+{
+ uint32_t flags;
+ bool need_set = false;
+ struct ifbreq req;
+ int ret;
+
+ memset(&req, 0, sizeof(req));
+ strlcpy(req.ifbr_ifsname, member, sizeof(req.ifbr_ifsname));
+ ret = siocdrvspec(s, bridge, BRDGGIFFLGS, &req, sizeof(req), false);
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(ret, "BRDGGIFFLGS %s %s", bridge, member);
+ flags = req.ifbr_ifsflags;
+ if (enable) {
+ if ((flags & IFBIF_MAC_NAT) == 0) {
+ need_set = true;
+ req.ifbr_ifsflags |= IFBIF_MAC_NAT;
+ }
+ /* need to set it */
+ } else if ((flags & IFBIF_MAC_NAT) != 0) {
+ /* need to clear it */
+ need_set = true;
+ req.ifbr_ifsflags &= ~(uint32_t)IFBIF_MAC_NAT;
+ }
+ if (need_set) {
+ ret = siocdrvspec(s, bridge, BRDGSIFFLGS,
+ &req, sizeof(req), true);
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(ret, "BRDGSIFFLGS %s %s 0x%x => 0x%x",
+ bridge, member,
+ flags, req.ifbr_ifsflags);
+ }
+ return ret;
+}
+
+static struct ifbareq *
+bridge_rt_table_copy_common(const char * bridge, u_int * ret_count)
+{
+ struct ifbaconf ifbac;
+ u_int len = 8 * 1024;
+ char * inbuf = NULL;
+ char * ninbuf;
+ int ret;
+ struct ifbareq * rt_table = NULL;
+ int s;
+
+ s = inet_dgram_socket();
+
+ /*
+ * BRDGRTS should work like other ioctls where passing in NULL
+ * for the buffer means "tell me how many there are". Unfortunately,
+ * it doesn't, so we have to pass in a buffer, then check whether
+ * the buffer was larger than required.
+ */
+ for (;;) {
+ ninbuf = realloc(inbuf, len);
+ T_QUIET;
+ T_ASSERT_NOTNULL((void *)ninbuf, "realloc %u", len);
+ ifbac.ifbac_len = len;
+ ifbac.ifbac_buf = inbuf = ninbuf;
+ ret = siocdrvspec(s, bridge, BRDGRTS,
+ &ifbac, sizeof(ifbac), false);
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(ret, "%s %s", __func__, bridge);
+ if ((ifbac.ifbac_len + sizeof(*rt_table)) < len) {
+ /* we passed a buffer larger than what was required */
+ break;
+ }
+ len *= 2;
+ }
+ if (ifbac.ifbac_len == 0) {
+ free(ninbuf);
+ T_LOG("No bridge routing entries");
+ goto done;
+ }
+ *ret_count = ifbac.ifbac_len / sizeof(*rt_table);
+ rt_table = (struct ifbareq *)(void *)ninbuf;
+done:
+ if (rt_table == NULL) {
+ *ret_count = 0;
+ }
+ if (s >= 0) {
+ close(s);
+ }
+ return rt_table;
+}
+
+static struct ifbareq *
+bridge_rt_table_copy(u_int * ret_count)
+{
+ return bridge_rt_table_copy_common(BRIDGE200, ret_count);
+}
+
+static void
+bridge_rt_table_log(struct ifbareq *rt_table, u_int count)
+{
+ u_int i;
+ char ntoabuf[ETHER_NTOA_BUFSIZE];
+ struct ifbareq * ifba;
+
+ for (i = 0, ifba = rt_table; i < count; i++, ifba++) {
+ ether_ntoa_buf((const ether_addr_t *)&ifba->ifba_dst,
+ ntoabuf, sizeof(ntoabuf));
+ T_LOG("%s %s %lu", ifba->ifba_ifsname, ntoabuf,
+ ifba->ifba_expire);
+ }
+ return;
+}
+
+static struct ifbrmne *
+bridge_mac_nat_entries_copy_common(const char * bridge, u_int * ret_count)
+{
+ char * buf = NULL;
+ u_int count = 0;
+ int err;
+ u_int i;
+ struct ifbrmnelist mnl;
+ struct ifbrmne * ret_list = NULL;
+ int s;
+ char * scan;
+
+
+ s = inet_dgram_socket();
+
+ /* find out how many there are */
+ bzero(&mnl, sizeof(mnl));
+ err = siocdrvspec(s, bridge, BRDGGMACNATLIST, &mnl, sizeof(mnl), false);
+ if (err != 0 && S_cleaning_up) {
+ T_LOG("BRDGGMACNATLIST %s failed %d", bridge, errno);
+ goto done;
+ }
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(err, "BRDGGMACNATLIST %s", bridge);
+ T_QUIET;
+ T_ASSERT_GE(mnl.ifbml_elsize, (uint16_t)sizeof(struct ifbrmne),
+ "mac nat entry size %u minsize %u",
+ mnl.ifbml_elsize, (u_int)sizeof(struct ifbrmne));
+ if (mnl.ifbml_len == 0) {
+ goto done;
+ }
+
+ /* call again with a buffer large enough to hold them */
+ buf = malloc(mnl.ifbml_len);
+ T_QUIET;
+ T_ASSERT_NOTNULL(buf, "mac nat entries buffer");
+ mnl.ifbml_buf = buf;
+ err = siocdrvspec(s, bridge, BRDGGMACNATLIST, &mnl, sizeof(mnl), false);
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(err, "BRDGGMACNATLIST %s", bridge);
+ count = mnl.ifbml_len / mnl.ifbml_elsize;
+ if (count == 0) {
+ goto done;
+ }
+ if (mnl.ifbml_elsize == sizeof(struct ifbrmne)) {
+ /* element size is expected size, no need to "right-size" it */
+ ret_list = (struct ifbrmne *)(void *)buf;
+ buf = NULL;
+ goto done;
+ }
+ /* element size is larger than we expect, create a "right-sized" array */
+ ret_list = malloc(count * sizeof(*ret_list));
+ T_QUIET;
+ T_ASSERT_NOTNULL(ret_list, "mac nat entries list");
+ for (i = 0, scan = buf; i < count; i++, scan += mnl.ifbml_elsize) {
+ struct ifbrmne * ifbmne;
+
+ ifbmne = (struct ifbrmne *)(void *)scan;
+ ret_list[i] = *ifbmne;
+ }
+done:
+ if (s >= 0) {
+ close(s);
+ }
+ if (buf != NULL) {
+ free(buf);
+ }
+ *ret_count = count;
+ return ret_list;
+}
+
+static struct ifbrmne *
+bridge_mac_nat_entries_copy(u_int * ret_count)
+{
+ return bridge_mac_nat_entries_copy_common(BRIDGE200, ret_count);
+}
+
+static void
+bridge_mac_nat_entries_log(struct ifbrmne * entries, u_int count)
+{
+ u_int i;
+ char ntoabuf[ETHER_NTOA_BUFSIZE];
+ char ntopbuf[INET6_ADDRSTRLEN];
+ struct ifbrmne * scan;
+
+ for (i = 0, scan = entries; i < count; i++, scan++) {
+ ether_ntoa_buf((const ether_addr_t *)&scan->ifbmne_mac,
+ ntoabuf, sizeof(ntoabuf));
+ inet_ntop(scan->ifbmne_af, &scan->ifbmne_ip,
+ ntopbuf, sizeof(ntopbuf));
+ printf("%s %s %s %lu\n",
+ scan->ifbmne_ifname, ntopbuf, ntoabuf,
+ (unsigned long)scan->ifbmne_expire);
+ }
+ return;
+}
+
+/**
+** Test Main
+**/
+static u_int S_n_ports;
+static switch_port_list_t S_port_list;
+
+static void
+bridge_cleanup(const char * bridge, u_int n_ports, bool fail_on_error);
+
+static void
+cleanup_common(bool dump_table)
+{
+ if (S_n_ports == 0) {
+ return;
+ }
+ S_cleaning_up = true;
+ if ((S_port_list != NULL && S_port_list->mac_nat)
+ || (dump_table && S_port_list != NULL)) {
+ switch_port_list_log(S_port_list);
+ if (S_port_list->mac_nat) {
+ switch_port_list_verify_mac_nat(S_port_list, true);
+ }
+ (void)switch_port_list_verify_rt_table(S_port_list, true);
+ }
+ if (S_debug) {
+ T_LOG("sleeping for 5 seconds\n");
+ sleep(5);
+ }
+ bridge_cleanup(BRIDGE200, S_n_ports, false);
+ return;
+}
+
+static void
+cleanup(void)
+{
+ cleanup_common(true);
+ return;
+}
+
+static void
+sigint_handler(__unused int sig)
+{
+ cleanup_common(false);
+ signal(SIGINT, SIG_DFL);
+}
+
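+/*
+ * Create the bridge plus a feth pair per port: the peer side of each pair
+ * is added as a bridge member (optionally with MAC-NAT enabled on the
+ * first one) and the other side is opened via BPF for sending/receiving.
+ */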
+static switch_port_list_t
+bridge_setup(char * bridge, u_int n_ports, u_int num_addrs, bool mac_nat)
+{
+ errno_t err;
+ switch_port_list_t list = NULL;
+ int s;
+
+ S_n_ports = n_ports;
+ T_ATEND(cleanup);
+ T_SETUPBEGIN;
+ s = inet_dgram_socket();
+ err = ifnet_create(s, bridge);
+ if (err != 0) {
+ goto done;
+ }
+ list = switch_port_list_alloc(n_ports, mac_nat);
+ for (u_int i = 0; i < n_ports; i++) {
+ bool do_mac_nat;
+ char ifname[IFNAMSIZ];
+ char member_ifname[IFNAMSIZ];
+ ether_addr_t member_mac;
+
+ snprintf(ifname, sizeof(ifname), "%s%d",
+ FETH_NAME, i);
+ snprintf(member_ifname, sizeof(member_ifname), "%s%d",
+ FETH_NAME, i + n_ports);
+ err = ifnet_create(s, ifname);
+ if (err != 0) {
+ goto done;
+ }
+ ifnet_attach_ip(s, ifname);
+ err = ifnet_create(s, member_ifname);
+ if (err != 0) {
+ goto done;
+ }
+ err = ifnet_get_lladdr(s, member_ifname, &member_mac);
+ if (err != 0) {
+ goto done;
+ }
+ err = fake_set_peer(s, ifname, member_ifname);
+ if (err != 0) {
+ goto done;
+ }
+ /* add the interface's peer to the bridge */
+ err = bridge_add_member(s, bridge, member_ifname);
+ if (err != 0) {
+ goto done;
+ }
+
+ do_mac_nat = (i == 0 && mac_nat);
+ if (do_mac_nat) {
+ /* enable MAC NAT on unit 0 */
+ err = bridge_set_mac_nat(s, bridge, member_ifname,
+ true);
+ if (err != 0) {
+ goto done;
+ }
+ }
+ /* we'll send/receive on the interface */
+ err = switch_port_list_add_port(list, i, ifname, member_ifname,
+ &member_mac, num_addrs,
+ do_mac_nat);
+ if (err != 0) {
+ goto done;
+ }
+ }
+done:
+ if (s >= 0) {
+ close(s);
+ }
+ if (err != 0 && list != NULL) {
+ switch_port_list_dealloc(list);
+ list = NULL;
+ }
+ T_SETUPEND;
+ return list;
+}
+
+static void
+bridge_cleanup(const char * bridge, u_int n_ports, bool fail_on_error)
+{
+ int s;
+
+ s = inet_dgram_socket();
+ ifnet_destroy(s, bridge, fail_on_error);
+ for (u_int i = 0; i < n_ports; i++) {
+ char ifname[IFNAMSIZ];
+ char member_ifname[IFNAMSIZ];
+
+ snprintf(ifname, sizeof(ifname), "%s%d",
+ FETH_NAME, i);
+ snprintf(member_ifname, sizeof(member_ifname), "%s%d",
+ FETH_NAME, i + n_ports);
+ ifnet_destroy(s, ifname, fail_on_error);
+ ifnet_destroy(s, member_ifname, fail_on_error);
+ }
+ if (s >= 0) {
+ close(s);
+ }
+ S_n_ports = 0;
+ return;
+}
+
+/*
+ * Basic Bridge Tests
+ *
+ * Broadcast
+ * - two cases: actual broadcast, unknown ethernet
+ * - send broadcast packets
+ * - verify all received
+ * - check bridge rt list contains all expected MAC addresses
+ * - send unicast ARP packets
+ * - verify packets received only on expected port
+ *
+ * MAC-NAT
+ * - verify ARP translation
+ * - verify IPv4 translation
+ * - verify DHCP broadcast bit conversion
+ * - verify IPv6 translation
+ * - verify ND6 translation (Neighbor, Router)
+ */
+
+static void
+bridge_test(packet_validator_t validator,
+ void * context,
+ const ether_addr_t * dst_eaddr,
+ uint8_t af, u_int n_ports, u_int num_addrs)
+{
+#if TARGET_OS_BRIDGE
+ T_SKIP("Test uses too much memory");
+#else /* TARGET_OS_BRIDGE */
+ switch_port_list_t port_list;
+
+ signal(SIGINT, sigint_handler);
+ port_list = bridge_setup(BRIDGE200, n_ports, num_addrs, false);
+ if (port_list == NULL) {
+ T_FAIL("bridge_setup");
+ return;
+ }
+ S_port_list = port_list;
+ bridge_learning_test(port_list, af, validator, context, dst_eaddr);
+
+ //T_LOG("Sleeping for 5 seconds");
+ //sleep(5);
+ bridge_cleanup(BRIDGE200, n_ports, true);
+ switch_port_list_dealloc(port_list);
+ return;
+#endif /* TARGET_OS_BRIDGE */
+}
+
+static void
+bridge_test_mac_nat_ipv4(u_int n_ports, u_int num_addrs)
+{
+#if TARGET_OS_BRIDGE
+ T_SKIP("Test uses too much memory");
+#else /* TARGET_OS_BRIDGE */
+ switch_port_list_t port_list;
+
+ signal(SIGINT, sigint_handler);
+ port_list = bridge_setup(BRIDGE200, n_ports, num_addrs, true);
+ if (port_list == NULL) {
+ T_FAIL("bridge_setup");
+ return;
+ }
+ S_port_list = port_list;
+
+ /* verify that IPv4 packets get translated when necessary */
+ mac_nat_test_ip(port_list, AF_INET);
+
+ /* verify the DHCP broadcast bit gets set appropriately */
+ mac_nat_test_dhcp(port_list);
+
+ /* verify that ARP packet gets translated when necessary */
+ mac_nat_test_arp_out(port_list);
+ mac_nat_test_arp_in(port_list);
+
+ if (S_debug) {
+ T_LOG("Sleeping for 5 seconds");
+ sleep(5);
+ }
+ bridge_cleanup(BRIDGE200, n_ports, true);
+ switch_port_list_dealloc(port_list);
+ return;
+#endif /* TARGET_OS_BRIDGE */
+}
+
+static void
+bridge_test_mac_nat_ipv6(u_int n_ports, u_int num_addrs)
+{
+#if TARGET_OS_BRIDGE
+ T_SKIP("Test uses too much memory");
+#else /* TARGET_OS_BRIDGE */
+ switch_port_list_t port_list;
+
+ signal(SIGINT, sigint_handler);
+ port_list = bridge_setup(BRIDGE200, n_ports, num_addrs, true);
+ if (port_list == NULL) {
+ T_FAIL("bridge_setup");
+ return;
+ }
+ S_port_list = port_list;
+
+ /* verify that IPv6 packets get translated when necessary */
+ mac_nat_test_ip(port_list, AF_INET6);
+
+ /* verify that ND6 packet gets translated when necessary */
+ mac_nat_test_nd6_out(port_list);
+ if (S_debug) {
+ T_LOG("Sleeping for 5 seconds");
+ sleep(5);
+ }
+ bridge_cleanup(BRIDGE200, n_ports, true);
+ switch_port_list_dealloc(port_list);
+ return;
+#endif /* TARGET_OS_BRIDGE */
+}
+
+static void
+system_cmd(const char *cmd, bool fail_on_error)
+{
+ pid_t pid = -1;
+ int exit_status = 0;
+ const char *argv[] = {
+ "/usr/local/bin/bash",
+ "-c",
+ cmd,
+ NULL
+ };
+
+ int rc = dt_launch_tool(&pid, (char **)(void *)argv, false, NULL, NULL);
+ T_QUIET;
+ T_ASSERT_EQ(rc, 0, "dt_launch_tool(%s) failed", cmd);
+
+ if (dt_waitpid(pid, &exit_status, NULL, 30)) {
+ T_QUIET;
+ T_ASSERT_MACH_SUCCESS(exit_status, "command(%s)", cmd);
+ } else {
+ if (fail_on_error) {
+ T_FAIL("dt_waitpid(%s) failed", cmd);
+ }
+ }
+}
+
+static void
+cleanup_pf(void)
+{
+ struct ifbrparam param;
+ int s = inet_dgram_socket();
+
+ system_cmd("pfctl -d", false);
+ system_cmd("pfctl -F all", false);
+
+ param.ifbrp_filter = 0;
+ siocdrvspec(s, BRIDGE200, BRDGSFILT,
+ &param, sizeof(param), true);
+ return;
+}
+
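+/*
+ * Enable member filtering on the bridge and load a pf ruleset that blocks
+ * all traffic in the given direction on both member interfaces.
+ */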
+static void
+block_all_traffic(bool input, const char* infname1, const char* infname2)
+{
+ int s = inet_dgram_socket();
+ int ret;
+ struct ifbrparam param;
+ char command[512];
+ char *dir = input ? "in" : "out";
+
+ snprintf(command, sizeof(command), "echo \"block %s on %s all\nblock %s on %s all\n\" | pfctl -vvv -f -",
+ dir, infname1, dir, infname2);
+ /* enable block all filter */
+ param.ifbrp_filter = IFBF_FILT_MEMBER | IFBF_FILT_ONLYIP;
+ ret = siocdrvspec(s, BRIDGE200, BRDGSFILT,
+ &param, sizeof(param), true);
+ T_ASSERT_POSIX_SUCCESS(ret,
+ "SIOCDRVSPEC(BRDGSFILT %s, 0x%x)",
+ BRIDGE200, param.ifbrp_filter);
+ // ignore errors so that not having pf.os doesn't raise any issues
+ system_cmd(command, false);
+ system_cmd("pfctl -e", true);
+ system_cmd("pfctl -s all", true);
+}
+
+/*
+ * Basic bridge filter test
+ *
+ * For both broadcast and unicast transfers ensure that data can
+ * be blocked using pf on the bridge
+ */
+
+static void
+filter_test(uint8_t af)
+{
+#if TARGET_OS_BRIDGE
+ T_SKIP("pfctl isn't valid on this platform");
+#else /* TARGET_OS_BRIDGE */
+ switch_port_list_t port_list;
+ switch_port_t port;
+ const u_int n_ports = 2;
+ u_int num_addrs = 1;
+ u_int i;
+ char ntoabuf[ETHER_NTOA_BUFSIZE];
+ union ifbrip dst_ip;
+ bool blocked = true;
+ bool input = true;
+ const char* ifnames[2];
+
+ signal(SIGINT, sigint_handler);
+
+ T_ATEND(cleanup);
+ T_ATEND(cleanup_pf);
+
+ port_list = bridge_setup(BRIDGE200, n_ports, num_addrs, false);
+ if (port_list == NULL) {
+ T_FAIL("bridge_setup");
+ return;
+ }
+
+ ether_ntoa_buf(&ether_broadcast, ntoabuf, sizeof(ntoabuf));
+
+ S_port_list = port_list;
+ for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+ ifnames[i] = port->member_ifname;
+ }
+
+ get_broadcast_ip_address(af, &dst_ip);
+ do {
+ do {
+ if (blocked) {
+ block_all_traffic(input, ifnames[0], ifnames[1]);
+ }
+ for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+ if (S_debug) {
+ T_LOG("Sending on %s", port->ifname);
+ }
+ for (u_int j = 0; j < port->num_addrs; j++) {
+ uint32_t generation;
+
+ generation = next_generation();
+ send_generation(port,
+ af,
+ j,
+ &ether_broadcast,
+ &dst_ip,
+ generation);
+
+ /* receive across all ports */
+ check_receive_generation(port_list,
+ af,
+ generation,
+ validate_broadcast_dhost,
+ NULL);
+
+ /* ensure that every port saw the right number of packets */
+ if (blocked) {
+ check_received_count(port_list, port, 0);
+ } else {
+ check_received_count(port_list, port, 1);
+ }
+ }
+ }
+ T_PASS("%s broadcast %s %s", __func__, blocked ? "blocked" : "not blocked", input ? "input" : "output");
+ input = !input;
+ cleanup_pf();
+ } while (input == false && blocked);
+ blocked = !blocked;
+ } while (blocked == false);
+
+ do {
+ do {
+ if (blocked) {
+ block_all_traffic(input, ifnames[0], ifnames[1]);
+ }
+ for (i = 0, port = port_list->list; i < port_list->count; i++, port++) {
+ /* send unicast packets to every other port's MAC addresses */
+ unicast_send_all(port_list, af, port);
+
+ /* receive all of that generated traffic */
+ switch_port_list_check_receive(port_list, af, NULL, 0,
+ validate_port_dhost, NULL);
+
+ /* ensure that every port saw the right number of packets */
+ if (blocked) {
+ check_received_count(port_list, port, 0);
+ } else {
+ check_received_count(port_list, port, 1);
+ }
+ }
+ T_PASS("%s unicast %s %s", __func__, blocked ? "blocked" : "not blocked", input ? "input" : "output");
+ input = !input;
+ cleanup_pf();
+ } while (input == false && blocked);
+ blocked = !blocked;
+ } while (blocked == false);
+
+ bridge_cleanup(BRIDGE200, n_ports, true);
+ switch_port_list_dealloc(port_list);
+ return;
+#endif /* TARGET_OS_BRIDGE */
+}
+
+T_DECL(if_bridge_bcast,
+ "bridge broadcast IPv4",
+ T_META_ASROOT(true))
+{
+ bridge_test(validate_broadcast_dhost, NULL, &ether_broadcast,
+ AF_INET, 5, 1);
+}
+
+T_DECL(if_bridge_bcast_many,
+ "bridge broadcast many IPv4",
+ T_META_ASROOT(true))
+{
+ bridge_test(validate_broadcast_dhost, NULL, &ether_broadcast,
+ AF_INET, 5, 20);
+}
+
+T_DECL(if_bridge_unknown,
+ "bridge unknown host IPv4",
+ T_META_ASROOT(true))
+{
+ bridge_test(validate_not_present_dhost, NULL, &ether_external,
+ AF_INET, 5, 1);
+}
+
+T_DECL(if_bridge_bcast_v6,
+ "bridge broadcast IPv6",
+ T_META_ASROOT(true))
+{
+ bridge_test(validate_broadcast_dhost, NULL, &ether_broadcast,
+ AF_INET6, 5, 1);
+}
+
+T_DECL(if_bridge_bcast_many_v6,
+ "bridge broadcast many IPv6",
+ T_META_ASROOT(true))
+{
+ bridge_test(validate_broadcast_dhost, NULL, &ether_broadcast,
+ AF_INET6, 5, 20);
+}
+
+T_DECL(if_bridge_unknown_v6,
+ "bridge unknown host IPv6",
+ T_META_ASROOT(true))
+{
+ bridge_test(validate_not_present_dhost, NULL, &ether_external,
+ AF_INET6, 5, 1);
+}
+
+T_DECL(if_bridge_mac_nat_ipv4,
+ "bridge mac nat ipv4",
+ T_META_ASROOT(true))
+{
+ bridge_test_mac_nat_ipv4(5, 10);
+}
+
+T_DECL(if_bridge_mac_nat_ipv6,
+ "bridge mac nat ipv6",
+ T_META_ASROOT(true))
+{
+ bridge_test_mac_nat_ipv6(5, 10);
+}
+
+T_DECL(if_bridge_filter_ipv4,
+ "bridge filter ipv4",
+ T_META_ASROOT(true))
+{
+ filter_test(AF_INET);
+}
+
+T_DECL(if_bridge_filter_ipv6,
+ "bridge filter ipv6",
+ T_META_ASROOT(true))
+{
+ filter_test(AF_INET6);
+}
--- /dev/null
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include <pthread.h>
+#include <sys/kern_control.h>
+#include <sys/kern_event.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/sys_domain.h>
+#include <unistd.h>
+#include <uuid/uuid.h>
+
+#include <darwintest.h>
+
+static int finished = 0;
+
+#ifndef KEV_NETAGENT_SUBCLASS
+#define KEV_NETAGENT_SUBCLASS 9
+#endif
+
+#ifndef NETAGENT_MESSAGE_TYPE_REGISTER
+#define NETAGENT_MESSAGE_TYPE_REGISTER 1
+#endif
+
+#ifndef NETAGENT_MESSAGE_TYPE_UNREGISTER
+#define NETAGENT_MESSAGE_TYPE_UNREGISTER 2
+#endif
+
+struct netagent_message_header {
+ uint8_t message_type;
+ uint8_t message_flags;
+ uint32_t message_id;
+ uint32_t message_error;
+ uint32_t message_payload_length;
+};
+
+struct kev_msg {
+ uint32_t total_size;
+ uint32_t vendor_code;
+ uint32_t kev_class;
+ uint32_t kev_subclass;
+ uint32_t id;
+ uint32_t event_code;
+};
+
+struct kev_netagent_data {
+ uuid_t netagent_uuid;
+};
+
+struct netagent {
+ uuid_t netagent_uuid;
+ char netagent_domain[32];
+ char netagent_type[32];
+ char netagent_desc[128];
+ uint32_t netagent_flags;
+ uint32_t netagent_data_size;
+ uint8_t netagent_data[0];
+};
+
+static void *
+register_message_racer(void *data)
+{
+ int s = *(int *)data;
+ struct {
+ struct netagent_message_header header;
+ struct netagent netagent;
+ } msg;
+
+ bzero(&msg, sizeof(msg));
+ msg.header.message_type = NETAGENT_MESSAGE_TYPE_REGISTER;
+ msg.header.message_payload_length = sizeof(struct netagent);
+
+ while (!finished) {
+ send(s, &msg, sizeof(msg), 0);
+ }
+
+ return NULL;
+}
+
+static void *
+register_sockopt_racer(void *data)
+{
+ int s = *(int *)data;
+ struct netagent netagent;
+
+ bzero(&netagent, sizeof(netagent));
+ while (!finished) {
+ setsockopt(s, SYSPROTO_CONTROL, NETAGENT_MESSAGE_TYPE_REGISTER, &netagent, sizeof(netagent));
+ }
+
+ return NULL;
+}
+
+#define SIZEOF_STRUCT_NETAGENT_WRAPPER 280
+
+static void *
+unregister_racer(void *data)
+{
+ int s = *(int *)data;
+ uint8_t spraybuf[SIZEOF_STRUCT_NETAGENT_WRAPPER];
+
+ memset(spraybuf, 0x41, sizeof(spraybuf));
+
+ while (!finished) {
+ setsockopt(s, SYSPROTO_CONTROL, NETAGENT_MESSAGE_TYPE_UNREGISTER, NULL, 0);
+ ioctl(-1, _IOW('x', 0, spraybuf), spraybuf);
+ }
+
+ return NULL;
+}
+
+#define NITERS 200000
+
+T_DECL(netagent_race_infodisc_56244905, "Netagent race between register and post event.")
+{
+ int s;
+ int evsock;
+ pthread_t reg_th;
+ pthread_t unreg_th;
+ struct kev_request kev_req = {
+ .vendor_code = KEV_VENDOR_APPLE,
+ .kev_class = KEV_NETWORK_CLASS,
+ .kev_subclass = KEV_NETAGENT_SUBCLASS
+ };
+ struct ctl_info ci;
+ struct sockaddr_ctl sc;
+ struct {
+ struct kev_msg msg;
+ struct kev_netagent_data nd;
+ } ev;
+ int n;
+
+ T_SETUPBEGIN;
+ /* set up the event socket so we can receive notifications: */
+ T_ASSERT_POSIX_SUCCESS(evsock = socket(AF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT), NULL);
+ T_ASSERT_POSIX_SUCCESS(ioctl(evsock, SIOCSKEVFILT, &kev_req), NULL);
+
+ /* this is the socket we'll race on: */
+ T_ASSERT_POSIX_SUCCESS(s = socket(AF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL), NULL);
+
+ /* connect to netagent: */
+ bzero(&ci, sizeof(ci));
+ strcpy(ci.ctl_name, "com.apple.net.netagent");
+ T_ASSERT_POSIX_SUCCESS(ioctl(s, CTLIOCGINFO, &ci), NULL);
+
+ bzero(&sc, sizeof(sc));
+ sc.sc_id = ci.ctl_id;
+
+ T_ASSERT_POSIX_SUCCESS(connect(s, (const struct sockaddr *)&sc, sizeof(sc)), NULL);
+ T_SETUPEND;
+
+ /* variant 1: */
+ /* spin off the racer threads: */
+ T_ASSERT_POSIX_ZERO(pthread_create(&reg_th, NULL, register_message_racer, &s), NULL);
+ T_ASSERT_POSIX_ZERO(pthread_create(&unreg_th, NULL, unregister_racer, &s), NULL);
+
+ /* keep going until we're done: */
+ for (n = 0; n < NITERS; ++n) {
+ bzero(&ev, sizeof(ev));
+ T_ASSERT_POSIX_SUCCESS(recv(evsock, &ev, sizeof(ev), 0), NULL);
+
+ if (ev.nd.netagent_uuid[0] != 0) {
+ finished = 1;
+ T_ASSERT_FAIL("netagent register event leaked data: 0x%08lx", *(unsigned long *)ev.nd.netagent_uuid);
+ }
+ }
+
+ finished = 1;
+
+ T_ASSERT_POSIX_ZERO(pthread_join(reg_th, NULL), NULL);
+ T_ASSERT_POSIX_ZERO(pthread_join(unreg_th, NULL), NULL);
+
+ finished = 0;
+
+ /* variant 2: */
+ /* spin off the racer threads: */
+ T_ASSERT_POSIX_ZERO(pthread_create(&reg_th, NULL, register_sockopt_racer, &s), NULL);
+ T_ASSERT_POSIX_ZERO(pthread_create(&unreg_th, NULL, unregister_racer, &s), NULL);
+
+ /* keep going until we're done: */
+ for (n = 0; n < NITERS; ++n) {
+ bzero(&ev, sizeof(ev));
+ T_ASSERT_POSIX_SUCCESS(recv(evsock, &ev, sizeof(ev), 0), NULL);
+
+ if (ev.nd.netagent_uuid[0] != 0) {
+ finished = 1;
+ T_ASSERT_FAIL("netagent register event leaked data: 0x%08lx", *(unsigned long *)ev.nd.netagent_uuid);
+ }
+ }
+
+ finished = 1;
+
+ T_ASSERT_POSIX_ZERO(pthread_join(reg_th, NULL), NULL);
+ T_ASSERT_POSIX_ZERO(pthread_join(unreg_th, NULL), NULL);
+}
--- /dev/null
+#include <darwintest.h>
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/event.h>
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+
+T_GLOBAL_META(T_META_RUN_CONCURRENTLY(true));
+
+#define TEST_ADDR "127.0.0.1"
+#define TEST_PORT 4242
+
+static struct {
+ int fd;
+ struct sockaddr_in addr;
+} server;
+
+static void
+server_listen(void)
+{
+ int r;
+
+ server.fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+ T_ASSERT_POSIX_SUCCESS(server.fd, "socket");
+
+ memset(&server.addr, 0, sizeof(server.addr));
+ server.addr.sin_family = AF_INET;
+ server.addr.sin_port = htons(TEST_PORT);
+
+ inet_pton(AF_INET, TEST_ADDR, &server.addr.sin_addr);
+
+ r = bind(server.fd, (struct sockaddr*) &server.addr, sizeof(server.addr));
+ T_ASSERT_POSIX_SUCCESS(r, "bind");
+}
+
+static void
+send_message(void)
+{
+ int fd;
+ struct msghdr msg;
+ struct iovec iov;
+
+ fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+ T_ASSERT_POSIX_SUCCESS(fd, "socket");
+
+ memset(&msg, 0, sizeof(msg));
+
+ msg.msg_name = &server.addr;
+ msg.msg_namelen = sizeof(server.addr);
+
+ iov.iov_base = "";
+ iov.iov_len = 0;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ ssize_t r = sendmsg(fd, &msg, 0);
+ T_ASSERT_EQ(r, (ssize_t)iov.iov_len, "sendmsg");
+
+ close(fd);
+}
+
+static void
+server_poll(void)
+{
+ int kq;
+ struct kevent event = {
+ .flags = EV_ADD,
+ .filter = EVFILT_READ,
+ .ident = (unsigned long)server.fd,
+ };
+ int r;
+
+ kq = kqueue();
+ T_ASSERT_POSIX_SUCCESS(kq, "kqueue");
+
+ /* Add and poll */
+ r = kevent(kq, &event, 1, &event, 1, NULL);
+ T_EXPECT_EQ(r, 1, "should return an event");
+
+ close(kq);
+}
+
+T_DECL(socket_0byte_udp_poll_58140856,
+ "Tests that 0-sized UDP packets wake up kevent")
+{
+ T_LOG("Starting...\n");
+
+ /* Listen on UDP port */
+ server_listen();
+
+ T_LOG("Server bound to [%s]:%d\n", TEST_ADDR, TEST_PORT);
+
+ /* Send 0-UDP packet to that port */
+ send_message();
+
+ T_LOG("Sent message to server\n");
+
+ /* Poll kqueue events */
+ server_poll();
+
+ T_LOG("Got kqueue event\n");
+
+ close(server.fd);
+}
#if !TARGET_OS_OSX
/* allow us to be frozen */
freeze_state = memorystatus_control(MEMORYSTATUS_CMD_GET_PROCESS_IS_FREEZABLE, pid, 0, NULL, 0);
- if (freeze_state == -1) {
- T_SKIP("This device doesn't have CONFIG_FREEZE enabled.");
- } else if (freeze_state == 0) {
+ if (freeze_state == 0) {
T_LOG("CHILD was found to be UNFREEZABLE, enabling freezing.");
memorystatus_control(MEMORYSTATUS_CMD_SET_PROCESS_IS_FREEZABLE, pid, 1, NULL, 0);
freeze_state = memorystatus_control(MEMORYSTATUS_CMD_GET_PROCESS_IS_FREEZABLE, pid, 0, NULL, 0);
T_LOG("parent pid: %d\n", getpid());
T_QUIET; T_ASSERT_POSIX_ZERO(_NSGetExecutablePath(path, &path_size), "_NSGetExecutablePath");
+ /* check if we can run the child successfully */
+#if !TARGET_OS_OSX
+ int freeze_state = memorystatus_control(MEMORYSTATUS_CMD_GET_PROCESS_IS_FREEZABLE, getpid(), 0, NULL, 0);
+ if (freeze_state == -1) {
+ T_SKIP("This device doesn't have CONFIG_FREEZE enabled.");
+ }
+#endif
+
/* setup signal handling */
signal(SIGUSR1, SIG_IGN);
child_sig_src = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR1, 0, dq);
T_ATEND(kill_children);
/* wait until the child has recursed enough */
- dispatch_semaphore_wait(child_done_sema, DISPATCH_TIME_FOREVER);
+ dispatch_semaphore_wait(child_done_sema, dispatch_time(DISPATCH_TIME_NOW, 10 /*seconds*/ * 1000000000ULL));
T_LOG("child finished, parent executing");
#include <darwintest_utils.h>
#include <kern/debug.h>
#include <kern/kern_cdata.h>
+#include <kern/block_hint.h>
#include <kdd.h>
#include <libproc.h>
#include <mach-o/dyld.h>
#define PARSE_STACKSHOT_SHAREDCACHE_LAYOUT 0x04
#define PARSE_STACKSHOT_DISPATCH_QUEUE_LABEL 0x08
#define PARSE_STACKSHOT_TURNSTILEINFO 0x10
+#define PARSE_STACKSHOT_WAITINFO_CSEG 0x40
+
+static uint64_t cseg_expected_threadid = 0;
#define TEST_STACKSHOT_QUEUE_LABEL "houston.we.had.a.problem"
#define TEST_STACKSHOT_QUEUE_LABEL_LENGTH sizeof(TEST_STACKSHOT_QUEUE_LABEL)
});
}
+T_DECL(cseg_waitinfo, "test that threads stuck in the compressor report correct waitinfo")
+{
+ int val = 1;
+ struct scenario scenario = {
+ .name = "cseg_waitinfo",
+ .quiet = false,
+ .flags = (STACKSHOT_THREAD_WAITINFO | STACKSHOT_KCDATA_FORMAT),
+ };
+
+ dispatch_queue_t dq = dispatch_queue_create("com.apple.stackshot.cseg_waitinfo", NULL);
+ dispatch_semaphore_t child_ok = dispatch_semaphore_create(0);
+
+ dispatch_async(dq, ^{
+ pthread_threadid_np(NULL, &cseg_expected_threadid);
+ dispatch_semaphore_signal(child_ok);
+ T_ASSERT_POSIX_SUCCESS(sysctlbyname("kern.cseg_wedge_thread", NULL, NULL, &val, sizeof(val)), "wedge child thread");
+ });
+
+ dispatch_semaphore_wait(child_ok, DISPATCH_TIME_FOREVER);
+ sleep(1);
+
+ T_LOG("taking stackshot");
+ take_stackshot(&scenario, ^(void *ssbuf, size_t sslen) {
+ T_ASSERT_POSIX_SUCCESS(sysctlbyname("kern.cseg_unwedge_thread", NULL, NULL, &val, sizeof(val)), "unwedge child thread");
+ parse_stackshot(PARSE_STACKSHOT_WAITINFO_CSEG, ssbuf, sslen, -1);
+ });
+}
+
#pragma mark performance tests
#define SHOULD_REUSE_SIZE_HINT 0x01
{
bool delta = (stackshot_parsing_flags & PARSE_STACKSHOT_DELTA);
bool expect_zombie_child = (stackshot_parsing_flags & PARSE_STACKSHOT_ZOMBIE);
+ bool expect_cseg_waitinfo = (stackshot_parsing_flags & PARSE_STACKSHOT_WAITINFO_CSEG);
bool expect_shared_cache_layout = false;
bool expect_shared_cache_uuid = !delta;
bool expect_dispatch_queue_label = (stackshot_parsing_flags & PARSE_STACKSHOT_DISPATCH_QUEUE_LABEL);
bool expect_turnstile_lock = (stackshot_parsing_flags & PARSE_STACKSHOT_TURNSTILEINFO);
bool found_zombie_child = false, found_shared_cache_layout = false, found_shared_cache_uuid = false;
bool found_dispatch_queue_label = false, found_turnstile_lock = false;
+ bool found_cseg_waitinfo = false;
if (expect_shared_cache_uuid) {
uuid_t shared_cache_uuid;
}
}
+ if (expect_cseg_waitinfo) {
+ NSArray *winfos = container[@"task_snapshots"][@"thread_waitinfo"];
+
+ for (id i in winfos) {
+ if ([i[@"wait_type"] intValue] == kThreadWaitCompressor && [i[@"owner"] intValue] == cseg_expected_threadid) {
+ found_cseg_waitinfo = true;
+ break;
+ }
+ }
+ }
+
int pid = [container[@"task_snapshots"][@"task_snapshot"][@"ts_pid"] intValue];
if (expect_zombie_child && (pid == child_pid)) {
found_zombie_child = true;
T_QUIET; T_ASSERT_TRUE(found_turnstile_lock, "found expected deadlock");
}
+ if (expect_cseg_waitinfo) {
+ T_QUIET; T_ASSERT_TRUE(found_cseg_waitinfo, "found c_seg waitinfo");
+ }
+
T_ASSERT_FALSE(KCDATA_ITER_FOREACH_FAILED(iter), "successfully iterated kcdata");
}
--- /dev/null
+#include <mach/mach.h>
+
+#include <bootstrap.h>
+#include <darwintest.h>
+#include <darwintest_multiprocess.h>
+#include <spawn.h>
+#include <unistd.h>
+
+#if defined(UNENTITLED)
+
+/*
+ * Creating an suid credential should fail without an entitlement.
+ */
+T_DECL(task_create_suid_cred_unentitled, "task_create_suid_cred (no entitlement)", T_META_ASROOT(true))
+{
+ kern_return_t ret = KERN_FAILURE;
+ suid_cred_t sc = SUID_CRED_NULL;
+
+ ret = task_create_suid_cred(mach_task_self(), "/usr/bin/id", 0, &sc);
+ T_ASSERT_MACH_ERROR(ret, KERN_NO_ACCESS, "create a new suid cred for id (no entitlement)");
+}
+
+#else /* ENTITLED */
+
+extern char **environ;
+static const char *server_name = "com.apple.xnu.test.task_create_suid_cred";
+
+/*
+ * This is a positive test case which spawns /usr/bin/id with a properly created
+ * suid credential and verifies that its output contains "euid=0", even though
+ * the caller is not running as root.
+ */
+static void
+test_id_cred(suid_cred_t sc_id)
+{
+ posix_spawnattr_t attr;
+ posix_spawn_file_actions_t file_actions;
+ pid_t pid = -1;
+ int status = -1;
+ char template[] = "/tmp/suid_cred.XXXXXX";
+ char *path = NULL;
+ FILE *file = NULL;
+ char *line = NULL;
+ size_t linecap = 0;
+ ssize_t linelen = 0;
+ char *id[] = {"/usr/bin/id", NULL};
+ kern_return_t ret = KERN_FAILURE;
+
+ /* Send stdout to a temporary file. */
+ path = mktemp(template);
+ T_QUIET; T_ASSERT_NOTNULL(path, NULL);
+
+ ret = posix_spawn_file_actions_init(&file_actions);
+ T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+
+ ret = posix_spawn_file_actions_addopen(&file_actions, 1, path,
+ O_WRONLY | O_CREAT | O_TRUNC, 0666);
+ T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+
+ ret = posix_spawnattr_init(&attr);
+ T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+ T_QUIET; T_ASSERT_NOTNULL(attr, NULL);
+
+ // Attach the suid cred port
+ ret = posix_spawnattr_setsuidcredport_np(&attr, sc_id);
+ T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+
+ ret = posix_spawnp(&pid, id[0], &file_actions, &attr, id, environ);
+ T_ASSERT_POSIX_ZERO(ret, "spawn with suid cred");
+
+ ret = posix_spawnattr_destroy(&attr);
+ T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+
+ ret = posix_spawn_file_actions_destroy(&file_actions);
+ T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+
+ // Wait for id to finish executing and exit.
+ do {
+ ret = waitpid(pid, &status, 0);
+ } while (ret < 0 && errno == EINTR);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, NULL);
+
+ // Read from the temp file and verify that euid is 0.
+ file = fopen(path, "re");
+ T_QUIET; T_ASSERT_NOTNULL(file, NULL);
+
+ linelen = getline(&line, &linecap, file);
+ T_QUIET; T_ASSERT_GT_LONG(linelen, 0L, NULL);
+
+ T_ASSERT_NOTNULL(strstr(line, "euid=0"), "verify that euid is zero");
+
+ free(line);
+ ret = fclose(file);
+ T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+
+ ret = unlink(path);
+ T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+}
+
+/*
+ * This is a negative test case which tries to spawn /usr/bin/id with a
+ * previously used credential. It is expected that posix_spawn() fails.
+ * sc_id should have already been used to successfully spawn /usr/bin/id.
+ */
+static void
+test_id_cred_reuse(suid_cred_t sc_id)
+{
+ posix_spawnattr_t attr;
+ char *id[] = {"/usr/bin/id", NULL};
+ kern_return_t ret = KERN_FAILURE;
+
+ ret = posix_spawnattr_init(&attr);
+ T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+ T_QUIET; T_ASSERT_NOTNULL(attr, NULL);
+
+ // Attach the suid cred port
+ ret = posix_spawnattr_setsuidcredport_np(&attr, sc_id);
+ T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+
+ ret = posix_spawnp(NULL, id[0], NULL, &attr, id, environ);
+ T_ASSERT_NE(ret, 0, "spawn with used suid cred");
+
+ ret = posix_spawnattr_destroy(&attr);
+ T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+}
+
+/*
+ * This is a negative test case which tries to spawn /usr/bin/id with a
+ * credential for /bin/ls. It is expected that posix_spawn() fails.
+ */
+static void
+test_ls_cred(suid_cred_t sc_ls)
+{
+ posix_spawnattr_t attr;
+ char *id[] = {"/usr/bin/id", NULL};
+ kern_return_t ret = KERN_FAILURE;
+
+ ret = posix_spawnattr_init(&attr);
+ T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+ T_QUIET; T_ASSERT_NOTNULL(attr, NULL);
+
+ // Attach the suid cred port
+ ret = posix_spawnattr_setsuidcredport_np(&attr, sc_ls);
+ T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+
+ ret = posix_spawnp(NULL, id[0], NULL, &attr, id, environ);
+ T_ASSERT_NE(ret, 0, "spawn with bad suid cred");
+
+ ret = posix_spawnattr_destroy(&attr);
+ T_QUIET; T_ASSERT_POSIX_ZERO(ret, NULL);
+}
+
+/*
+ * The privileged/entitled "server" which creates suid credentials to pass to a
+ * client. Two creds are created, one for /usr/bin/id and the other for /bin/ls.
+ * It waits for the client to contact and replies with the above ports.
+ */
+T_HELPER_DECL(suid_cred_server_helper, "suid cred server")
+{
+ mach_port_t server_port = MACH_PORT_NULL;
+ kern_return_t ret = KERN_FAILURE;
+ suid_cred_t sc_id = SUID_CRED_NULL;
+ suid_cred_t sc_ls = SUID_CRED_NULL;
+ mach_msg_empty_rcv_t rmsg = {};
+ struct {
+ mach_msg_header_t header;
+ mach_msg_body_t body;
+ mach_msg_port_descriptor_t id_port;
+ mach_msg_port_descriptor_t ls_port;
+ } smsg = {};
+
+ T_SETUPBEGIN;
+
+ ret = bootstrap_check_in(bootstrap_port, server_name, &server_port);
+ T_ASSERT_MACH_SUCCESS(ret, NULL);
+
+ T_SETUPEND;
+
+ // Wait for a message to reply to.
+ rmsg.header.msgh_size = sizeof(rmsg);
+ rmsg.header.msgh_local_port = server_port;
+
+ ret = mach_msg_receive(&rmsg.header);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(ret, NULL);
+
+ // Setup the reply.
+ smsg.header.msgh_remote_port = rmsg.header.msgh_remote_port;
+ smsg.header.msgh_local_port = MACH_PORT_NULL;
+ smsg.header.msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_MOVE_SEND_ONCE, 0) | MACH_MSGH_BITS_COMPLEX;
+ smsg.header.msgh_size = sizeof(smsg);
+
+ smsg.body.msgh_descriptor_count = 2;
+
+ // Create an suid cred for 'id'
+ ret = task_create_suid_cred(mach_task_self(), "/usr/bin/id", 0, &sc_id);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(ret, "create a new suid cred for id");
+ T_QUIET; T_ASSERT_NE(sc_id, SUID_CRED_NULL, NULL);
+
+ smsg.id_port.name = sc_id;
+ smsg.id_port.disposition = MACH_MSG_TYPE_COPY_SEND;
+ smsg.id_port.type = MACH_MSG_PORT_DESCRIPTOR;
+
+ // Create an suid cred for 'ls'
+ ret = task_create_suid_cred(mach_task_self(), "/bin/ls", 0, &sc_ls);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(ret, "create a new suid cred for ls");
+ T_QUIET; T_ASSERT_NE(sc_ls, SUID_CRED_NULL, NULL);
+
+ smsg.ls_port.name = sc_ls;
+ smsg.ls_port.disposition = MACH_MSG_TYPE_COPY_SEND;
+ smsg.ls_port.type = MACH_MSG_PORT_DESCRIPTOR;
+
+ ret = mach_msg_send(&smsg.header);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(ret, NULL);
+}
+
+/*
+ * The unprivileged "client" which requests suid credentials from the "server",
+ * and runs some test cases with those credentials:
+ * - A positive test case to spawn something with euid 0
+ * - A negative test case to check that a cred can't be used twice
+ * - A negative test case to check that only the approved binary can be used
+ * with the credential.
+ */
+T_HELPER_DECL(suid_cred_client_helper, "suid cred client")
+{
+ mach_port_t server_port = MACH_PORT_NULL;
+ mach_port_t client_port = MACH_PORT_NULL;
+ kern_return_t ret = KERN_FAILURE;
+ suid_cred_t sc_id = SUID_CRED_NULL;
+ suid_cred_t sc_ls = SUID_CRED_NULL;
+ mach_msg_empty_send_t smsg = {};
+ struct {
+ mach_msg_header_t header;
+ mach_msg_body_t body;
+ mach_msg_port_descriptor_t id_port;
+ mach_msg_port_descriptor_t ls_port;
+ mach_msg_trailer_t trailer;
+ } rmsg = {};
+
+ uid_t euid = geteuid();
+
+ T_SETUPBEGIN;
+
+ // Make sure the effective UID is non-root.
+ if (euid == 0) {
+ ret = setuid(501);
+ T_ASSERT_POSIX_ZERO(ret, "setuid");
+ }
+
+ /*
+ * As this can race with the "server" starting, give it time to
+ * start up.
+ */
+ for (int i = 0; i < 30; i++) {
+ ret = bootstrap_look_up(bootstrap_port, server_name, &server_port);
+ if (ret != BOOTSTRAP_UNKNOWN_SERVICE) {
+ break;
+ }
+ sleep(1);
+ }
+
+ T_QUIET; T_ASSERT_NE(server_port, MACH_PORT_NULL, NULL);
+
+ // Create a port to receive the reply on.
+ ret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &client_port);
+ T_ASSERT_MACH_SUCCESS(ret, NULL);
+
+ T_SETUPEND;
+
+ // Request the SUID cred ports
+ smsg.header.msgh_remote_port = server_port;
+ smsg.header.msgh_local_port = client_port;
+ smsg.header.msgh_bits = MACH_MSGH_BITS_SET(MACH_MSG_TYPE_MOVE_SEND, MACH_MSG_TYPE_MAKE_SEND_ONCE, 0, 0);
+ smsg.header.msgh_size = sizeof(smsg);
+
+ ret = mach_msg_send(&smsg.header);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(ret, NULL);
+
+ // Wait for the reply.
+ rmsg.header.msgh_size = sizeof(rmsg);
+ rmsg.header.msgh_local_port = client_port;
+
+ ret = mach_msg_receive(&rmsg.header);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(ret, NULL);
+
+ sc_id = rmsg.id_port.name;
+ T_QUIET; T_ASSERT_NE(sc_id, SUID_CRED_NULL, NULL);
+ test_id_cred(sc_id);
+ test_id_cred_reuse(sc_id);
+
+ sc_ls = rmsg.ls_port.name;
+ T_QUIET; T_ASSERT_NE(sc_ls, SUID_CRED_NULL, NULL);
+ test_ls_cred(sc_ls);
+}
+
+T_DECL(task_create_suid_cred, "task_create_suid_cred", T_META_ASROOT(true))
+{
+ dt_helper_t helpers[] = {
+ dt_launchd_helper_domain("com.apple.xnu.test.task_create_suid_cred.plist",
+ "suid_cred_server_helper", NULL, LAUNCH_SYSTEM_DOMAIN),
+ dt_fork_helper("suid_cred_client_helper"),
+ };
+
+ dt_run_helpers(helpers, sizeof(helpers) / sizeof(helpers[0]), 60);
+}
+
+/*
+ * Creating an suid credential should fail for non-root (even if entitled).
+ */
+T_DECL(task_create_suid_cred_no_root, "task_create_suid_cred (no root)", T_META_ASROOT(true))
+{
+ kern_return_t ret = KERN_FAILURE;
+ suid_cred_t sc = SUID_CRED_NULL;
+ uid_t euid = geteuid();
+
+ // Make sure the effective UID is non-root.
+ if (euid == 0) {
+ ret = setuid(501);
+ T_QUIET; T_ASSERT_POSIX_ZERO(ret, "setuid");
+ }
+
+ ret = task_create_suid_cred(mach_task_self(), "/usr/bin/id", 0, &sc);
+ T_ASSERT_MACH_ERROR(ret, KERN_NO_ACCESS, "create a new suid cred for id (non-root)");
+}
+
+#endif /* ENTITLED */
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+
+ <key>com.apple.private.suid_cred</key>
+ <true/>
+
+</dict>
+</plist>
yield obj
link = link.next
+def IterateCircleQueue(queue_head, element_ptr_type, element_field_name):
+ """ iterate over a circle queue in kernel of type circle_queue_head_t. refer to osfmk/kern/circle_queue.h
+ params:
+ queue_head - lldb.SBValue : Value object for queue_head.
+ element_ptr_type - str : name of the struct that embeds the circle_queue_entry_t link, e.g. 'struct sched_clutch_bucket'.
+ element_field_name - str : name of the field in target struct.
+ returns:
+ A generator does not return. It is used for iterating.
+ value : a pointer to the enclosing element of type (element_ptr_type), recovered via containerof(). Always a pointer object
+ """
+ head = queue_head.head.GetSBValue()
+ queue_head_addr = 0x0
+ if head.TypeIsPointerType():
+ queue_head_addr = head.GetValueAsUnsigned()
+ else:
+ queue_head_addr = head.GetAddress().GetLoadAddress(osplugin_target_obj)
+ cur_elt = head
+ while True:
+ if not cur_elt.IsValid() or cur_elt.GetValueAsUnsigned() == 0:
+ break
+ yield containerof(value(cur_elt), element_ptr_type, element_field_name)
+ cur_elt = cur_elt.GetChildMemberWithName('next')
+ if cur_elt.GetValueAsUnsigned() == queue_head_addr:
+ break
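
A usage note: IterateCircleQueue is consumed like the other queue iterators in this file; the clutch-scheduler macros later in this patch walk each per-priority circle queue roughly as follows (runq, pri and clutch_bucket_list are assumed to be set up by the caller):

    for clutch_bucket in IterateCircleQueue(runq.scbrq_queues[pri], 'struct sched_clutch_bucket', 'scb_runqlink'):
        clutch_bucket_list.append(clutch_bucket)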
def IterateQueue(queue_head, element_ptr_type, element_field_name, backwards=False, unpack_ptr_fn=None):
""" Iterate over an Element Chain queue in kernel of type queue_head_t. (osfmk/kern/queue.h method 2)
yield elt
cur_elt = elt.GetChildMemberWithName(element_field_name).GetChildMemberWithName('next')
-def IterateCircleQueue(queue_head, element_ptr_type, element_field_name):
- """ iterate over a circle queue in kernel of type circle_queue_head_t. refer to osfmk/kern/circle_queue.h
- params:
- queue_head - lldb.SBValue : Value object for queue_head.
- element_type - lldb.SBType : a pointer type of the element 'next' points to. Typically its structs like thread, task etc..
- element_field_name - str : name of the field in target struct.
- returns:
- A generator does not return. It is used for iterating.
- SBValue : an object thats of type (element_type) queue_head->next. Always a pointer object
- """
- head = queue_head.head
- queue_head_addr = 0x0
- if head.TypeIsPointerType():
- queue_head_addr = head.GetValueAsUnsigned()
- else:
- queue_head_addr = head.GetAddress().GetLoadAddress(osplugin_target_obj)
- cur_elt = head
- while True:
- if not cur_elt.IsValid() or cur_elt.GetValueAsUnsigned() == 0:
- break
- elt = cur_elt.Cast(element_ptr_type)
- yield elt
- cur_elt = elt.GetChildMemberWithName(element_field_name).GetChildMemberWithName('next')
- if cur_elt.GetValueAsUnsigned() == queue_head_addr:
- break
-
def GetUniqueSessionID(process_obj):
""" Create a unique session identifier.
params:
params: portval - core.value representation of 'ipc_port *' object
returns: str - string of kobject information
"""
- kobject_str = "{0: <#020x}".format(portval.kdata.kobject)
io_bits = unsigned(portval.ip_object.io_bits)
- objtype_index = io_bits & 0x7ff
+ if io_bits & 0x400 :
+ kobject_val = portval.kdata.kolabel.ikol_kobject
+ else:
+ kobject_val = portval.kdata.kobject
+ kobject_str = "{0: <#020x}".format(kobject_val)
+ objtype_index = io_bits & 0x3ff
if objtype_index < len(xnudefines.kobject_types) :
objtype_str = xnudefines.kobject_types[objtype_index]
if objtype_str == 'IOKIT_OBJ':
- iokit_classnm = GetObjectTypeStr(portval.kdata.kobject)
+ iokit_classnm = GetObjectTypeStr(kobject_val)
if not iokit_classnm:
iokit_classnm = "<unknown class>"
else:
else:
desc_str = "kobject({0:s})".format(objtype_str)
if xnudefines.kobject_types[objtype_index] in ('TASK_RESUME', 'TASK'):
- desc_str += " " + GetProcNameForTask(Cast(portval.kdata.kobject, 'task *'))
+ desc_str += " " + GetProcNameForTask(Cast(kobject_val, 'task *'))
else:
desc_str = "kobject(UNKNOWN) {:d}".format(objtype_index)
return kobject_str + " " + desc_str
kThreadWaitParkedWorkQueue = 0x0f
kThreadWaitWorkloopSyncWait = 0x10
kThreadWaitOnProcess = 0x11
+kThreadWaitCompressor = 0x14
UINT64_MAX = 0xffffffffffffffff
s += "waitpid, for process group %d" % abs(owner - 2**64)
else:
s += "waitpid, for pid %d" % owner
+ elif type == kThreadWaitCompressor:
+ s += "in compressor segment %x, busy for thread %d" % (context, owner)
else:
s += "unknown type %d (owner %d, context %x)" % (type, owner, context)
print "Last dispatch time known: %d MATUs" % cur_abstime
-bucketStr = ["", "FIXPRI (>UI)", "TIMESHARE_FG", "TIMESHARE_IN", "TIMESHARE_DF", "TIMESHARE_UT", "TIMESHARE_BG"]
+bucketStr = ["FIXPRI (>UI)", "TIMESHARE_FG", "TIMESHARE_IN", "TIMESHARE_DF", "TIMESHARE_UT", "TIMESHARE_BG"]
@header(" {:>18s} | {:>20s} | {:>20s} | {:>10s} | {:>10s}".format('Thread Group', 'Interactivity Score', 'Last Timeshare Tick', 'pri_shift', 'highq'))
def GetSchedClutchBucketSummary(clutch_bucket):
print "{:>10s} | {:>20s} | {:>30s} | 0x{:16x} | {:>10d} | {:>10d} | {:>30s} | {:>30s} | {:>15s} | ".format("Root", "*", "*", addressof(root_clutch), root_clutch.scr_priority, root_clutch.scr_thr_count, "*", "*", "*")
print "-" * 300
- for i in range(1, 7):
+ for i in range(0, 6):
root_bucket = root_clutch.scr_buckets[i]
print "{:>10s} | {:>20s} | {:>30s} | 0x{:16x} | {:>10s} | {:>10s} | {:>30s} | {:>30s} | {:>15d} | ".format("*", bucketStr[i], "*", addressof(root_bucket), "*", "*", "*", "*", root_bucket.scrb_deadline)
- prioq = root_bucket.scrb_clutch_buckets
+ clutch_bucket_runq = root_bucket.scrb_clutch_buckets
clutch_bucket_list = []
- for clutch_bucket in IteratePriorityQueue(prioq, 'struct sched_clutch_bucket', 'scb_pqlink'):
- clutch_bucket_list.append(clutch_bucket)
+ for pri in range(0,128):
+ clutch_bucket_circleq = clutch_bucket_runq.scbrq_queues[pri]
+ for clutch_bucket in IterateCircleQueue(clutch_bucket_circleq, 'struct sched_clutch_bucket', 'scb_runqlink'):
+ clutch_bucket_list.append(clutch_bucket)
if len(clutch_bucket_list) > 0:
clutch_bucket_list.sort(key=lambda x: x.scb_priority, reverse=True)
for clutch_bucket in clutch_bucket_list:
print "{:<30s} : {:d}".format("Deadline", root_bucket.scrb_deadline)
print "{:<30s} : {:d}".format("Current Timestamp", GetRecentTimestamp())
print "\n"
- prioq = root_bucket.scrb_clutch_buckets
+ clutch_bucket_runq = root_bucket.scrb_clutch_buckets
clutch_bucket_list = []
- for clutch_bucket in IteratePriorityQueue(prioq, 'struct sched_clutch_bucket', 'scb_pqlink'):
- clutch_bucket_list.append(clutch_bucket)
+ for pri in range(0,128):
+ clutch_bucket_circleq = clutch_bucket_runq.scbrq_queues[pri]
+ for clutch_bucket in IterateCircleQueue(clutch_bucket_circleq, 'struct sched_clutch_bucket', 'scb_runqlink'):
+ clutch_bucket_list.append(clutch_bucket)
if len(clutch_bucket_list) > 0:
print "=" * 240
print "{:>30s} | {:>18s} | {:>20s} | {:>20s} | ".format("Name", "Clutch Bucket", "Priority", "Count") + GetSchedClutchBucketSummary.header