options CONFIG_FORCE_OUT_IFP # Force IP output to use an interface # <config_force_out_ifp>
options CONFIG_MBUF_NOEXPAND # limit mbuf expansion # <config_mbuf_noexpand>
options CONFIG_MBUF_JUMBO # jumbo cluster pool # <config_mbuf_jumbo>
+options CONFIG_SCOPEDROUTING # scoped routing on by default # <config_scopedrouting>
options CONFIG_IP_EDGEHOLE # Drop tagged packets at EDGE interface # <config_ip_edgehole>
options CONFIG_WORKQUEUE # <config_workqueue>
ip_fw2_compat.o \
kpi_ipfilter.o \
in_gif.o \
- in_pcb.o \
ip_divert.o \
ip_dummynet.o \
ip_icmp.o \
ip_fw2_compat.o \
kpi_ipfilter.o \
in_gif.o \
- in_pcb.o \
ip_divert.o \
ip_dummynet.o \
ip_icmp.o \
return (0);
#else
+#pragma unused(state)
+
return 1; /* Darwin doesn't do zones. */
#endif /* __APPLE__ */
}
dtrace_dstate_percpu_t *dcpu;
int i, work = 0;
- for (i = 0; i < NCPU; i++) {
+ for (i = 0; i < (int)NCPU; i++) {
dcpu = &dstate->dtds_percpu[i];
ASSERT(dcpu->dtdsc_rinsing == NULL);
dtrace_sync();
- for (i = 0; i < NCPU; i++) {
+ for (i = 0; i < (int)NCPU; i++) {
dcpu = &dstate->dtds_percpu[i];
if (dcpu->dtdsc_rinsing == NULL)
case DTRACE_DSTATE_CLEAN: {
void *sp = &dstate->dtds_state;
- if (++cpu >= NCPU)
+ if (++cpu >= (int)NCPU)
cpu = 0;
if (dcpu->dtdsc_dirty != NULL &&
static void
dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg)
{
+#pragma unused(arg)
if (nval < *oval)
*oval = nval;
}
static void
dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg)
{
+#pragma unused(arg)
if (nval > *oval)
*oval = nval;
}
static void
dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
{
+#pragma unused(arg)
data[0]++;
data[1] += nval;
}
static void
dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg)
{
+#pragma unused(nval,arg)
*oval = *oval + 1;
}
static void
dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg)
{
+#pragma unused(arg)
*oval += nval;
}
dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf,
intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg)
{
+#pragma unused(arg)
dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec;
uint32_t i, ndx, size, fsize;
uint32_t align = sizeof (uint64_t) - 1;
* string -- setting a bit in the map for every character
* found in the token string.
*/
- for (i = 0; i < sizeof (tokmap); i++)
+ for (i = 0; i < (int)sizeof (tokmap); i++)
tokmap[i] = 0;
for (; tokaddr < toklimit; tokaddr++) {
size_t sz = v->dtdv_type.dtdt_size;
sz += sizeof (uint64_t);
- ASSERT(svar->dtsv_size == NCPU * sz);
+ ASSERT(svar->dtsv_size == (int)NCPU * sz);
a += CPU->cpu_id * sz;
if (*(uint8_t *)a == UINT8_MAX) {
break;
}
- ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t));
+ ASSERT(svar->dtsv_size == (int)NCPU * sizeof (uint64_t));
tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
regs[rd] = tmp[CPU->cpu_id];
break;
size_t sz = v->dtdv_type.dtdt_size;
sz += sizeof (uint64_t);
- ASSERT(svar->dtsv_size == NCPU * sz);
+ ASSERT(svar->dtsv_size == (int)NCPU * sz);
a += CPU->cpu_id * sz;
if (regs[rd] == NULL) {
break;
}
- ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t));
+ ASSERT(svar->dtsv_size == (int)NCPU * sizeof (uint64_t));
tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
tmp[CPU->cpu_id] = regs[rd];
break;
#ifdef lint
uint64_t val = 0;
#else
- uint64_t val;
+ uint64_t val = 0;
#endif
mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
static int
dtrace_match_nul(const char *s, const char *p, int depth)
{
+#pragma unused(s,p,depth)
return (1); /* always match the empty pattern */
}
static int
dtrace_match_nonzero(const char *s, const char *p, int depth)
{
+#pragma unused(p,depth)
return (s != NULL && s[0] != '\0');
}
}
do {
- kmod_info_t *ktl;
/*
* First, call the blanket provide operation.
*/
lck_mtx_unlock(&mod_lock);
#else
-#if 0 /* XXX Workaround for PR_4643546 XXX */
+#if 0 /* FIXME: Workaround for PR_4643546 */
simple_lock(&kmod_lock);
- ktl = kmod;
+ kmod_info_t *ktl = kmod;
while (ktl) {
prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ktl);
ktl = ktl->next;
svarp = &vstate->dtvs_locals;
if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
- dsize = NCPU * (v->dtdv_type.dtdt_size +
+ dsize = (int)NCPU * (v->dtdv_type.dtdt_size +
sizeof (uint64_t));
else
- dsize = NCPU * sizeof (uint64_t);
+ dsize = (int)NCPU * sizeof (uint64_t);
break;
*/
diff = offs + sizeof (dtrace_aggid_t);
- if (diff = (diff & (sizeof (uint64_t) - 1)))
+ if ((diff = (diff & (sizeof (uint64_t) - 1))))
offs += sizeof (uint64_t) - diff;
aggbase = offs - sizeof (dtrace_aggid_t);
* of creating our own (saving both time and space).
*/
dtrace_ecb_t *cached = dtrace_ecb_create_cache;
- dtrace_action_t *act = cached->dte_action;
+ dtrace_action_t *act_if = cached->dte_action;
- if (act != NULL) {
- ASSERT(act->dta_refcnt > 0);
- act->dta_refcnt++;
- ecb->dte_action = act;
+ if (act_if != NULL) {
+ ASSERT(act_if->dta_refcnt > 0);
+ act_if->dta_refcnt++;
+ ecb->dte_action = act_if;
ecb->dte_action_last = cached->dte_action_last;
ecb->dte_needed = cached->dte_needed;
ecb->dte_size = cached->dte_size;
return (EFBIG);
#if defined(__APPLE__)
- if (size > (sane_size / 8) / NCPU) /* As in kdbg_set_nkdbufs(), roughly. */
+ if (size > (sane_size / 8) / (int)NCPU) /* As in kdbg_set_nkdbufs(), roughly. */
return (ENOMEM);
#endif /* __APPLE__ */
intptr_t offs = buf->dtb_offset, soffs;
intptr_t woffs;
caddr_t tomax;
- size_t total;
+ size_t total_off;
if (buf->dtb_flags & DTRACEBUF_INACTIVE)
return (-1);
goto out;
}
- total = needed + (offs & (align - 1));
+ total_off = needed + (offs & (align - 1));
/*
* For a ring buffer, life is quite a bit more complicated. Before
* is required.)
*/
if ((buf->dtb_flags & DTRACEBUF_WRAPPED) ||
- offs + total > buf->dtb_size) {
+ offs + total_off > buf->dtb_size) {
woffs = buf->dtb_xamot_offset;
- if (offs + total > buf->dtb_size) {
+ if (offs + total_off > buf->dtb_size) {
/*
* We can't fit in the end of the buffer. First, a
* sanity check that we can fit in the buffer at all.
*/
- if (total > buf->dtb_size) {
+ if (total_off > buf->dtb_size) {
dtrace_buffer_drop(buf);
return (-1);
}
* that the top of the buffer is aligned.
*/
offs = 0;
- total = needed;
+ total_off = needed;
buf->dtb_flags |= DTRACEBUF_WRAPPED;
} else {
/*
}
}
- while (offs + total > woffs) {
+ while (offs + total_off > woffs) {
dtrace_epid_t epid = *(uint32_t *)(tomax + woffs);
size_t size;
if (offs == 0) {
buf->dtb_flags &= ~DTRACEBUF_WRAPPED;
buf->dtb_offset = 0;
- woffs = total;
+ woffs = total_off;
while (woffs < buf->dtb_size)
tomax[woffs++] = 0;
{
int i;
- for (i = 0; i < NCPU; i++) {
+ for (i = 0; i < (int)NCPU; i++) {
dtrace_buffer_t *buf = &bufs[i];
if (buf->dtb_tomax == NULL) {
dtrace_enabling_matchstate(dtrace_state_t *state, int *nmatched)
{
dtrace_enabling_t *enab;
- int matched, total = 0, err;
+ int matched, total_matched = 0, err;
lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
if ((err = dtrace_enabling_match(enab, &matched)) != 0)
return (err);
- total += matched;
+ total_matched += matched;
}
if (nmatched != NULL)
- *nmatched = total;
+ *nmatched = total_matched;
return (0);
}
static void
dtrace_dof_error(dof_hdr_t *dof, const char *str)
{
+#pragma unused(dof)
if (dtrace_err_verbose)
cmn_err(CE_WARN, "failed to process DOF: %s", str);
size_t ttl = 0;
dof_difohdr_t *dofd;
uintptr_t daddr = (uintptr_t)dof;
- size_t max = dtrace_difo_maxsize;
+ size_t max_size = dtrace_difo_maxsize;
int i, l, n;
static const struct {
dofd->dofd_links[l])) == NULL)
goto err; /* invalid section link */
- if (ttl + subsec->dofs_size > max) {
+ if (ttl + subsec->dofs_size > max_size) {
dtrace_dof_error(dof, "exceeds maximum size");
goto err;
}
int
dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size)
{
- size_t hashsize, maxper, min, chunksize = dstate->dtds_chunksize;
+ size_t hashsize, maxper, min_size, chunksize = dstate->dtds_chunksize;
void *base;
uintptr_t limit;
dtrace_dynvar_t *dvar, *next, *start;
if ((dstate->dtds_chunksize = chunksize) == 0)
dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE;
- if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)))
- size = min;
+ if (size < (min_size = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)))
+ size = min_size;
if ((base = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
return (ENOMEM);
dstate->dtds_size = size;
dstate->dtds_base = base;
dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP);
- bzero(dstate->dtds_percpu, NCPU * sizeof (dtrace_dstate_percpu_t));
+ bzero(dstate->dtds_percpu, (int)NCPU * sizeof (dtrace_dstate_percpu_t));
hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t));
((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t));
limit = (uintptr_t)base + size;
- maxper = (limit - (uintptr_t)start) / NCPU;
+ maxper = (limit - (uintptr_t)start) / (int)NCPU;
maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize;
- for (i = 0; i < NCPU; i++) {
+ for (i = 0; i < (int)NCPU; i++) {
dstate->dtds_percpu[i].dtdsc_free = dvar = start;
/*
* whatever is left over. In either case, we set the limit to
* be the limit of the dynamic variable space.
*/
- if (maxper == 0 || i == NCPU - 1) {
+ if (maxper == 0 || i == (int)NCPU - 1) {
limit = (uintptr_t)base + size;
start = NULL;
} else {
char c[30];
dtrace_state_t *state;
dtrace_optval_t *opt;
- int bufsize = NCPU * sizeof (dtrace_buffer_t), i;
+ int bufsize = (int)NCPU * sizeof (dtrace_buffer_t), i;
lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
{
dtrace_optval_t *opt = state->dts_options, size;
- processorid_t cpu;
+ processorid_t cpu = 0;
int flags = 0, rval;
lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
dtrace_buffer_t *buf;
cyc_handler_t hdlr;
cyc_time_t when;
- int rval = 0, i, bufsize = NCPU * sizeof (dtrace_buffer_t);
+ int rval = 0, i, bufsize = (int)NCPU * sizeof (dtrace_buffer_t);
dtrace_icookie_t cookie;
lck_mtx_lock(&cpu_lock);
dtrace_ecb_t *ecb;
dtrace_vstate_t *vstate = &state->dts_vstate;
minor_t minor = getminor(state->dts_dev);
- int i, bufsize = NCPU * sizeof (dtrace_buffer_t);
+ int i, bufsize = (int)NCPU * sizeof (dtrace_buffer_t);
dtrace_speculation_t *spec = state->dts_speculations;
int nspec = state->dts_nspeculations;
uint32_t match;
if ((svar = vstate->dtvs_locals[i]) == NULL)
continue;
- ASSERT(svar->dtsv_size >= NCPU * sizeof (uint64_t));
+ ASSERT(svar->dtsv_size >= (int)NCPU * sizeof (uint64_t));
ent->dtht_locals[i] =
((uint64_t *)(uintptr_t)svar->dtsv_data)[CPU->cpu_id];
}
uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
uint64_t sarg0 = mstate->dtms_arg[0];
uint64_t sarg1 = mstate->dtms_arg[1];
- uint64_t rval;
+ uint64_t rval = 0;
dtrace_helpers_t *helpers = curproc->p_dtrace_helpers;
dtrace_helper_action_t *helper;
dtrace_vstate_t *vstate;
* given generation number.
*/
for (;;) {
- dtrace_helper_provider_t *prov;
+ dtrace_helper_provider_t *prov = NULL;
/*
* Look for a helper provider with the right generation. We
1, INT_MAX, 0);
dtrace_state_cache = kmem_cache_create("dtrace_state_cache",
- sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN,
+ sizeof (dtrace_dstate_percpu_t) * (int)NCPU, DTRACE_STATE_ALIGN,
NULL, NULL, NULL, NULL, NULL, 0);
lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
static int
dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
+#pragma unused(flag,otyp,cred_p)
minor_t minor = getminor(dev);
dtrace_state_t *state;
static int
dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
+#pragma unused(md)
+
minor_t minor = getminor(dev);
dtrace_state_t *state;
int rval;
if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
return (EFAULT);
- if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU)
+ if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= (int)NCPU)
return (EINVAL);
lck_mtx_lock(&dtrace_lock);
nerrs = state->dts_errors;
dstate = &state->dts_vstate.dtvs_dynvars;
- for (i = 0; i < NCPU; i++) {
+ for (i = 0; i < (int)NCPU; i++) {
dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i];
stat.dtst_dyndrops += dcpu->dtdsc_drops;
mach_vm_address_t addr = 0LL;
mach_vm_size_t size = PAGE_SIZE; // We need some way to assert that this matches vm_map_round_page() !!!
+#if CONFIG_EMBEDDED
+ /* The embedded OS has extra permissions for writable and executable pages. We can't pass in the flags
+ * we need for the correct permissions from mach_vm_allocate, so we need to call mach_vm_map directly. */
+ vm_map_offset_t map_addr = 0;
+ kern_return_t kr = mach_vm_map(map, &map_addr, size, 0, VM_FLAGS_ANYWHERE, IPC_PORT_NULL, 0, FALSE, VM_PROT_READ|VM_PROT_EXECUTE, VM_PROT_READ|VM_PROT_EXECUTE, VM_INHERIT_DEFAULT);
+ if (kr != KERN_SUCCESS) {
+ goto err;
+ }
+ addr = map_addr;
+#else
kern_return_t kr = mach_vm_allocate(map, &addr, size, VM_FLAGS_ANYWHERE);
if (kr != KERN_SUCCESS) {
goto err;
mach_vm_deallocate(map, addr, size);
goto err;
}
+#endif
// Chain the page entries.
int i;
tp->ftt_pc = pdata->ftps_offs[i] + pdata->ftps_pc;
tp->ftt_pid = pdata->ftps_pid;
+
pp->ftp_tps[0].fit_tp = tp;
pp->ftp_tps[0].fit_id.fti_probe = pp;
#if defined(__APPLE__)
* Yes, this is a WAG.
*/
fasttrap_max = (sane_size >> 28) * 100000;
+ if (fasttrap_max == 0)
+ fasttrap_max = 50000;
#endif
fasttrap_total = 0;
*/
void lockstat_hot_patch(boolean_t active)
{
+#pragma unused(active)
int i;
static void
lockstat_enable(void *arg, dtrace_id_t id, void *parg)
{
+#pragma unused(arg)
lockstat_probe_t *probe = parg;
ASSERT(!lockstat_probemap[probe->lsp_probe]);
static void
lockstat_disable(void *arg, dtrace_id_t id, void *parg)
{
+#pragma unused(arg,id)
lockstat_probe_t *probe = parg;
int i;
static void
lockstat_provide(void *arg, const dtrace_probedesc_t *desc)
{
+#pragma unused(arg,desc)
int i = 0;
for (i = 0; lockstat_probes[i].lsp_func != NULL; i++) {
static void
lockstat_destroy(void *arg, dtrace_id_t id, void *parg)
{
+#pragma unused(arg,id)
lockstat_probe_t *probe = parg;
ASSERT(!lockstat_probemap[probe->lsp_probe]);
CPU->cpu_profile_upc, late, 0, 0);
#else
#if defined(__ppc__) || defined(__ppc64__)
+ {
struct savearea *sv = find_kern_regs(current_thread());
if (sv) {
dtrace_probe(prof->prof_id, 0xcafebabe,
0x0, late, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
}
+ }
#elif defined(__i386__) || defined(__x86_64__)
+ {
x86_saved_state32_t *kern_regs = find_kern_regs(current_thread());
if (NULL != kern_regs) {
dtrace_probe(prof->prof_id, 0x0, kern_regs->eip, 0, 0, 0);
}
}
+ }
#else
#error Unknown architecture
#endif
CPU->cpu_profile_upc, 0, 0, 0);
#else
#if defined(__ppc__) || defined(__ppc64__)
+ {
struct savearea *sv = find_kern_regs(current_thread());
if (sv) {
dtrace_probe(prof->prof_id, 0xcafebabe,
0x0, 0, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
}
+ }
#elif defined(__i386__) || defined(__x86_64__)
+ {
x86_saved_state32_t *kern_regs = find_kern_regs(current_thread());
if (NULL != kern_regs) {
dtrace_probe(prof->prof_id, 0x0, kern_regs->eip, 0, 0, 0);
}
}
+ }
#else
#error Unknown architecture
#endif
sdt_provide_module(void *arg, struct modctl *ctl)
{
#pragma unused(ctl)
+#pragma unused(arg)
__sdt_provide_module(arg, &g_sdt_kernctl);
sdt_probedesc_t *sdpd = g_sdt_mach_module.sdt_probes;
// Bounds "check" the value of code a la unix_syscall
sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];
- if ((id = sy->stsy_entry) != DTRACE_IDNONE)
- (*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
+ if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
+ if (ip)
+ (*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
+ else
+ (*systrace_probe)(id, 0, 0, 0, 0, 0);
+ }
#if 0 /* XXX */
/*
sizeof(cpu_info->cpuid_logical_per_package));
}
+static int
+hw_cpu_sysctl_nehalem SYSCTL_HANDLER_ARGS
+{
+ i386_cpu_info_t *cpu_info = cpuid_info();
+
+ if (cpu_info->cpuid_model != 26)
+ return ENOENT;
+
+ return hw_cpu_sysctl(oidp, arg1, arg2, req);
+}
+
+static int
+hw_cpu_flex_ratio_desired SYSCTL_HANDLER_ARGS
+{
+ __unused struct sysctl_oid *unused_oidp = oidp;
+ __unused void *unused_arg1 = arg1;
+ __unused int unused_arg2 = arg2;
+ i386_cpu_info_t *cpu_info = cpuid_info();
+
+ if (cpu_info->cpuid_model != 26)
+ return ENOENT;
+
+ return SYSCTL_OUT(req, &flex_ratio, sizeof(flex_ratio));
+}
+
+static int
+hw_cpu_flex_ratio_min SYSCTL_HANDLER_ARGS
+{
+ __unused struct sysctl_oid *unused_oidp = oidp;
+ __unused void *unused_arg1 = arg1;
+ __unused int unused_arg2 = arg2;
+ i386_cpu_info_t *cpu_info = cpuid_info();
+
+ if (cpu_info->cpuid_model != 26)
+ return ENOENT;
+
+ return SYSCTL_OUT(req, &flex_ratio_min, sizeof(flex_ratio_min));
+}
+
+static int
+hw_cpu_flex_ratio_max SYSCTL_HANDLER_ARGS
+{
+ __unused struct sysctl_oid *unused_oidp = oidp;
+ __unused void *unused_arg1 = arg1;
+ __unused int unused_arg2 = arg2;
+ i386_cpu_info_t *cpu_info = cpuid_info();
+
+ if (cpu_info->cpuid_model != 26)
+ return ENOENT;
+
+ return SYSCTL_OUT(req, &flex_ratio_max, sizeof(flex_ratio_max));
+}
+
SYSCTL_NODE(_machdep, OID_AUTO, cpu, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
"CPU info");
sizeof(uint32_t),
hw_cpu_sysctl, "I", "Number of enabled threads per package");
+SYSCTL_NODE(_machdep_cpu, OID_AUTO, flex_ratio, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
+ "Flex ratio");
+
+SYSCTL_PROC(_machdep_cpu_flex_ratio, OID_AUTO, desired,
+ CTLTYPE_INT | CTLFLAG_RD,
+ 0, 0,
+ hw_cpu_flex_ratio_desired, "I", "Flex ratio desired (0 disabled)");
+
+SYSCTL_PROC(_machdep_cpu_flex_ratio, OID_AUTO, min,
+ CTLTYPE_INT | CTLFLAG_RD,
+ 0, 0,
+ hw_cpu_flex_ratio_min, "I", "Flex ratio min (efficiency)");
+
+SYSCTL_PROC(_machdep_cpu_flex_ratio, OID_AUTO, max,
+ CTLTYPE_INT | CTLFLAG_RD,
+ 0, 0,
+ hw_cpu_flex_ratio_max, "I", "Flex ratio max (non-turbo)");
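/*
 * Hedged usage sketch (not part of the diff): reading the new
 * machdep.cpu.flex_ratio.* values from user space with sysctlbyname(3).
 * The OID names follow from the SYSCTL_NODE/SYSCTL_PROC declarations
 * above; treating each value as a uint32_t is an assumption here.
 */
#include <stdio.h>
#include <stdint.h>
#include <sys/types.h>
#include <sys/sysctl.h>

static int
print_flex_ratio(void)
{
	uint32_t desired, ratio_min, ratio_max;
	size_t len;

	/* Each handler above returns ENOENT on parts with cpuid_model != 26. */
	len = sizeof (desired);
	if (sysctlbyname("machdep.cpu.flex_ratio.desired", &desired, &len, NULL, 0) == -1)
		return (-1);
	len = sizeof (ratio_min);
	if (sysctlbyname("machdep.cpu.flex_ratio.min", &ratio_min, &len, NULL, 0) == -1)
		return (-1);
	len = sizeof (ratio_max);
	if (sysctlbyname("machdep.cpu.flex_ratio.max", &ratio_max, &len, NULL, 0) == -1)
		return (-1);

	printf("flex ratio: desired=%u min=%u max=%u\n", desired, ratio_min, ratio_max);
	return (0);
}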
uint64_t pmap_pv_hashlist_walks;
uint64_t pmap_pv_hashlist_cnts;
#include <sys/vnode.h>
#include <sys/sysctl.h>
#include <dev/ppc/cons.h>
+#include <pexpert/pexpert.h>
extern vm_map_t mb_map;
__private_extern__ int customnbuf = 0;
int srv = 0; /* Flag indicates a server boot when set */
int ncl = 0;
+static unsigned int mbuf_poolsz;
vm_map_t buffer_map;
vm_map_t bufferhdr_map;
bufinit();
}
+/* 512 MB hard limit on size of the mbuf pool */
+#define MAX_MBUF_POOL (512 << MBSHIFT)
+#define MAX_NCL (MAX_MBUF_POOL >> MCLSHIFT)
/*
* this has been broken out into a separate routine that
int
bsd_mbuf_cluster_reserve(void)
{
- if (sane_size > (64 * 1024 * 1024) || ncl) {
+ /* If called more than once, return the previously calculated size */
+ if (mbuf_poolsz != 0)
+ goto done;
+
+ PE_parse_boot_argn("ncl", &ncl, sizeof (ncl));
+ if (sane_size > (64 * 1024 * 1024) || ncl) {
if ((nmbclusters = ncl) == 0) {
if ((nmbclusters = ((sane_size / 16)/MCLBYTES)) > 32768)
nmbclusters = 32768;
/* Make sure it's not odd in case ncl is manually set */
if (nmbclusters & 0x1)
--nmbclusters;
- }
+ /* And obey the upper limit */
+ if (nmbclusters > MAX_NCL)
+ nmbclusters = MAX_NCL;
+
+ }
+ mbuf_poolsz = nmbclusters << MCLSHIFT;
+done:
return (nmbclusters * MCLBYTES);
}
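/*
 * Hedged worked example (not part of the diff): how the new cap plays out,
 * assuming the usual Darwin constants MBSHIFT = 20 and MCLSHIFT = 11
 * (MCLBYTES = 2048).
 *
 *   MAX_MBUF_POOL = 512 << 20       = 536870912 bytes (512 MB)
 *   MAX_NCL       = 536870912 >> 11 = 262144 clusters
 *
 * With sane_size = 1 GB and no "ncl" boot-arg, (sane_size / 16) / MCLBYTES
 * is 32768, which already sits at the 32768-cluster ceiling, so the pool is
 * 32768 * 2048 = 64 MB; only very large ncl overrides or huge sane_size
 * values are clipped by MAX_NCL.
 */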
lck_mtx_t hfs_mutex; /* protects access to hfsmount data */
void *hfs_freezing_proc; /* who froze the fs */
+ void *hfs_downgrading_proc; /* process who's downgrading to rdonly */
lck_rw_t hfs_insync; /* protects sync/freeze interaction */
/* Resize variables: */
#define HFS_VIRTUAL_DEVICE 0x20000
/* When set, we're in hfs_changefs, so hfs_sync should do nothing. */
#define HFS_IN_CHANGEFS 0x40000
+/* When set, we are in the process of downgrading or have downgraded to read-only,
+ * so hfs_start_transaction should return EROFS. */
+#define HFS_RDONLY_DOWNGRADE 0x80000
/* Macro to update next allocation block in the HFS mount structure. If
lck_mtx_unlock(&encodinglst_mutex);
FREE(encp, M_TEMP);
+ record_kext_unload(id);
kmod_destroy((host_priv_t) host_priv_self(), id);
return (0);
}
}
tdcp = VTOC(tdvp);
cp = VTOC(vp);
+
+ /*
+ * Make sure we don't race the src or dst parent directories with rmdir.
+ * Note that we should only have a src parent directory cnode lock
+ * if we're dealing with a directory hardlink here.
+ */
+ if (fdcp) {
+ if (fdcp->c_flag & (C_NOEXISTS | C_DELETED)) {
+ error = ENOENT;
+ goto out;
+ }
+ }
+
+ if (tdcp->c_flag & (C_NOEXISTS | C_DELETED)) {
+ error = ENOENT;
+ goto out;
+ }
+
+ /* Check src for errors: too many links, immutable, race with unlink */
if (cp->c_linkcount >= HFS_LINK_MAX) {
error = EMLINK;
goto out;
vfs_isrdonly(mp)) {
int flags;
+ /* Set flag to indicate that a downgrade to read-only
+ * is in progress and therefore block any further
+ * modifications to the file system.
+ */
+ hfs_global_exclusive_lock_acquire(hfsmp);
+ hfsmp->hfs_flags |= HFS_RDONLY_DOWNGRADE;
+ hfsmp->hfs_downgrading_proc = current_thread();
+ hfs_global_exclusive_lock_release(hfsmp);
+
/* use VFS_SYNC to push out System (btree) files */
retval = VFS_SYNC(mp, MNT_WAIT, context);
- if (retval && ((cmdflags & MNT_FORCE) == 0))
+ if (retval && ((cmdflags & MNT_FORCE) == 0)) {
+ hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
+ hfsmp->hfs_downgrading_proc = NULL;
goto out;
+ }
flags = WRITECLOSE;
if (cmdflags & MNT_FORCE)
flags |= FORCECLOSE;
- if ((retval = hfs_flushfiles(mp, flags, p)))
+ if ((retval = hfs_flushfiles(mp, flags, p))) {
+ hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
+ hfsmp->hfs_downgrading_proc = NULL;
goto out;
+ }
/* mark the volume cleanly unmounted */
hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask;
}
}
if (retval) {
+ hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
+ hfsmp->hfs_downgrading_proc = NULL;
hfsmp->hfs_flags &= ~HFS_READ_ONLY;
goto out;
}
hfs_global_exclusive_lock_release(hfsmp);
}
+
+ hfsmp->hfs_downgrading_proc = NULL;
}
/* Change to a writable file system. */
/* Only clear HFS_READ_ONLY after a successful write */
hfsmp->hfs_flags &= ~HFS_READ_ONLY;
+ /* If this mount point was downgraded from read-write
+ * to read-only, clear that information as we are now
+ * moving back to read-write.
+ */
+ hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
+ hfsmp->hfs_downgrading_proc = NULL;
+
/* mark the volume dirty (clear clean unmount bit) */
hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask;
* block size to be 4k if there are more than 31-bits
* worth of blocks but to insure compatibility with
* pre-Tiger systems we have to do it.
+ *
+ * If the device size is not a multiple of 4K (8 * 512), then
+ * switching the logical block size isn't going to help because
+ * we will be unable to write the alternate volume header.
+ * In this case, just leave the logical block size unchanged.
*/
- if (log_blkcnt > 0x000000007fffffff) {
+ if (log_blkcnt > 0x000000007fffffff && (log_blkcnt & 7) == 0) {
minblksize = log_blksize = 4096;
if (phys_blksize < log_blksize)
phys_blksize = log_blksize;
}
hfsmp->hfs_logical_block_size = log_blksize;
hfsmp->hfs_logical_block_count = log_blkcnt;
+ hfsmp->hfs_physical_block_size = log_blksize;
+ hfsmp->hfs_log_per_phys = 1;
}
if (args) {
hfsmp->hfs_encoding = args->hfs_encoding;
hfsmp->hfs_logical_block_count *=
hfsmp->hfs_logical_block_size / log_blksize;
hfsmp->hfs_logical_block_size = log_blksize;
+
+ /* Update logical/physical block size */
+ hfsmp->hfs_physical_block_size = log_blksize;
+ phys_blksize = log_blksize;
+ hfsmp->hfs_log_per_phys = 1;
}
disksize = (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.blockCount) *
/* Note: relative block count adjustment (in case this is an embedded volume). */
hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize;
hfsmp->hfs_logical_block_size = log_blksize;
+ hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize;
if (hfsmp->jnl) {
// close and re-open this with the new block size
/* If ioctl is not supported, force physical and logical sector size to be same */
phys_sectorsize = sectorsize;
}
- if (phys_sectorsize != hfsmp->hfs_physical_block_size) {
- return (ENXIO);
- }
oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
/*
/* Now move any files that are in the way. */
for (i = 0; i < filecnt; ++i) {
struct vnode * rvp;
+ struct cnode * cp;
if (hfs_vget(hfsmp, cnidbufp[i], &vp, 0) != 0)
continue;
+ /* Relocating directory hard links is not supported, so we
+ * punt (see radar 6217026). */
+ cp = VTOC(vp);
+ if ((cp->c_flag & C_HARDLINK) && vnode_isdir(vp)) {
+ printf("hfs_reclaimspace: unable to relocate directory hard link %d\n", cp->c_cnid);
+ error = EINVAL;
+ goto out;
+ }
+
/* Relocate any data fork blocks. */
- if (VTOF(vp)->ff_blocks > 0) {
+ if (VTOF(vp) && VTOF(vp)->ff_blocks > 0) {
error = hfs_relocate(vp, hfsmp->hfs_metazone_end + 1, kauth_cred_get(), current_proc());
}
if (error)
break;
/* Relocate any resource fork blocks. */
- if ((VTOC((vp))->c_blocks - VTOF((vp))->ff_blocks) > 0) {
+ if ((cp->c_blocks - (VTOF(vp) ? VTOF((vp))->ff_blocks : 0)) > 0) {
error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE);
if (error)
break;
if (error)
break;
}
- hfs_unlock(VTOC(vp));
+ hfs_unlock(cp);
vnode_put(vp);
vp = NULL;
*/
if (blockSize < hfsmp->hfs_physical_block_size) {
hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
+ hfsmp->hfs_log_per_phys = 1;
}
/*
retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
&hfsmp->hfs_extents_vp);
if (retval)
+ {
goto ErrorExit;
+ }
hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
hfs_unlock(hfsmp->hfs_extents_cp);
retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
(KeyCompareProcPtr) CompareExtentKeysPlus));
if (retval)
+ {
goto ErrorExit;
+ }
/*
* Set up Catalog B-tree vnode
*/
unlock_on_err = 1;
}
+ /* If a downgrade to read-only mount is in progress, no process
+ * other than the downgrade process is allowed to modify
+ * the file system.
+ */
+ if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) &&
+ (hfsmp->hfs_downgrading_proc != thread)) {
+ ret = EROFS;
+ goto out;
+ }
+
if (hfsmp->jnl) {
ret = journal_start_transaction(hfsmp->jnl);
if (ret == 0) {
ret = 0;
}
+out:
if (ret != 0 && unlock_on_err) {
lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
}
int
-kdbg_control(int *name, __unused u_int namelen, user_addr_t where, size_t *sizep)
+kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
{
int ret=0;
size_t size=*sizep;
- unsigned int value = name[1];
+ unsigned int value = 0;
kd_regtype kd_Reg;
kbufinfo_t kd_bufinfo;
pid_t curpid;
struct proc *p, *curproc;
-
+ if (name[0] == KERN_KDGETENTROPY ||
+ name[0] == KERN_KDEFLAGS ||
+ name[0] == KERN_KDDFLAGS ||
+ name[0] == KERN_KDENABLE ||
+ name[0] == KERN_KDSETBUF) {
+
+ if ( namelen < 2 )
+ return(EINVAL);
+ value = name[1];
+ }
+
kdbg_lock_init();
if ( !(kdebug_flags & KDBG_LOCKINIT))
* vp The exec vnode
* scriptl The script MAC label
* execl The executable MAC label
+ * disjointp Pointer to flag to set if old
+ * and returned credentials are
+ * disjoint
*
* Returns: (kauth_cred_t) The updated credential
*
+ * Implicit returns:
+ * *disjointp Set to 1 for disjoint creds
+ *
* IMPORTANT: This function is implemented via kauth_cred_update(), which,
* if it returns a credential other than the one it is passed,
* will have dropped the reference on the passed credential. All
static
kauth_cred_t
kauth_cred_label_update_execve(kauth_cred_t cred, vfs_context_t ctx,
- struct vnode *vp, struct label *scriptl, struct label *execl)
+ struct vnode *vp, struct label *scriptl, struct label *execl,
+ int *disjointp)
{
kauth_cred_t newcred;
struct ucred temp_cred;
mac_cred_label_init(&temp_cred);
mac_cred_label_associate(cred, &temp_cred);
- mac_cred_label_update_execve(ctx, &temp_cred,
- vp, scriptl, execl);
+ *disjointp = mac_cred_label_update_execve(ctx, &temp_cred,
+ vp, scriptl, execl);
newcred = kauth_cred_update(cred, &temp_cred, TRUE);
mac_cred_label_destroy(&temp_cred);
* scriptl The script MAC label
* execl The executable MAC label
*
+ * Returns: 0 Label update did not make credential
+ * disjoint
+ * 1 Label update caused credential to be
+ * disjoint
+ *
* Notes: The credential associated with the process WILL change as a
* result of this call. The caller should not assume the process
* reference to the old credential still exists.
*/
-int kauth_proc_label_update_execve(struct proc *p, vfs_context_t ctx,
+int
+kauth_proc_label_update_execve(struct proc *p, vfs_context_t ctx,
struct vnode *vp, struct label *scriptl, struct label *execl)
{
kauth_cred_t my_cred, my_new_cred;
+ int disjoint = 0;
my_cred = kauth_cred_proc_ref(p);
* passed in. The subsequent compare is safe, because it is
* a pointer compare rather than a contents compare.
*/
- my_new_cred = kauth_cred_label_update_execve(my_cred, ctx, vp, scriptl, execl);
+ my_new_cred = kauth_cred_label_update_execve(my_cred, ctx, vp, scriptl, execl, &disjoint);
if (my_cred != my_new_cred) {
DEBUG_CRED_CHANGE("kauth_proc_label_update_execve_unlocked CH(%d): %p/0x%08x -> %p/0x%08x\n", p->p_pid, my_cred, my_cred->cr_flags, my_new_cred, my_new_cred->cr_flags);
/* Drop old proc reference or our extra reference */
kauth_cred_unref(&my_cred);
- return (0);
+ return (disjoint);
}
#if 1
kauth_cred_t cred = vfs_context_ucred(imgp->ip_vfs_context);
proc_t p = vfs_context_proc(imgp->ip_vfs_context);
int i;
- int is_member = 0;
+ int leave_sugid_clear = 0;
int error = 0;
struct vnode *dev_null = NULLVP;
-#if CONFIG_MACF
- kauth_cred_t my_cred;
-#endif
-
#if CONFIG_MACF
int mac_transition;
- mac_transition = mac_cred_check_label_update_execve(imgp->ip_vfs_context, imgp->ip_vp,
- imgp->ip_scriptlabelp, imgp->ip_execlabelp, p);
+
+ /*
+ * Determine whether a call to update the MAC label will result in the
+ * credential changing.
+ *
+ * Note: MAC policies which do not actually end up modifying
+ * the label subsequently are strongly encouraged to
+ * return 0 for this check, since a non-zero answer will
+ * slow down the exec fast path for normal binaries.
+ */
+ mac_transition = mac_cred_check_label_update_execve(
+ imgp->ip_vfs_context,
+ imgp->ip_vp,
+ imgp->ip_scriptlabelp,
+ imgp->ip_execlabelp, p);
#endif
OSBitAndAtomic(~((uint32_t)P_SUGID), (UInt32 *)&p->p_flag);
/*
* Order of the following is important; group checks must go last,
- * as we use the success of the 'is_member' check combined with the
+ * as we use the success of the 'ismember' check combined with the
* failure of the explicit match to indicate that we will be setting
* the egid of the process even though the new process did not
* require VSUID/VSGID bits in order for it to set the new group as
*/
if (((imgp->ip_origvattr->va_mode & VSUID) != 0 &&
kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) ||
-#if CONFIG_MACF
- mac_transition || /* A policy wants to transition */
-#endif
((imgp->ip_origvattr->va_mode & VSGID) != 0 &&
- ((kauth_cred_ismember_gid(cred, imgp->ip_origvattr->va_gid, &is_member) || !is_member) ||
+ ((kauth_cred_ismember_gid(cred, imgp->ip_origvattr->va_gid, &leave_sugid_clear) || !leave_sugid_clear) ||
(cred->cr_gid != imgp->ip_origvattr->va_gid)))) {
+#if CONFIG_MACF
+/* label for MAC transition and neither VSUID nor VSGID */
+handle_mac_transition:
+#endif
+
/*
* Replace the credential with a copy of itself if euid or
* egid change.
#if CONFIG_MACF
/*
- * XXXMAC: In FreeBSD, we set P_SUGID on a MAC transition
- * to protect against debuggers being attached by an
- * insufficiently privileged process onto the result of
- * a transition to a more privileged credential. This is
- * too conservative on FreeBSD, but we need to do
- * something similar here, or risk vulnerability.
- *
- * Before we make the call into the MAC policies, get a new
+ * If a policy has indicated that it will transition the label,
+ * before making the call into the MAC policies, get a new
* duplicate credential, so they can modify it without
* modifying any others sharing it.
*/
- if (mac_transition && !imgp->ip_no_trans) {
- kauth_proc_label_update_execve(p,
- imgp->ip_vfs_context,
- imgp->ip_vp,
- imgp->ip_scriptlabelp, imgp->ip_execlabelp);
+ if (mac_transition) {
+ kauth_cred_t my_cred;
+ if (kauth_proc_label_update_execve(p,
+ imgp->ip_vfs_context,
+ imgp->ip_vp,
+ imgp->ip_scriptlabelp,
+ imgp->ip_execlabelp)) {
+ /*
+ * If updating the MAC label resulted in a
+ * disjoint credential, flag that we need to
+ * set the P_SUGID bit. This protects
+ * against debuggers being attached by an
+ * insufficiently privileged process onto the
+ * result of a transition to a more privileged
+ * credential.
+ */
+ leave_sugid_clear = 0;
+ }
my_cred = kauth_cred_proc_ref(p);
mac_task_label_update_cred(my_cred, p->task);
kauth_cred_unref(&my_cred);
}
-#endif
+#endif /* CONFIG_MACF */
+
/*
* Have mach reset the task and thread ports.
* We don't want anyone who had the ports before
}
/*
- * If 'is_member' is non-zero, then we passed the VSUID and
- * MACF checks, and successfully determined that the previous
- * cred was a member of the VSGID group, but that it was not
- * the default at the time of the execve. So we don't set the
- * P_SUGID on the basis of simply running this code.
+ * If 'leave_sugid_clear' is non-zero, then we passed the
+ * VSUID and MACF checks, and successfully determined that
+ * the previous cred was a member of the VSGID group, but
+ * that it was not the default at the time of the execve,
+ * and that the post-labelling credential was not disjoint.
+ * So we don't set the P_SUGID on the basis of simply
+ * running this code.
*/
- if (!is_member)
+ if (!leave_sugid_clear)
OSBitOrAtomic(P_SUGID, (UInt32 *)&p->p_flag);
/* Cache the vnode for /dev/null the first time around */
dev_null = NULLVP;
}
}
+#if CONFIG_MACF
+ else {
+ /*
+ * We are here because we were told that the MAC label will
+ * be transitioned, and the binary is not VSUID or VSGID; to
+ * deal with this case, we could either duplicate a lot of
+ * code, or we can indicate we want to default the P_SUGID
+ * bit clear and jump back up.
+ */
+ if (mac_transition) {
+ leave_sugid_clear = 1;
+ goto handle_mac_transition;
+ }
+ }
+#endif /* CONFIG_MACF */
/*
* Implement the semantic where the effective user and group become
static int lf_getlock(struct lockf *, struct flock *);
static int lf_setlock(struct lockf *);
static int lf_split(struct lockf *, struct lockf *);
-static void lf_wakelock(struct lockf *);
+static void lf_wakelock(struct lockf *, boolean_t);
+
+
+/*
+ * In order to mitigate risk, don't switch to the new wake-one
+ * method unless we have at least this many waiters to wake up.
+ */
+#define SAFE_WAITER_LIMIT 20
/*
lock->lf_type = fl->l_type;
lock->lf_head = head;
lock->lf_next = (struct lockf *)0;
+ lock->lf_waiters = 0;
TAILQ_INIT(&lock->lf_blkhd);
lock->lf_flags = ap->a_flags;
+ if (ap->a_flags & F_FLOCK)
+ lock->lf_flags |= F_WAKE1_SAFE;
+
lck_mtx_lock(&vp->v_lock); /* protect the lockf list */
/*
* Do the requested operation.
*/
lock->lf_next = block;
TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
+ block->lf_waiters++;
+
+ if ( !(lock->lf_flags & F_FLOCK))
+ block->lf_flags &= ~F_WAKE1_SAFE;
+
#ifdef LOCKF_DEBUGGING
if (lockf_debug & 1) {
lf_print("lf_setlock: blocking on", block);
}
#endif /* LOCKF_DEBUGGING */
error = msleep(lock, &vp->v_lock, priority, lockstr, 0);
+
+ if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
+ struct lockf *tlock;
+
+ if ((block = lf_getblock(lock))) {
+ TAILQ_FOREACH(tlock, &lock->lf_blkhd, lf_block) {
+ tlock->lf_next = block;
+ }
+ TAILQ_CONCAT(&block->lf_blkhd, &lock->lf_blkhd, lf_block);
+
+ block->lf_waiters += lock->lf_waiters;
+ lock->lf_waiters = 0;
+ }
+ }
if (error) { /* XXX */
/*
* We may have been awakened by a signal and/or by a
*/
if (lock->lf_next) {
TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
+ lock->lf_next->lf_waiters--;
lock->lf_next = NOLOCKF;
}
+ if (!TAILQ_EMPTY(&lock->lf_blkhd))
+ lf_wakelock(lock, TRUE);
+
FREE(lock, M_LOCKF);
return (error);
} /* XXX */
*/
if (lock->lf_type == F_RDLCK &&
overlap->lf_type == F_WRLCK)
- lf_wakelock(overlap);
+ lf_wakelock(overlap, TRUE);
overlap->lf_type = lock->lf_type;
FREE(lock, M_LOCKF);
lock = overlap; /* for lf_coelesce_adjacent() */
return (ENOLCK);
}
}
- lf_wakelock(overlap);
+ lf_wakelock(overlap, TRUE);
break;
case OVERLAP_CONTAINED_BY_LOCK:
*/
if (lock->lf_type == F_RDLCK &&
overlap->lf_type == F_WRLCK) {
- lf_wakelock(overlap);
+ lf_wakelock(overlap, TRUE);
} else {
while (!TAILQ_EMPTY(&overlap->lf_blkhd)) {
ltmp = TAILQ_FIRST(&overlap->lf_blkhd);
TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
lf_block);
+ overlap->lf_waiters--;
+
TAILQ_INSERT_TAIL(&lock->lf_blkhd,
ltmp, lf_block);
+ lock->lf_waiters++;
+
ltmp->lf_next = lock;
}
}
overlap->lf_next = lock;
overlap->lf_end = lock->lf_start - 1;
prev = &lock->lf_next;
- lf_wakelock(overlap);
+ lf_wakelock(overlap, TRUE);
needtolink = 0;
continue;
lock->lf_next = overlap;
}
overlap->lf_start = lock->lf_end + 1;
- lf_wakelock(overlap);
+ lf_wakelock(overlap, TRUE);
break;
}
break;
/*
* Wakeup the list of locks to be retried.
*/
- lf_wakelock(overlap);
+ lf_wakelock(overlap, FALSE);
switch (ovcase) {
case OVERLAP_NONE: /* satisfy compiler enum/switch */
* in a real-world performance problem.
*/
static void
-lf_wakelock(struct lockf *listhead)
+lf_wakelock(struct lockf *listhead, boolean_t force_all)
{
struct lockf *wakelock;
+ boolean_t wake_all = TRUE;
+
+ if (force_all == FALSE && (listhead->lf_flags & F_WAKE1_SAFE) && listhead->lf_waiters > SAFE_WAITER_LIMIT)
+ wake_all = FALSE;
while (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
wakelock = TAILQ_FIRST(&listhead->lf_blkhd);
TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
+ listhead->lf_waiters--;
+
wakelock->lf_next = NOLOCKF;
#ifdef LOCKF_DEBUGGING
if (lockf_debug & 2)
lf_print("lf_wakelock: awakening", wakelock);
#endif /* LOCKF_DEBUGGING */
+ if (wake_all == FALSE) {
+
+ TAILQ_CONCAT(&wakelock->lf_blkhd, &listhead->lf_blkhd, lf_block);
+ wakelock->lf_waiters = listhead->lf_waiters;
+ listhead->lf_waiters = 0;
+
+ if (!TAILQ_EMPTY(&wakelock->lf_blkhd)) {
+ struct lockf *tlock;
+
+ TAILQ_FOREACH(tlock, &wakelock->lf_blkhd, lf_block) {
+ tlock->lf_next = wakelock;
+ }
+ }
+ }
wakeup(wakelock);
+
+ if (wake_all == FALSE)
+ break;
}
}
# warning we do not support this platform yet
#endif /* __ppc__ */
-
}
-
{
int ret=0;
+ if (namelen == 0)
+ return(ENOTSUP);
+
ret = suser(kauth_cred_get(), &p->p_acflag);
if (ret)
return(ret);
}
static int
-sysctl_procargsx(int *name, __unused u_int namelen, user_addr_t where,
+sysctl_procargsx(int *name, u_int namelen, user_addr_t where,
size_t *sizep, proc_t cur_proc, int argc_yes)
{
proc_t p;
kauth_cred_t my_cred;
uid_t uid;
+ if ( namelen < 1 )
+ return(EINVAL);
+
if (argc_yes)
buflen -= sizeof(int); /* reserve first word to return argc */
#include <kern/locks.h>
#include <net/kext_net.h>
+#include <libkern/libkern.h>
+
#include <string.h>
static struct socket_filter_list sock_filter_head;
lck_mtx_unlock(sock_filter_lock);
return;
}
- }
- else {
+ } else {
/*
* Clear the removing flag. We will perform the detach here or
* request a delayed detach. Since we do an extra ref release
if (entry->sfe_socket->so_filteruse != 0) {
entry->sfe_flags |= SFEF_DETACHUSEZERO;
lck_mtx_unlock(sock_filter_lock);
+
+ if (unregistering) {
+#if DEBUG
+ printf("sflt_detach_private unregistering SFEF_DETACHUSEZERO "
+ "so%p so_filteruse %u so_usecount %d\n",
+ entry->sfe_socket, entry->sfe_socket->so_filteruse,
+ entry->sfe_socket->so_usecount);
+#endif
+ socket_unlock(entry->sfe_socket, 0);
+ }
+
return;
- }
- else {
+ } else {
/*
* Check if we are removing the last attached filter and
* the parent filter is being unregistered.
(struct encryption_info_command *) lcp,
addr, map, vp);
if (ret != LOAD_SUCCESS) {
- printf("proc %d: set unprotect error %d "
+ printf("proc %d: set_code_unprotect() error %d "
"for file \"%s\"\n",
p->p_pid, ret, vp->v_name);
- ret = LOAD_SUCCESS; /* ignore error */
+ /* Don't let the app run if it's
+ * encrypted but we failed to set up the
+ * decrypter */
+ psignal(p, SIGKILL);
}
break;
#endif
cryptname="com.apple.null";
break;
default:
- return LOAD_FAILURE;
+ return LOAD_BADMACHO;
}
len = MAXPATHLEN;
kr=text_crypter_create(&crypt_info, cryptname, (void*)vpath);
if(kr) {
- printf("set_code_unprotect: unable to find decrypter %s, kr=%d\n",
+ printf("set_code_unprotect: unable to create decrypter %s, kr=%d\n",
cryptname, kr);
- return LOAD_FAILURE;
+ return LOAD_RESOURCE;
}
/* this is terrible, but we have to rescan the load commands to find the
}
/* if we get here, did not find anything */
- return LOAD_FAILURE;
+ return LOAD_BADMACHO;
remap_now:
/* now remap using the decrypter */
kr = vm_map_apple_protected(map, map_offset, map_offset+map_size, &crypt_info);
- if(kr) printf("set_code_unprotect(): mapping failed with %x\n", kr);
+ if(kr) {
+ printf("set_code_unprotect(): mapping failed with %x\n", kr);
+ crypt_info.crypt_end(crypt_info.crypt_ops);
+ return LOAD_PROTECT;
+ }
return LOAD_SUCCESS;
}
KERNEL_DEBUG(0xefffd008 | DBG_FUNC_NONE, (int)item, 0, 0, 0, 0);
+ if ((prio < 0) || (prio >= 5))
+ return (EINVAL);
+
workqueue_lock_spin(p);
if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
break;
case WQOPS_QUEUE_REMOVE: {
+ if ((prio < 0) || (prio >= 5))
+ return (EINVAL);
+
workqueue_lock_spin(p);
if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
* whenever a new piece of memory mapped in from the VM crosses the 1MB
* boundary.
*/
-#define MBSHIFT 20 /* 1MB */
#define NSLABSPMB ((1 << MBSHIFT) >> MCLSHIFT) /* 512 slabs/grp */
typedef struct mcl_slabg {
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
/*XXX*/
#include <netinet/in.h>
#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
#if INET6
#include <netinet6/in6_var.h>
#include <netinet6/in6_ifattach.h>
static int if_cloners_count;
LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
+static struct ifaddr *ifa_ifwithnet_common(const struct sockaddr *,
+ unsigned int);
+
#if INET6
/*
* XXX: declare here to avoid to include many inet6 related files..
return result;
}
+/*
+ * Locate the source address of an interface based on a complete address.
+ */
+struct ifaddr *
+ifa_ifwithaddr_scoped(const struct sockaddr *addr, unsigned int ifscope)
+{
+ struct ifaddr *result = NULL;
+ struct ifnet *ifp;
+
+ if (ifscope == IFSCOPE_NONE)
+ return (ifa_ifwithaddr(addr));
+
+ ifnet_head_lock_shared();
+ if (ifscope > (unsigned int)if_index) {
+ ifnet_head_done();
+ return (NULL);
+ }
+
+ ifp = ifindex2ifnet[ifscope];
+ if (ifp != NULL) {
+ struct ifaddr *ifa = NULL;
+
+ /*
+ * This is suboptimal; there should be a better way
+ * to search for a given address of an interface.
+ */
+ ifnet_lock_shared(ifp);
+ for (ifa = ifp->if_addrhead.tqh_first; ifa != NULL;
+ ifa = ifa->ifa_link.tqe_next) {
+ if (ifa->ifa_addr->sa_family != addr->sa_family)
+ continue;
+ if (equal(addr, ifa->ifa_addr)) {
+ result = ifa;
+ break;
+ }
+ if ((ifp->if_flags & IFF_BROADCAST) &&
+ ifa->ifa_broadaddr != NULL &&
+ /* IP6 doesn't have broadcast */
+ ifa->ifa_broadaddr->sa_len != 0 &&
+ equal(ifa->ifa_broadaddr, addr)) {
+ result = ifa;
+ break;
+ }
+ }
+ if (result != NULL)
+ ifaref(result);
+ ifnet_lock_done(ifp);
+ }
+ ifnet_head_done();
+
+ return (result);
+}
+
+struct ifaddr *
+ifa_ifwithnet(const struct sockaddr *addr)
+{
+ return (ifa_ifwithnet_common(addr, IFSCOPE_NONE));
+}
+
+struct ifaddr *
+ifa_ifwithnet_scoped(const struct sockaddr *addr, unsigned int ifscope)
+{
+ return (ifa_ifwithnet_common(addr, ifscope));
+}
+
/*
* Find an interface on a specific network. If many, choice
* is most specific found.
*/
-struct ifaddr *
-ifa_ifwithnet(
- const struct sockaddr *addr)
+static struct ifaddr *
+ifa_ifwithnet_common(const struct sockaddr *addr, unsigned int ifscope)
{
struct ifnet *ifp;
struct ifaddr *ifa = NULL;
u_int af = addr->sa_family;
const char *addr_data = addr->sa_data, *cplim;
+ if (!ip_doscopedroute || addr->sa_family != AF_INET)
+ ifscope = IFSCOPE_NONE;
+
ifnet_head_lock_shared();
/*
* AF_LINK addresses can be looked up directly by their index number,
} else
#endif /* __APPLE__*/
{
+ /*
+ * If we're looking up with a scope,
+ * find using a matching interface.
+ */
+ if (ifscope != IFSCOPE_NONE &&
+ ifp->if_index != ifscope)
+ continue;
+
/*
* if we have a special address handler,
* then use it instead of the generic one.
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
void ifma_release(struct ifmultiaddr *ifma);
struct ifaddr *ifa_ifwithaddr(const struct sockaddr *);
+struct ifaddr *ifa_ifwithaddr_scoped(const struct sockaddr *, unsigned int);
struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *);
struct ifaddr *ifa_ifwithnet(const struct sockaddr *);
+struct ifaddr *ifa_ifwithnet_scoped(const struct sockaddr *, unsigned int);
struct ifaddr *ifa_ifwithroute(int, const struct sockaddr *, const struct sockaddr *);
struct ifaddr *ifa_ifwithroute_locked(int, const struct sockaddr *, const struct sockaddr *);
+struct ifaddr *ifa_ifwithroute_scoped_locked(int, const struct sockaddr *,
+ const struct sockaddr *, unsigned int);
struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, struct ifnet *);
struct ifaddr *ifa_ifpgetprimary(struct ifnet *, int);
void ifafree(struct ifaddr *);
/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
static struct radix_mask *
rn_new_radix_mask(struct radix_node *tt,
struct radix_mask *next);
-static int rn_satsifies_leaf(char *trial, struct radix_node *leaf,
- int skip);
+static int rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip,
+ rn_matchf_t *f, void *w);
+
+#define RN_MATCHF(rn, f, arg) (f == NULL || (*f)((rn), arg))
/*
* The data structure for the keys is a radix tree with one way
struct radix_node *
rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head)
+{
+ return (rn_lookup_args(v_arg, m_arg, head, NULL, NULL));
+}
+
+struct radix_node *
+rn_lookup_args(void *v_arg, void *m_arg, struct radix_node_head *head,
+ rn_matchf_t *f, void *w)
{
struct radix_node *x;
caddr_t netmask = NULL;
return (NULL);
netmask = x->rn_key;
}
- x = rn_match(v_arg, head);
+ x = rn_match_args(v_arg, head, f, w);
if (x && netmask) {
while (x && x->rn_mask != netmask)
x = x->rn_dupedkey;
return x;
}
+/*
+ * Returns true if address 'trial' has no bits differing from the
+ * leaf's key when compared under the leaf's mask. In other words,
+ * returns true when 'trial' matches leaf. If a leaf-matching
+ * routine is passed in, it is also used to find a match on the
+ * conditions defined by the caller of rn_match.
+ */
static int
-rn_satsifies_leaf(char *trial, struct radix_node *leaf, int skip)
+rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip,
+ rn_matchf_t *f, void *w)
{
char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask;
char *cplim;
for (cp += skip; cp < cplim; cp++, cp2++, cp3++)
if ((*cp ^ *cp2) & *cp3)
return 0;
- return 1;
+
+ return (RN_MATCHF(leaf, f, w));
}
struct radix_node *
rn_match(void *v_arg, struct radix_node_head *head)
+{
+ return (rn_match_args(v_arg, head, NULL, NULL));
+}
+
+struct radix_node *
+rn_match_args(void *v_arg, struct radix_node_head *head,
+ rn_matchf_t *f, void *w)
{
caddr_t v = v_arg;
struct radix_node *t = head->rnh_treetop, *x;
*/
if (t->rn_flags & RNF_ROOT)
t = t->rn_dupedkey;
- return t;
+ if (t == NULL || RN_MATCHF(t, f, w)) {
+ return (t);
+ } else {
+ /*
+ * Although we found an exact match on the key,
+ * f() is looking for some other criteria as well.
+ * Continue looking as if the exact match failed.
+ */
+ if (t->rn_parent->rn_flags & RNF_ROOT) {
+ /* Hit the top; have to give up */
+ return (NULL);
+ }
+ b = 0;
+ goto keeplooking;
+ }
on1:
test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
for (b = 7; (test >>= 1) > 0;)
b--;
+keeplooking:
matched_off = cp - v;
b += matched_off << 3;
rn_bit = -1 - b;
*/
if ((saved_t = t)->rn_mask == 0)
t = t->rn_dupedkey;
- for (; t; t = t->rn_dupedkey)
+ for (; t; t = t->rn_dupedkey) {
/*
* Even if we don't match exactly as a host,
* we may match if the leaf we wound up at is
* a route to a net.
*/
if (t->rn_flags & RNF_NORMAL) {
- if (rn_bit <= t->rn_bit)
- return t;
- } else if (rn_satsifies_leaf(v, t, matched_off))
- return t;
+ if ((rn_bit <= t->rn_bit) && RN_MATCHF(t, f, w))
+ return (t);
+ } else if (rn_satisfies_leaf(v, t, matched_off, f, w)) {
+ return (t);
+ }
+ }
t = saved_t;
/* start searching up the tree */
do {
*/
while (m) {
if (m->rm_flags & RNF_NORMAL) {
- if (rn_bit <= m->rm_bit)
+ if ((rn_bit <= m->rm_bit) &&
+ RN_MATCHF(m->rm_leaf, f, w))
return (m->rm_leaf);
} else {
off = min(t->rn_offset, matched_off);
x = rn_search_m(v, t, m->rm_mask);
while (x && x->rn_mask != m->rm_mask)
x = x->rn_dupedkey;
- if (x && rn_satsifies_leaf(v, x, off))
- return x;
+ if (x && rn_satisfies_leaf(v, x, off, f, w))
+ return (x);
}
m = m->rm_mklist;
}
} while (t != top);
- return NULL;
+ return (NULL);
}
#ifdef RN_DEBUG
rnh->rnh_addaddr = rn_addroute;
rnh->rnh_deladdr = rn_delete;
rnh->rnh_matchaddr = rn_match;
+ rnh->rnh_matchaddr_args = rn_match_args;
rnh->rnh_lookup = rn_lookup;
+ rnh->rnh_lookup_args = rn_lookup_args;
rnh->rnh_walktree = rn_walktree;
rnh->rnh_walktree_from = rn_walktree_from;
rnh->rnh_treetop = t;
/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#define MKFree(m) { (m)->rm_mklist = rn_mkfreelist; rn_mkfreelist = (m);}
typedef int walktree_f_t(struct radix_node *, void *);
+typedef int rn_matchf_t(struct radix_node *, void *);
struct radix_node_head {
struct radix_node *rnh_treetop;
(void *v, void *mask, struct radix_node_head *head);
struct radix_node *(*rnh_matchaddr) /* locate based on sockaddr */
(void *v, struct radix_node_head *head);
+ /* locate based on sockaddr and rn_matchf_t() */
+ struct radix_node *(*rnh_matchaddr_args)
+ (void *v, struct radix_node_head *head,
+ rn_matchf_t *f, void *w);
struct radix_node *(*rnh_lookup) /* locate based on sockaddr */
(void *v, void *mask, struct radix_node_head *head);
+ /* locate based on sockaddr, mask and rn_matchf_t() */
+ struct radix_node *(*rnh_lookup_args)
+ (void *v, void *mask, struct radix_node_head *head,
+ rn_matchf_t *f, void *);
struct radix_node *(*rnh_matchpkt) /* locate based on packet hdr */
(void *v, struct radix_node_head *head);
int (*rnh_walktree) /* traverse tree */
struct radix_node [2]),
*rn_delete(void *, void *, struct radix_node_head *),
*rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head),
- *rn_match(void *, struct radix_node_head *);
+ *rn_lookup_args(void *v_arg, void *m_arg, struct radix_node_head *head,
+ rn_matchf_t *, void *),
+ *rn_match(void *, struct radix_node_head *),
+ *rn_match_args(void *, struct radix_node_head *, rn_matchf_t *, void *);
#endif /* PRIVATE */
#endif /* _RADIX_H_ */
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <net/route.h>
#include <netinet/in.h>
+#include <netinet/in_var.h>
#include <netinet/ip_mroute.h>
+#include <netinet/ip_var.h>
#include <net/if_dl.h>
static void rtable_init(void **);
static inline void rtref_audit(struct rtentry_dbg *);
static inline void rtunref_audit(struct rtentry_dbg *);
+static struct rtentry *rtalloc1_common_locked(struct sockaddr *, int, u_long,
+ unsigned int);
+static int rtrequest_common_locked(int, struct sockaddr *,
+ struct sockaddr *, struct sockaddr *, int, struct rtentry **,
+ unsigned int);
+static void rtalloc_ign_common_locked(struct route *, u_long, unsigned int);
+static inline void sa_set_ifscope(struct sockaddr *, unsigned int);
+static struct sockaddr *sin_copy(struct sockaddr_in *, struct sockaddr_in *,
+ unsigned int);
+static struct sockaddr *mask_copy(struct sockaddr *, struct sockaddr_in *,
+ unsigned int);
+static struct radix_node *node_lookup(struct sockaddr *, struct sockaddr *,
+ unsigned int);
+static struct radix_node *node_lookup_default(void);
+static int rn_match_ifscope(struct radix_node *, void *);
+static struct ifaddr *ifa_ifwithroute_common_locked(int,
+ const struct sockaddr *, const struct sockaddr *, unsigned int);
__private_extern__ u_long route_generation = 0;
extern int use_routegenid;
+/*
+ * sockaddr_in with embedded interface scope; this is used internally
+ * to keep track of scoped route entries in the routing table. The
+ * fact that such a scope is embedded in the structure is an artifact
+ * of the current implementation which could change in future.
+ */
+struct sockaddr_inifscope {
+ __uint8_t sin_len;
+ sa_family_t sin_family;
+ in_port_t sin_port;
+ struct in_addr sin_addr;
+ /*
+ * To avoid possible conflict with an overlaid sockaddr_inarp
+ * having sin_other set to SIN_PROXY, we use the first 4-bytes
+ * of sin_zero since sin_srcaddr is one of the unused fields
+ * in sockaddr_inarp.
+ */
+ union {
+ char sin_zero[8];
+ struct {
+ __uint32_t ifscope;
+ } _in_index;
+ } un;
+#define sin_ifscope un._in_index.ifscope
+};
+
+#define SIN(sa) ((struct sockaddr_in *)(size_t)(sa))
+#define SINIFSCOPE(sa) ((struct sockaddr_inifscope *)(size_t)(sa))
+
+#define ASSERT_SINIFSCOPE(sa) { \
+ if ((sa)->sa_family != AF_INET || \
+ (sa)->sa_len < sizeof (struct sockaddr_in)) \
+ panic("%s: bad sockaddr_in %p\n", __func__, sa); \
+}
+
+/*
+ * Argument to leaf-matching routine; at present it is scoped routing
+ * specific but can be expanded in future to include other search filters.
+ */
+struct matchleaf_arg {
+ unsigned int ifscope; /* interface scope */
+};
+
+/*
+ * For looking up the non-scoped default route (sockaddr instead
+ * of sockaddr_in for convenience).
+ */
+static struct sockaddr sin_def = {
+ sizeof (struct sockaddr_in), AF_INET, { 0, }
+};
+
+/*
+ * Interface index (scope) of the primary interface; determined at
+ * the time when the default, non-scoped route gets added, changed
+ * or deleted. Protected by rt_mtx.
+ */
+static unsigned int primary_ifscope = IFSCOPE_NONE;
+
+#define INET_DEFAULT(dst) \
+ ((dst)->sa_family == AF_INET && SIN(dst)->sin_addr.s_addr == 0)
+
+#define RT(r) ((struct rtentry *)r)
+#define RT_HOST(r) (RT(r)->rt_flags & RTF_HOST)
+
+/*
+ * Given a route, determine whether or not it is the non-scoped default
+ * route; dst typically comes from rt_key(rt) but may be coming from
+ * a separate place when rt is in the process of being created.
+ */
+boolean_t
+rt_inet_default(struct rtentry *rt, struct sockaddr *dst)
+{
+ return (INET_DEFAULT(dst) && !(rt->rt_flags & RTF_IFSCOPE));
+}
+
+/*
+ * Set the ifscope of the primary interface; caller holds rt_mtx.
+ */
+void
+set_primary_ifscope(unsigned int ifscope)
+{
+ primary_ifscope = ifscope;
+}
+
+/*
+ * Return the ifscope of the primary interface; caller holds rt_mtx.
+ */
+unsigned int
+get_primary_ifscope(void)
+{
+ return (primary_ifscope);
+}
+
+/*
+ * Embed ifscope into a given sockaddr_in.
+ */
+static inline void
+sa_set_ifscope(struct sockaddr *sa, unsigned int ifscope)
+{
+ /* Caller must pass in sockaddr_in */
+ ASSERT_SINIFSCOPE(sa);
+
+ SINIFSCOPE(sa)->sin_ifscope = ifscope;
+}
+
+/*
+ * Given a sockaddr_in, return the embedded ifscope to the caller.
+ */
+unsigned int
+sa_get_ifscope(struct sockaddr *sa)
+{
+ /* Caller must pass in sockaddr_in */
+ ASSERT_SINIFSCOPE(sa);
+
+ return (SINIFSCOPE(sa)->sin_ifscope);
+}
+
+/*
+ * Copy a sockaddr_in src to dst and embed ifscope into dst.
+ */
+static struct sockaddr *
+sin_copy(struct sockaddr_in *src, struct sockaddr_in *dst, unsigned int ifscope)
+{
+ *dst = *src;
+ sa_set_ifscope(SA(dst), ifscope);
+
+ return (SA(dst));
+}
+
+/*
+ * Copy a mask from src to a sockaddr_in dst and embed ifscope into dst.
+ */
+static struct sockaddr *
+mask_copy(struct sockaddr *src, struct sockaddr_in *dst, unsigned int ifscope)
+{
+ /* We know dst is at least the size of sockaddr{_in} */
+ bzero(dst, sizeof (*dst));
+ rt_maskedcopy(src, SA(dst), src);
+
+ /*
+ * The length of the mask sockaddr would need to be adjusted
+ * to cover the additional sin_ifscope field; when ifscope is
+ * IFSCOPE_NONE, we'd end up clearing the embedded ifscope on
+ * the destination mask in addition to extending the length
+ * of the sockaddr, as a side effect. This is okay, as any
+ * trailing zeroes would be skipped by rn_addmask prior to
+ * inserting or looking up the mask in the mask tree.
+ */
+ SINIFSCOPE(dst)->sin_ifscope = ifscope;
+ SINIFSCOPE(dst)->sin_len =
+ offsetof(struct sockaddr_inifscope, sin_ifscope) +
+ sizeof (SINIFSCOPE(dst)->sin_ifscope);
+
+ return (SA(dst));
+}
+
+/*
+ * Callback leaf-matching routine for rn_matchaddr_args used
+ * for looking up an exact match for a scoped route entry.
+ */
+static int
+rn_match_ifscope(struct radix_node *rn, void *arg)
+{
+ struct rtentry *rt = (struct rtentry *)rn;
+ struct matchleaf_arg *ma = arg;
+
+ if (!(rt->rt_flags & RTF_IFSCOPE) || rt_key(rt)->sa_family != AF_INET)
+ return (0);
+
+ return (SINIFSCOPE(rt_key(rt))->sin_ifscope == ma->ifscope);
+}
static void
rtable_init(void **table)
void
rtalloc_ign_locked(struct route *ro, u_long ignore)
+{
+ return (rtalloc_ign_common_locked(ro, ignore, IFSCOPE_NONE));
+}
+
+void
+rtalloc_scoped_ign_locked(struct route *ro, u_long ignore, unsigned int ifscope)
+{
+ return (rtalloc_ign_common_locked(ro, ignore, ifscope));
+}
+
+static void
+rtalloc_ign_common_locked(struct route *ro, u_long ignore,
+ unsigned int ifscope)
{
struct rtentry *rt;
if ((rt = ro->ro_rt) != NULL) {
if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
return;
- /* XXX - We are probably always at splnet here already. */
rtfree_locked(rt);
ro->ro_rt = NULL;
}
- ro->ro_rt = rtalloc1_locked(&ro->ro_dst, 1, ignore);
+ ro->ro_rt = rtalloc1_common_locked(&ro->ro_dst, 1, ignore, ifscope);
if (ro->ro_rt)
ro->ro_rt->generation_id = route_generation;
}
lck_mtx_unlock(rt_mtx);
}
+struct rtentry *
+rtalloc1_locked(struct sockaddr *dst, int report, u_long ignflags)
+{
+ return (rtalloc1_common_locked(dst, report, ignflags, IFSCOPE_NONE));
+}
+
+struct rtentry *
+rtalloc1_scoped_locked(struct sockaddr *dst, int report, u_long ignflags,
+ unsigned int ifscope)
+{
+ return (rtalloc1_common_locked(dst, report, ignflags, ifscope));
+}
+
/*
* Look up the route that matches the address given
* Or, at least try.. Create a cloned route if needed.
*/
-struct rtentry *
-rtalloc1_locked(struct sockaddr *dst, int report, u_long ignflags)
+static struct rtentry *
+rtalloc1_common_locked(struct sockaddr *dst, int report, u_long ignflags,
+ unsigned int ifscope)
{
struct radix_node_head *rnh = rt_tables[dst->sa_family];
- struct rtentry *rt;
- struct radix_node *rn;
- struct rtentry *newrt = 0;
+ struct rtentry *rt, *newrt = NULL;
struct rt_addrinfo info;
u_long nflags;
int err = 0, msgtype = RTM_MISS;
+
+ if (rnh == NULL)
+ goto unreachable;
+
/*
- * Look up the address in the table for that Address Family
+ * Find the longest prefix or exact (in the scoped case) address match;
+ * callee adds a reference to entry and checks for root node as well
*/
- if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
- ((rn->rn_flags & RNF_ROOT) == 0)) {
+ rt = rt_lookup(FALSE, dst, NULL, rnh, ifscope);
+ if (rt == NULL)
+ goto unreachable;
+
+ newrt = rt;
+ nflags = rt->rt_flags & ~ignflags;
+ if (report && (nflags & (RTF_CLONING | RTF_PRCLONING))) {
/*
- * If we find it and it's not the root node, then
- * get a refernce on the rtentry associated.
+ * We are apparently adding (report = 0 in delete).
+ * If it requires that it be cloned, do so.
+ * (This implies it wasn't a HOST route.)
*/
- newrt = rt = (struct rtentry *)rn;
- nflags = rt->rt_flags & ~ignflags;
- if (report && (nflags & (RTF_CLONING | RTF_PRCLONING))) {
+ err = rtrequest_locked(RTM_RESOLVE, dst, NULL, NULL, 0, &newrt);
+ if (err) {
/*
- * We are apparently adding (report = 0 in delete).
- * If it requires that it be cloned, do so.
- * (This implies it wasn't a HOST route.)
+ * If the cloning didn't succeed, maybe what we
+ * have from lookup above will do. Return that;
+ * no need to hold another reference since it's
+ * already done.
*/
- err = rtrequest_locked(RTM_RESOLVE, dst, SA(0),
- SA(0), 0, &newrt);
- if (err) {
- /*
- * If the cloning didn't succeed, maybe
- * what we have will do. Return that.
- */
- newrt = rt;
- rtref(rt);
- goto miss;
- }
- if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
- /*
- * If the new route specifies it be
- * externally resolved, then go do that.
- */
- msgtype = RTM_RESOLVE;
- goto miss;
- }
- } else
- rtref(rt);
- } else {
+ newrt = rt;
+ goto miss;
+ }
+
/*
- * Either we hit the root or couldn't find any match,
- * Which basically means
- * "caint get there frm here"
+ * We cloned it; drop the original route found during lookup.
+	 * The resulting cloned route (newrt) would now have an extra
+ * reference held during rtrequest.
*/
- rtstat.rts_unreach++;
- miss: if (report) {
+ rtfree_locked(rt);
+ if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
/*
- * If required, report the failure to the supervising
- * Authorities.
- * For a delete, this is not an error. (report == 0)
+ * If the new route specifies it be
+ * externally resolved, then go do that.
*/
- bzero((caddr_t)&info, sizeof(info));
- info.rti_info[RTAX_DST] = dst;
- rt_missmsg(msgtype, &info, 0, err);
+ msgtype = RTM_RESOLVE;
+ goto miss;
}
}
+ goto done;
+
+unreachable:
+ /*
+ * Either we hit the root or couldn't find any match,
+	 * which basically means "can't get there from here"
+ */
+ rtstat.rts_unreach++;
+miss:
+ if (report) {
+ /*
+ * If required, report the failure to the supervising
+ * Authorities.
+ * For a delete, this is not an error. (report == 0)
+ */
+ bzero((caddr_t)&info, sizeof(info));
+ info.rti_info[RTAX_DST] = dst;
+ rt_missmsg(msgtype, &info, 0, err);
+ }
+done:
return (newrt);
}
if (rt->rt_refcnt > 0)
return;
- if ((rt->rt_flags & RTF_TRACKREFS) != 0)
- printf("%s rt(%p)->rt_refcnt(%d), caller=%p\n", __FUNCTION__,
- rt, rt->rt_refcnt, __builtin_return_address(0));
-
/*
* On last reference give the "close method" a chance to cleanup
* private state. This also permits (for IPv4 and IPv6) a chance
rtref_audit((struct rtentry_dbg *)p);
p->rt_refcnt++;
-
- if ((p->rt_flags & RTF_TRACKREFS) != 0)
- printf("%s rt(%p)->rt_refcnt(%d), caller=%p\n", __FUNCTION__,
- p, p->rt_refcnt, __builtin_return_address(0));
}
static inline void
* destination to go through the given gateway.
* Normally called as a result of a routing redirect
* message from the network layer.
- *
- * N.B.: must be called at splnet
- *
*/
void
-rtredirect(struct sockaddr *dst, struct sockaddr *gateway,
- struct sockaddr *netmask, int flags, struct sockaddr *src,
- struct rtentry **rtp)
+rtredirect(struct ifnet *ifp, struct sockaddr *dst, struct sockaddr *gateway,
+ struct sockaddr *netmask, int flags, struct sockaddr *src,
+ struct rtentry **rtp)
{
- struct rtentry *rt;
+ struct rtentry *rt = NULL;
int error = 0;
short *stat = 0;
struct rt_addrinfo info;
struct ifaddr *ifa = NULL;
+ unsigned int ifscope = (ifp != NULL) ? ifp->if_index : IFSCOPE_NONE;
+ struct sockaddr_in sin;
lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED);
lck_mtx_lock(rt_mtx);
- /* verify the gateway is directly reachable */
- if ((ifa = ifa_ifwithnet(gateway)) == 0) {
+ /*
+ * Verify the gateway is directly reachable; if scoped routing
+ * is enabled, verify that it is reachable from the interface
+	 * on which the ICMP redirect arrived.
+ */
+ if ((ifa = ifa_ifwithnet_scoped(gateway, ifscope)) == NULL) {
error = ENETUNREACH;
goto out;
}
- rt = rtalloc1_locked(dst, 0, RTF_CLONING | RTF_PRCLONING);
+ /* Lookup route to the destination (from the original IP header) */
+ rt = rtalloc1_scoped_locked(dst, 0, RTF_CLONING|RTF_PRCLONING, ifscope);
+
+ /* Embed scope in src for comparison against rt_gateway below */
+ if (ip_doscopedroute && src->sa_family == AF_INET)
+ src = sin_copy(SIN(src), &sin, ifscope);
+
/*
* If the redirect isn't from our current router for this dst,
* it's either old or wrong. If it redirects us to ourselves,
if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
/*
* Changing from route to net => route to host.
- * Create new route, rather than smashing route to net.
+ * Create new route, rather than smashing route
+ * to net; similar to cloned routes, the newly
+ * created host route is scoped as well.
*/
create:
flags |= RTF_GATEWAY | RTF_DYNAMIC;
- error = rtrequest_locked((int)RTM_ADD, dst, gateway,
- netmask, flags,
- (struct rtentry **)0);
+ error = rtrequest_scoped_locked(RTM_ADD, dst,
+ gateway, netmask, flags, NULL, ifscope);
stat = &rtstat.rts_dynamic;
} else {
/*
/*
* add the key and gateway (in one malloc'd chunk).
*/
- rt_setgate(rt, rt_key(rt), gateway);
+ error = rt_setgate(rt, rt_key(rt), gateway);
}
- } else
+ } else {
error = EHOSTUNREACH;
+ }
done:
if (rt) {
if (rtp && !error)
rtfree_locked(rt);
}
out:
- if (error)
+ if (error) {
rtstat.rts_badredirect++;
- else if (stat != NULL)
- (*stat)++;
+ } else {
+ if (stat != NULL)
+ (*stat)++;
+ if (use_routegenid)
+ route_generation++;
+ }
bzero((caddr_t)&info, sizeof(info));
info.rti_info[RTAX_DST] = dst;
info.rti_info[RTAX_GATEWAY] = gateway;
}
struct ifaddr *
-ifa_ifwithroute_locked(
- int flags,
- const struct sockaddr *dst,
- const struct sockaddr *gateway)
+ifa_ifwithroute_locked(int flags, const struct sockaddr *dst,
+ const struct sockaddr *gateway)
+{
+ return (ifa_ifwithroute_common_locked((flags & ~RTF_IFSCOPE), dst,
+ gateway, IFSCOPE_NONE));
+}
+
+struct ifaddr *
+ifa_ifwithroute_scoped_locked(int flags, const struct sockaddr *dst,
+ const struct sockaddr *gateway, unsigned int ifscope)
+{
+ if (ifscope != IFSCOPE_NONE)
+ flags |= RTF_IFSCOPE;
+ else
+ flags &= ~RTF_IFSCOPE;
+
+ return (ifa_ifwithroute_common_locked(flags, dst, gateway, ifscope));
+}
+
+static struct ifaddr *
+ifa_ifwithroute_common_locked(int flags, const struct sockaddr *dst,
+ const struct sockaddr *gateway, unsigned int ifscope)
{
struct ifaddr *ifa = NULL;
struct rtentry *rt = NULL;
+ struct sockaddr_in dst_in, gw_in;
lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
+ if (ip_doscopedroute) {
+ /*
+ * Just in case the sockaddr passed in by the caller
+ * contains embedded scope, make sure to clear it since
+ * IPv4 interface addresses aren't scoped.
+ */
+ if (dst != NULL && dst->sa_family == AF_INET)
+ dst = sin_copy(SIN(dst), &dst_in, IFSCOPE_NONE);
+ if (gateway != NULL && gateway->sa_family == AF_INET)
+ gateway = sin_copy(SIN(gateway), &gw_in, IFSCOPE_NONE);
+ }
+
if (!(flags & RTF_GATEWAY)) {
/*
* If we are adding a route to an interface,
ifa = ifa_ifwithdstaddr(dst);
}
if (ifa == NULL)
- ifa = ifa_ifwithaddr(gateway);
+ ifa = ifa_ifwithaddr_scoped(gateway, ifscope);
} else {
/*
* If we are adding a route to a remote net
ifa = ifa_ifwithdstaddr(gateway);
}
if (ifa == NULL)
- ifa = ifa_ifwithnet(gateway);
+ ifa = ifa_ifwithnet_scoped(gateway, ifscope);
if (ifa == NULL) {
/* Workaround to avoid gcc warning regarding const variable */
- rt = rtalloc1_locked((struct sockaddr *)(size_t)dst, 0, 0UL);
+ rt = rtalloc1_scoped_locked((struct sockaddr *)(size_t)dst,
+ 0, 0UL, ifscope);
if (rt != NULL) {
ifa = rt->rt_ifa;
if (ifa != NULL)
*/
if ((ifa == NULL ||
!equal(ifa->ifa_addr, (struct sockaddr *)(size_t)gateway)) &&
- (rt = rtalloc1_locked((struct sockaddr *)(size_t)gateway,
- 0, 0UL)) != NULL) {
+ (rt = rtalloc1_scoped_locked((struct sockaddr *)(size_t)gateway,
+ 0, 0UL, ifscope)) != NULL) {
if (ifa != NULL)
ifafree(ifa);
ifa = rt->rt_ifa;
ifaref(ifa);
rtunref(rt);
}
+ /*
+ * If an interface scope was specified, the interface index of
+ * the found ifaddr must be equivalent to that of the scope;
+ * otherwise there is no match.
+ */
+ if ((flags & RTF_IFSCOPE) &&
+ ifa != NULL && ifa->ifa_ifp->if_index != ifscope) {
+ ifafree(ifa);
+ ifa = NULL;
+ }
+
return (ifa);
}
struct radix_node_head *rnh;
};
+int
+rtrequest_locked(int req, struct sockaddr *dst, struct sockaddr *gateway,
+ struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
+{
+ return (rtrequest_common_locked(req, dst, gateway, netmask,
+ (flags & ~RTF_IFSCOPE), ret_nrt, IFSCOPE_NONE));
+}
+
+int
+rtrequest_scoped_locked(int req, struct sockaddr *dst,
+ struct sockaddr *gateway, struct sockaddr *netmask, int flags,
+ struct rtentry **ret_nrt, unsigned int ifscope)
+{
+ if (ifscope != IFSCOPE_NONE)
+ flags |= RTF_IFSCOPE;
+ else
+ flags &= ~RTF_IFSCOPE;
+
+ return (rtrequest_common_locked(req, dst, gateway, netmask,
+ flags, ret_nrt, ifscope));
+}
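A hedged caller-side sketch of the scoped wrapper above (illustrative only; gateway, ifp and a held rt_mtx are assumptions, and scoped routing must be enabled via ip_doscopedroute). As the comment below explains, the caller supplies the plain destination plus a scope value and lets the routine embed the scope into the radix key:

	struct sockaddr_in d, m;
	struct rtentry *nrt = NULL;
	int error;

	bzero(&d, sizeof (d));			/* destination 0.0.0.0 */
	d.sin_len = sizeof (d);
	d.sin_family = AF_INET;
	bzero(&m, sizeof (m));			/* mask 0.0.0.0 (default route) */
	m.sin_len = sizeof (m);
	m.sin_family = AF_INET;

	/* per-interface default route; RTF_IFSCOPE is set internally */
	error = rtrequest_scoped_locked(RTM_ADD, SA(&d), gateway, SA(&m),
	    RTF_GATEWAY | RTF_STATIC, &nrt, ifp->if_index);
	if (error == 0 && nrt != NULL)
		rtunref(nrt);			/* drop the returned reference */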
+
/*
- * Do appropriate manipulations of a routing tree given
- * all the bits of info needed
+ * Do appropriate manipulations of a routing tree given all the bits of
+ * info needed.
+ *
+ * Embedding the scope in the radix key is an internal job that should be
+ * left to routines in this module. Callers should specify the scope value
+ * to the "scoped" variants of route routines instead of manipulating the
+ * key itself. This is typically done when creating a scoped route, e.g.
+ * rtrequest(RTM_ADD). Once such a route is created and marked with the
+ * RTF_IFSCOPE flag, callers can simply use its rt_key(rt) to clone it
+ * (RTM_RESOLVE) or to remove it (RTM_DELETE). An exception to this is
+ * during certain routing socket operations where the search key might be
+ * derived from the routing message itself, in which case the caller must
+ * specify the destination address and scope value for RTM_ADD/RTM_DELETE.
*/
-int
-rtrequest_locked(
- int req,
- struct sockaddr *dst,
- struct sockaddr *gateway,
- struct sockaddr *netmask,
- int flags,
- struct rtentry **ret_nrt)
+static int
+rtrequest_common_locked(int req, struct sockaddr *dst0,
+ struct sockaddr *gateway, struct sockaddr *netmask, int flags,
+ struct rtentry **ret_nrt, unsigned int ifscope)
{
int error = 0;
struct rtentry *rt;
struct radix_node *rn;
struct radix_node_head *rnh;
struct ifaddr *ifa = NULL;
- struct sockaddr *ndst;
+ struct sockaddr *ndst, *dst = dst0;
+ struct sockaddr_in sin, mask;
#define senderr(x) { error = x ; goto bad; }
lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
*/
if (flags & RTF_HOST)
netmask = 0;
+
+ /*
+ * If RTF_IFSCOPE is specified, use a local copy of the destination
+ * address to embed the scope into. This logic is repeated below
+ * in the RTM_RESOLVE handler since the caller does not normally
+ * specify such a flag during a resolve; instead it passes in the
+	 * route used for cloning, from which the scope info is derived.
+ * Note also that in the case of RTM_DELETE, the address passed in
+ * by the caller might already contain the embedded scope info when
+ * it is the key itself, thus making RTF_IFSCOPE unnecessary; one
+ * instance where it is explicitly set is inside route_output()
+ * as part of handling a routing socket request.
+ */
+ if (req != RTM_RESOLVE && (flags & RTF_IFSCOPE)) {
+ /* Scoped routing is for AF_INET only */
+ if (dst->sa_family != AF_INET ||
+ (req == RTM_ADD && !ip_doscopedroute))
+ senderr(EINVAL);
+
+ if (ifscope == IFSCOPE_NONE) {
+ flags &= ~RTF_IFSCOPE;
+ } else {
+ /* Embed ifscope into the key (local copy) */
+ dst = sin_copy(SIN(dst), &sin, ifscope);
+
+ /* Embed ifscope into netmask (local copy) */
+ if (netmask != NULL)
+ netmask = mask_copy(netmask, &mask, ifscope);
+ }
+ }
+
switch (req) {
case RTM_DELETE:
/*
(struct rtentry_dbg *)rt, rtd_trash_link);
}
+ /*
+ * If this is the (non-scoped) default route, clear
+ * the interface index used for the primary ifscope.
+ */
+ if (rt_inet_default(rt, rt_key(rt)))
+ set_primary_ifscope(IFSCOPE_NONE);
+
/*
* If the caller wants it, then it can have it,
* but it's up to it to free the rtentry as we won't be
gateway = rt->rt_gateway;
if ((netmask = rt->rt_genmask) == 0)
flags |= RTF_HOST;
+
+ if (!ip_doscopedroute || dst->sa_family != AF_INET)
+ goto makeroute;
+ /*
+ * When scoped routing is enabled, cloned entries are
+ * always scoped according to the interface portion of
+	 * the parent route.  The exception to this is IPv4
+ * link local addresses.
+ */
+ if (!IN_LINKLOCAL(ntohl(SIN(dst)->sin_addr.s_addr))) {
+ if (flags & RTF_IFSCOPE) {
+ ifscope = sa_get_ifscope(rt_key(rt));
+ } else {
+ ifscope = rt->rt_ifp->if_index;
+ flags |= RTF_IFSCOPE;
+ }
+ } else {
+ ifscope = IFSCOPE_NONE;
+ flags &= ~RTF_IFSCOPE;
+ }
+
+ /* Embed or clear ifscope into/from the key (local copy) */
+ dst = sin_copy(SIN(dst), &sin, ifscope);
+
+ /* Embed or clear ifscope into/from netmask (local copy) */
+ if (netmask != NULL)
+ netmask = mask_copy(netmask, &mask, ifscope);
+
goto makeroute;
case RTM_ADD:
if ((flags & RTF_GATEWAY) && !gateway)
- panic("rtrequest: GATEWAY but no gateway");
+ panic("rtrequest: RTF_GATEWAY but no gateway");
- if ((ifa = ifa_ifwithroute_locked(flags, dst, gateway)) == 0)
+ if (flags & RTF_IFSCOPE) {
+ ifa = ifa_ifwithroute_scoped_locked(flags, dst0,
+ gateway, ifscope);
+ } else {
+ ifa = ifa_ifwithroute_locked(flags, dst0, gateway);
+ }
+ if (ifa == NULL)
senderr(ENETUNREACH);
-
- makeroute:
+makeroute:
if ((rt = rte_alloc()) == NULL)
senderr(ENOBUFS);
Bzero(rt, sizeof(*rt));
rt->rt_flags = RTF_UP | flags;
+
/*
* Add the gateway. Possibly re-malloc-ing the storage for it
* also add the rt_gwroute if possible.
/*
* make sure it contains the value we want (masked if needed).
*/
- if (netmask) {
+ if (netmask)
rt_maskedcopy(dst, ndst, netmask);
- } else
+ else
Bcopy(dst, ndst, dst->sa_len);
/*
* mechanism, then we just blow it away and retry
* the insertion of the new one.
*/
- rt2 = rtalloc1_locked(dst, 0,
- RTF_CLONING | RTF_PRCLONING);
+ if (flags & RTF_IFSCOPE) {
+ rt2 = rtalloc1_scoped_locked(dst0, 0,
+ RTF_CLONING | RTF_PRCLONING, ifscope);
+ } else {
+ rt2 = rtalloc1_locked(dst, 0,
+ RTF_CLONING | RTF_PRCLONING);
+ }
if (rt2 && rt2->rt_parent) {
rtrequest_locked(RTM_DELETE,
(struct sockaddr *)rt_key(rt2),
rt_fixchange, &arg);
}
+ /*
+ * If this is the (non-scoped) default route, record
+ * the interface index used for the primary ifscope.
+ */
+ if (rt_inet_default(rt, rt_key(rt)))
+ set_primary_ifscope(rt->rt_ifp->if_index);
+
/*
* actually return a resultant rtentry and
* give the caller a single reference.
* routine just for adds. I'm not sure why I thought it was necessary to do
* changes this way.
*/
-#ifdef DEBUG
-static int rtfcdebug = 0;
-#endif
-
static int
rt_fixchange(struct radix_node *rn, void *vp)
{
u_char *xk1, *xm1, *xk2, *xmp;
int i, len, mlen;
-#ifdef DEBUG
- if (rtfcdebug)
- printf("rt_fixchange: rt %p, rt0 %p\n", rt, rt0);
-#endif
-
lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
if (!rt->rt_parent ||
- (rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) {
-#ifdef DEBUG
- if(rtfcdebug) printf("no parent or pinned\n");
-#endif
- return 0;
- }
+ (rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING)))
+ return (0);
- if (rt->rt_parent == rt0) {
-#ifdef DEBUG
- if(rtfcdebug) printf("parent match\n");
-#endif
- return rtrequest_locked(RTM_DELETE, rt_key(rt),
- (struct sockaddr *)0, rt_mask(rt),
- rt->rt_flags, (struct rtentry **)0);
- }
+ if (rt->rt_parent == rt0)
+ goto delete_rt;
/*
* There probably is a function somewhere which does this...
* if not, there should be.
*/
- len = imin(((struct sockaddr *)rt_key(rt0))->sa_len,
- ((struct sockaddr *)rt_key(rt))->sa_len);
+ len = imin(rt_key(rt0)->sa_len, rt_key(rt)->sa_len);
xk1 = (u_char *)rt_key(rt0);
xm1 = (u_char *)rt_mask(rt0);
/* avoid applying a less specific route */
xmp = (u_char *)rt_mask(rt->rt_parent);
- mlen = ((struct sockaddr *)rt_key(rt->rt_parent))->sa_len;
- if (mlen > ((struct sockaddr *)rt_key(rt0))->sa_len) {
-#if DEBUG
- if (rtfcdebug)
- printf("rt_fixchange: inserting a less "
- "specific route\n");
-#endif
- return 0;
- }
+ mlen = rt_key(rt->rt_parent)->sa_len;
+ if (mlen > rt_key(rt0)->sa_len)
+ return (0);
+
for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++) {
- if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i]) {
-#if DEBUG
- if (rtfcdebug)
- printf("rt_fixchange: inserting a less "
- "specific route\n");
-#endif
- return 0;
- }
+ if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i])
+ return (0);
}
for (i = rnh->rnh_treetop->rn_offset; i < len; i++) {
- if ((xk2[i] & xm1[i]) != xk1[i]) {
-#ifdef DEBUG
- if(rtfcdebug) printf("no match\n");
-#endif
- return 0;
- }
+ if ((xk2[i] & xm1[i]) != xk1[i])
+ return (0);
}
/*
* OK, this node is a clone, and matches the node currently being
* changed/added under the node's mask. So, get rid of it.
*/
-#ifdef DEBUG
- if(rtfcdebug) printf("deleting\n");
-#endif
- return rtrequest_locked(RTM_DELETE, rt_key(rt), (struct sockaddr *)0,
- rt_mask(rt), rt->rt_flags, (struct rtentry **)0);
+delete_rt:
+ return (rtrequest_locked(RTM_DELETE, rt_key(rt), NULL,
+ rt_mask(rt), rt->rt_flags, NULL));
}
int
-rt_setgate(struct rtentry *rt0, struct sockaddr *dst, struct sockaddr *gate)
+rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
{
- caddr_t new, old;
int dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len);
- struct rtentry *rt = rt0;
struct radix_node_head *rnh = rt_tables[dst->sa_family];
+
+ lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
+
/*
* A host route with the destination equal to the gateway
* will interfere with keeping LLINFO in the routing
* table, so disallow it.
*/
-
- lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
-
- if (((rt0->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
- (RTF_HOST|RTF_GATEWAY)) &&
- (dst->sa_len == gate->sa_len) &&
+ if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
+ (RTF_HOST|RTF_GATEWAY)) && (dst->sa_len == gate->sa_len) &&
(bcmp(dst, gate, dst->sa_len) == 0)) {
/*
* The route might already exist if this is an RTM_CHANGE
* or a routing redirect, so try to delete it.
*/
- if (rt_key(rt0))
- rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt0),
- rt0->rt_gateway, rt_mask(rt0), rt0->rt_flags, 0);
- return EADDRNOTAVAIL;
+ if (rt_key(rt))
+ rtrequest_locked(RTM_DELETE, rt_key(rt),
+ rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
+ return (EADDRNOTAVAIL);
}
/*
- * Both dst and gateway are stored in the same malloc'd chunk
- * (If I ever get my hands on....)
- * if we need to malloc a new chunk, then keep the old one around
- * till we don't need it any more.
+ * The destination is not directly reachable. Get a route
+ * to the next-hop gateway and store it in rt_gwroute.
*/
- if (rt->rt_gateway == 0 || glen > ROUNDUP(rt->rt_gateway->sa_len)) {
- old = (caddr_t)rt_key(rt);
- R_Malloc(new, caddr_t, dlen + glen);
- if (new == 0)
- return ENOBUFS;
- rt->rt_nodes->rn_key = new;
- } else {
+ if (rt->rt_flags & RTF_GATEWAY) {
+ struct rtentry *gwrt;
+ unsigned int ifscope;
+
+ ifscope = (dst->sa_family == AF_INET) ?
+ sa_get_ifscope(dst) : IFSCOPE_NONE;
+
+ gwrt = rtalloc1_scoped_locked(gate, 1, RTF_PRCLONING, ifscope);
+
/*
- * otherwise just overwrite the old one
+ * Cloning loop avoidance:
+ *
+ * In the presence of protocol-cloning and bad configuration,
+ * it is possible to get stuck in bottomless mutual recursion
+ * (rtrequest rt_setgate rtalloc1). We avoid this by not
+ * allowing protocol-cloning to operate for gateways (which
+ * is probably the correct choice anyway), and avoid the
+ * resulting reference loops by disallowing any route to run
+ * through itself as a gateway. This is obviously mandatory
+ * when we get rt->rt_output(). It implies that a route to
+ * the gateway must already be present in the system in order
+ * for the gateway to be referred to by another route.
*/
- new = rt->rt_nodes->rn_key;
- old = 0;
+ if (gwrt == rt) {
+ rtunref(gwrt);
+ return (EADDRINUSE); /* failure */
+ }
+
+ /* If scoped, the gateway route must use the same interface */
+ if (ifscope != IFSCOPE_NONE && (rt->rt_flags & RTF_IFSCOPE) &&
+ gwrt != NULL && gwrt->rt_ifp != NULL &&
+ gwrt->rt_ifp->if_index != ifscope) {
+ rtfree_locked(gwrt);
+ return ((rt->rt_flags & RTF_HOST) ?
+ EHOSTUNREACH : ENETUNREACH);
+ }
+
+ if (rt->rt_gwroute != NULL)
+ rtfree_locked(rt->rt_gwroute);
+ rt->rt_gwroute = gwrt;
+
+ /*
+ * In case the (non-scoped) default route gets modified via
+ * an ICMP redirect, record the interface index used for the
+ * primary ifscope. Also done in rt_setif() to take care
+ * of the non-redirect cases.
+ */
+ if (rt_inet_default(rt, dst) && rt->rt_ifp != NULL)
+ set_primary_ifscope(rt->rt_ifp->if_index);
+
+ /*
+ * Tell the kernel debugger about the new default gateway
+ * if the gateway route uses the primary interface, or
+ * if we are in a transient state before the non-scoped
+ * default gateway is installed (similar to how the system
+ * was behaving in the past). In future, it would be good
+ * to do all this only when KDP is enabled.
+ */
+ if ((dst->sa_family == AF_INET) &&
+ gwrt != NULL && gwrt->rt_gateway->sa_family == AF_LINK &&
+ (gwrt->rt_ifp->if_index == get_primary_ifscope() ||
+ get_primary_ifscope() == IFSCOPE_NONE))
+ kdp_set_gateway_mac(SDL(gwrt->rt_gateway)->sdl_data);
}
/*
- * copy the new gateway value into the memory chunk
+ * Prepare to store the gateway in rt_gateway. Both dst and gateway
+ * are stored one after the other in the same malloc'd chunk. If we
+ * have room, reuse the old buffer since rt_gateway already points
+ * to the right place. Otherwise, malloc a new block and update
+ * the 'dst' address and point rt_gateway to the right place.
*/
- Bcopy(gate, (rt->rt_gateway = (struct sockaddr *)(new + dlen)), glen);
+ if (rt->rt_gateway == NULL || glen > ROUNDUP(rt->rt_gateway->sa_len)) {
+ caddr_t new;
- /*
- * if we are replacing the chunk (or it's new) we need to
- * replace the dst as well
- */
- if (old) {
+ /* The underlying allocation is done with M_WAITOK set */
+ R_Malloc(new, caddr_t, dlen + glen);
+ if (new == NULL) {
+ if (rt->rt_gwroute != NULL)
+ rtfree_locked(rt->rt_gwroute);
+ rt->rt_gwroute = NULL;
+ return (ENOBUFS);
+ }
+
+ /*
+ * Copy from 'dst' and not rt_key(rt) because we can get
+ * here to initialize a newly allocated route entry, in
+		 * which case rt_key(rt) is NULL (and so is rt_gateway).
+ */
Bcopy(dst, new, dlen);
- R_Free(old);
+ R_Free(rt_key(rt)); /* free old block; NULL is okay */
+ rt->rt_nodes->rn_key = new;
+ rt->rt_gateway = (struct sockaddr *)(new + dlen);
}
/*
- * If there is already a gwroute, it's now almost definitly wrong
- * so drop it.
+ * Copy the new gateway value into the memory chunk.
*/
- if (rt->rt_gwroute) {
- rt = rt->rt_gwroute; rtfree_locked(rt);
- rt = rt0; rt->rt_gwroute = 0;
- }
+ Bcopy(gate, rt->rt_gateway, glen);
+
/*
- * Cloning loop avoidance:
- * In the presence of protocol-cloning and bad configuration,
- * it is possible to get stuck in bottomless mutual recursion
- * (rtrequest rt_setgate rtalloc1). We avoid this by not allowing
- * protocol-cloning to operate for gateways (which is probably the
- * correct choice anyway), and avoid the resulting reference loops
- * by disallowing any route to run through itself as a gateway.
- * This is obviously mandatory when we get rt->rt_output().
+ * For consistency between rt_gateway and rt_key(gwrt).
*/
- if (rt->rt_flags & RTF_GATEWAY) {
- rt->rt_gwroute = rtalloc1_locked(gate, 1, RTF_PRCLONING);
- if (rt->rt_gwroute == rt) {
- rtfree_locked(rt->rt_gwroute);
- rt->rt_gwroute = 0;
- return EDQUOT; /* failure */
- }
- /* Tell the kernel debugger about the new default gateway */
- if ((AF_INET == rt->rt_gateway->sa_family) &&
- rt->rt_gwroute && rt->rt_gwroute->rt_gateway &&
- (AF_LINK == rt->rt_gwroute->rt_gateway->sa_family)) {
- kdp_set_gateway_mac(((struct sockaddr_dl *)rt0->rt_gwroute->rt_gateway)->sdl_data);
- }
+ if ((rt->rt_flags & RTF_GATEWAY) && rt->rt_gwroute != NULL &&
+ (rt->rt_gwroute->rt_flags & RTF_IFSCOPE) &&
+ rt->rt_gateway->sa_family == AF_INET &&
+ rt_key(rt->rt_gwroute)->sa_family == AF_INET) {
+ sa_set_ifscope(rt->rt_gateway,
+ sa_get_ifscope(rt_key(rt->rt_gwroute)));
}
/*
arg.rnh = rnh;
arg.rt0 = rt;
rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
- rt_fixchange, &arg);
+ rt_fixchange, &arg);
}
- return 0;
+ return (0);
}
static void
bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2));
}
+/*
+ * Lookup an AF_INET scoped or non-scoped route depending on the ifscope
+ * value passed in by the caller (IFSCOPE_NONE implies non-scoped).
+ */
+static struct radix_node *
+node_lookup(struct sockaddr *dst, struct sockaddr *netmask,
+ unsigned int ifscope)
+{
+ struct radix_node_head *rnh = rt_tables[AF_INET];
+ struct radix_node *rn;
+ struct sockaddr_in sin, mask;
+ struct matchleaf_arg ma = { ifscope };
+ rn_matchf_t *f = rn_match_ifscope;
+ void *w = &ma;
+
+ if (dst->sa_family != AF_INET)
+ return (NULL);
+
+ /*
+ * Embed ifscope into the search key; for a non-scoped
+ * search this will clear out any embedded scope value.
+ */
+ dst = sin_copy(SIN(dst), &sin, ifscope);
+
+ /* Embed (or clear) ifscope into netmask */
+ if (netmask != NULL)
+ netmask = mask_copy(netmask, &mask, ifscope);
+
+ if (ifscope == IFSCOPE_NONE)
+ f = w = NULL;
+
+ rn = rnh->rnh_lookup_args(dst, netmask, rnh, f, w);
+ if (rn != NULL && (rn->rn_flags & RNF_ROOT))
+ rn = NULL;
+
+ return (rn);
+}
+
+/*
+ * Lookup the AF_INET non-scoped default route.
+ */
+static struct radix_node *
+node_lookup_default(void)
+{
+ struct radix_node_head *rnh = rt_tables[AF_INET];
+ return (rnh->rnh_lookup(&sin_def, NULL, rnh));
+}
+
+/*
+ * Common routine to lookup/match a route. It invokes the lookup/matchaddr
+ * callback which could be address family-specific. The main difference
+ * between the two (at least for AF_INET/AF_INET6) is that a lookup does
+ * not alter the expiring state of a route, whereas a match would unexpire
+ * or revalidate the route.
+ *
+ * The optional scope or interface index property of a route allows for a
+ * per-interface route instance. This permits multiple route entries having
+ * the same destination (but not necessarily the same gateway) to exist in
+ * the routing table; each of these entries is specific to the corresponding
+ * interface. This is made possible by embedding the scope value into the
+ * radix key, thus making each route entry unique. These scoped entries
+ * exist along with the regular, non-scoped entries in the same radix tree
+ * for a given address family (currently AF_INET only); the scope logically
+ * partitions it into multiple per-interface sub-trees.
+ *
+ * When a scoped route lookup is performed, the routing table is searched for
+ * the best match that would result in a route using the same interface as the
+ * one associated with the scope (the exceptions to this are routes that point
+ * to the loopback interface). The search rule follows the longest matching
+ * prefix with the additional interface constraint.
+ */
+struct rtentry *
+rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask,
+ struct radix_node_head *rnh, unsigned int ifscope)
+{
+ struct radix_node *rn0, *rn;
+ boolean_t dontcare = (ifscope == IFSCOPE_NONE);
+
+ lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
+
+ if (!lookup_only)
+ netmask = NULL;
+
+ /*
+ * Non-scoped route lookup.
+ */
+ if (!ip_doscopedroute || dst->sa_family != AF_INET) {
+ if (lookup_only)
+ rn = rnh->rnh_lookup(dst, netmask, rnh);
+ else
+ rn = rnh->rnh_matchaddr(dst, rnh);
+ goto done;
+ }
+
+ /*
+ * Scoped route lookup:
+ *
+ * We first perform a non-scoped lookup for the original result.
+ * Afterwards, depending on whether or not the caller has specified
+	 * a scope, we perform a more specific scoped search and fall back
+ * to this original result upon failure.
+ */
+ rn0 = rn = node_lookup(dst, netmask, IFSCOPE_NONE);
+
+ /*
+ * If the caller did not specify a scope, use the primary scope
+ * derived from the system's non-scoped default route. If, for
+ * any reason, there is no primary interface, return what we have.
+ */
+ if (dontcare && (ifscope = get_primary_ifscope()) == IFSCOPE_NONE)
+ goto validate;
+
+ /*
+ * Keep the original result if either of the following is true:
+ *
+ * 1) The interface portion of the route has the same interface
+ * index as the scope value and it is marked with RTF_IFSCOPE.
+ * 2) The route uses the loopback interface, in which case the
+ * destination (host/net) is local/loopback.
+ *
+	 * Otherwise, do a more specific search using the scope.
+ */
+ if (rn != NULL) {
+ struct rtentry *rt = RT(rn);
+ if (rt->rt_ifp != lo_ifp) {
+ if (rt->rt_ifp->if_index != ifscope) {
+ /*
+ * Wrong interface; keep the original result
+ * only if the caller did not specify a scope,
+ * and do a more specific scoped search using
+ * the scope of the found route. Otherwise,
+ * start again from scratch.
+ */
+ rn = NULL;
+ if (dontcare)
+ ifscope = rt->rt_ifp->if_index;
+ else
+ rn0 = NULL;
+ } else if (!(rt->rt_flags & RTF_IFSCOPE)) {
+ /*
+ * Right interface, except that this route
+ * isn't marked with RTF_IFSCOPE. Do a more
+ * specific scoped search. Keep the original
+				 * result and return it in case the scoped
+ * search fails.
+ */
+ rn = NULL;
+ }
+ }
+ }
+
+ /*
+ * Scoped search. Find the most specific entry having the same
+ * interface scope as the one requested. The following will result
+ * in searching for the longest prefix scoped match.
+ */
+ if (rn == NULL)
+ rn = node_lookup(dst, netmask, ifscope);
+
+ /*
+ * Use the original result if either of the following is true:
+ *
+ * 1) The scoped search did not yield any result.
+ * 2) The result from the scoped search is a scoped default route,
+ * and the original (non-scoped) result is not a default route,
+ * i.e. the original result is a more specific host/net route.
+ * 3) The scoped search yielded a net route but the original
+ * result is a host route, i.e. the original result is treated
+ * as a more specific route.
+ */
+ if (rn == NULL || (rn0 != NULL &&
+ ((INET_DEFAULT(rt_key(RT(rn))) && !INET_DEFAULT(rt_key(RT(rn0)))) ||
+ (!RT_HOST(rn) && RT_HOST(rn0)))))
+ rn = rn0;
+
+ /*
+ * If we still don't have a route, use the non-scoped default
+	 * route as long as the interface portion satisfies the scope.
+ */
+ if (rn == NULL && (rn = node_lookup_default()) != NULL &&
+ RT(rn)->rt_ifp->if_index != ifscope)
+ rn = NULL;
+
+validate:
+ if (rn != NULL && !lookup_only)
+ (void) in_validate(rn);
+
+done:
+ if (rn != NULL && (rn->rn_flags & RNF_ROOT))
+ rn = NULL;
+ else if (rn != NULL)
+ rtref(RT(rn));
+
+ return (RT(rn));
+}
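An illustrative, non-authoritative use of rt_lookup for a scoped query (dst_in and ifp are assumptions); per the comment above, the scoped search falls back to the non-scoped result when no per-interface entry is more specific:

	struct rtentry *rt;

	lck_mtx_lock(rt_mtx);
	rt = rt_lookup(TRUE, SA(&dst_in), NULL, rt_tables[AF_INET],
	    ifp->if_index);
	if (rt != NULL) {
		/* ... examine rt->rt_ifp, rt->rt_gateway, etc. ... */
		rtfree_locked(rt);	/* rt_lookup returned a held reference */
	}
	lck_mtx_unlock(rt_mtx);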
+
/*
* Set up a routing table entry, normally
* for an interface.
/*
- * Copyright (c) 2000,2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
struct route {
struct rtentry *ro_rt;
struct sockaddr ro_dst;
- u_long reserved[2]; /* for future use if needed */
+ u_int32_t ro_flags; /* route flags (see below) */
+ u_int32_t reserved; /* for future use if needed */
};
+
+#define ROF_SRCIF_SELECTED 0x1 /* source interface was selected */
+
#else
struct route;
#endif /* PRIVATE */
#define RTF_LOCAL 0x200000 /* route represents a local address */
#define RTF_BROADCAST 0x400000 /* route represents a bcast address */
#define RTF_MULTICAST 0x800000 /* route represents a mcast address */
-#define RTF_TRACKREFS 0x1000000 /* Debug references and releases */
- /* 0x1000000 and up unassigned */
+#define RTF_IFSCOPE 0x1000000 /* has valid interface scope */
+ /* 0x2000000 and up unassigned */
/*
* Routing statistics.
};
#ifdef KERNEL_PRIVATE
+/*
+ * For scoped routing; a zero interface scope value means nil/no scope.
+ */
+#define IFSCOPE_NONE 0
+
#define RTFREE(rt) rtfree(rt)
extern struct route_cb route_cb;
extern struct radix_node_head *rt_tables[AF_MAX+1];
extern void rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *);
extern void rt_newmaddrmsg(int, struct ifmultiaddr *);
extern int rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *);
+extern void set_primary_ifscope(unsigned int);
+extern unsigned int get_primary_ifscope(void);
+extern boolean_t rt_inet_default(struct rtentry *, struct sockaddr *);
+extern struct rtentry *rt_lookup(boolean_t, struct sockaddr *,
+ struct sockaddr *, struct radix_node_head *, unsigned int);
extern void rtalloc(struct route *);
extern void rtalloc_ign(struct route *, u_long);
-extern void rtalloc_ign_locked(struct route *, u_long );
+extern void rtalloc_ign_locked(struct route *, u_long);
+extern void rtalloc_scoped_ign_locked(struct route *, u_long, unsigned int);
extern struct rtentry *rtalloc1(struct sockaddr *, int, u_long);
extern struct rtentry *rtalloc1_locked(struct sockaddr *, int, u_long);
+extern struct rtentry *rtalloc1_scoped_locked(struct sockaddr *, int,
+ u_long, unsigned int);
extern void rtfree(struct rtentry *);
extern void rtfree_locked(struct rtentry *);
extern void rtref(struct rtentry *);
extern int rtinit(struct ifaddr *, int, int);
extern int rtinit_locked(struct ifaddr *, int, int);
extern int rtioctl(int, caddr_t, struct proc *);
-extern void rtredirect(struct sockaddr *, struct sockaddr *,
+extern void rtredirect(struct ifnet *, struct sockaddr *, struct sockaddr *,
struct sockaddr *, int, struct sockaddr *, struct rtentry **);
extern int rtrequest(int, struct sockaddr *,
struct sockaddr *, struct sockaddr *, int, struct rtentry **);
extern int rtrequest_locked(int, struct sockaddr *,
struct sockaddr *, struct sockaddr *, int, struct rtentry **);
+extern int rtrequest_scoped_locked(int, struct sockaddr *, struct sockaddr *,
+ struct sockaddr *, int, struct rtentry **, unsigned int);
extern struct rtentry *rte_alloc(void);
extern void rte_free(struct rtentry *);
+extern unsigned int sa_get_ifscope(struct sockaddr *);
#endif KERNEL_PRIVATE
#endif
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <machine/spl.h>
extern struct rtstat rtstat;
-extern int rttrash;
extern u_long route_generation;
extern int use_routegenid;
extern int check_routeselfref;
static int route_output(struct mbuf *, struct socket *);
static void rt_setmetrics(u_long, struct rt_metrics *, struct rt_metrics *);
static void rt_setif(struct rtentry *, struct sockaddr *, struct sockaddr *,
- struct sockaddr *);
+ struct sockaddr *, unsigned int);
+
+#define SIN(sa) ((struct sockaddr_in *)(size_t)(sa))
/* Sleazy use of local variables throughout file, warning!!!! */
#define dst info.rti_info[RTAX_DST]
#ifndef __APPLE__
struct proc *curproc = current_proc();
#endif
+ struct sockaddr_in dst_in, gate_in;
int sendonlytoself = 0;
+ unsigned int ifscope = IFSCOPE_NONE;
#define senderr(e) { error = e; goto flush;}
- if (m == 0 || ((m->m_len < sizeof(long)) && (m = m_pullup(m, sizeof(long))) == 0))
+ if (m == NULL ||
+ ((m->m_len < sizeof(long)) && (m = m_pullup(m, sizeof(long))) == 0))
return (ENOBUFS);
if ((m->m_flags & M_PKTHDR) == 0)
panic("route_output");
len = m->m_pkthdr.len;
if (len < sizeof(*rtm) ||
len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
- dst = 0;
+ dst = NULL;
senderr(EINVAL);
}
R_Malloc(rtm, struct rt_msghdr *, len);
- if (rtm == 0) {
- dst = 0;
+ if (rtm == NULL) {
+ dst = NULL;
senderr(ENOBUFS);
}
m_copydata(m, 0, len, (caddr_t)rtm);
if (rtm->rtm_version != RTM_VERSION) {
- dst = 0;
+ dst = NULL;
senderr(EPROTONOSUPPORT);
}
-
+
/*
	 * Silent version of RTM_GET for Reachability APIs. We may change
* all RTM_GETs to be silent in the future, so this is private for now.
sendonlytoself = 1;
rtm->rtm_type = RTM_GET;
}
-
+
/*
* Perform permission checking, only privileged sockets
* may perform operations other than RTM_GET
*/
if (rtm->rtm_type != RTM_GET && (so->so_state & SS_PRIV) == 0) {
- dst = 0;
+ dst = NULL;
senderr(EPERM);
}
rtm->rtm_pid = proc_selfpid();
info.rti_addrs = rtm->rtm_addrs;
if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
- dst = 0;
+ dst = NULL;
senderr(EINVAL);
}
- if (dst == 0 || (dst->sa_family >= AF_MAX)
- || (gate != 0 && (gate->sa_family >= AF_MAX))) {
+ if (dst == NULL || (dst->sa_family >= AF_MAX) ||
+ (gate != NULL && (gate->sa_family >= AF_MAX))) {
senderr(EINVAL);
}
+
+ if (dst->sa_family == AF_INET && dst->sa_len != sizeof (dst_in)) {
+ /* At minimum, we need up to sin_addr */
+ if (dst->sa_len < offsetof(struct sockaddr_in, sin_zero))
+ senderr(EINVAL);
+ bzero(&dst_in, sizeof (dst_in));
+ dst_in.sin_len = sizeof (dst_in);
+ dst_in.sin_family = AF_INET;
+ dst_in.sin_port = SIN(dst)->sin_port;
+ dst_in.sin_addr = SIN(dst)->sin_addr;
+ dst = (struct sockaddr *)&dst_in;
+ }
+
+ if (gate != NULL &&
+ gate->sa_family == AF_INET && gate->sa_len != sizeof (gate_in)) {
+ /* At minimum, we need up to sin_addr */
+ if (gate->sa_len < offsetof(struct sockaddr_in, sin_zero))
+ senderr(EINVAL);
+ bzero(&gate_in, sizeof (gate_in));
+ gate_in.sin_len = sizeof (gate_in);
+ gate_in.sin_family = AF_INET;
+ gate_in.sin_port = SIN(gate)->sin_port;
+ gate_in.sin_addr = SIN(gate)->sin_addr;
+ gate = (struct sockaddr *)&gate_in;
+ }
+
if (genmask) {
struct radix_node *t;
t = rn_addmask((caddr_t)genmask, 0, 1);
else
senderr(ENOBUFS);
}
+
+ /*
+	 * If the RTF_IFSCOPE flag is set, then rtm_index specifies the scope.
+ */
+ if (rtm->rtm_flags & RTF_IFSCOPE) {
+ /* Scoped routing is for AF_INET only */
+ if (dst->sa_family != AF_INET)
+ senderr(EINVAL);
+ ifscope = rtm->rtm_index;
+ }
+
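	/*
	 * Illustrative note (not part of the original source): a user-space
	 * caller requests a scoped operation by setting RTF_IFSCOPE in
	 * rtm_flags and placing the interface index in rtm_index of the
	 * rt_msghdr it writes to its PF_ROUTE socket, e.g. (hypothetical):
	 *
	 *	rtm->rtm_flags |= RTF_IFSCOPE;
	 *	rtm->rtm_index = if_nametoindex("en0");
	 */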
switch (rtm->rtm_type) {
-
+
case RTM_ADD:
- if (gate == 0)
+ if (gate == NULL)
senderr(EINVAL);
#ifdef __APPLE__
}
}
#endif
- error = rtrequest_locked(RTM_ADD, dst, gate, netmask,
- rtm->rtm_flags, &saved_nrt);
+ error = rtrequest_scoped_locked(RTM_ADD, dst, gate,
+ netmask, rtm->rtm_flags, &saved_nrt, ifscope);
if (error == 0 && saved_nrt) {
#ifdef __APPLE__
/*
* dwiggins@bbn.com
*/
- rt_setif(saved_nrt, ifpaddr, ifaaddr, gate);
+ rt_setif(saved_nrt, ifpaddr, ifaaddr, gate,
+ ifscope);
#endif
rt_setmetrics(rtm->rtm_inits,
&rtm->rtm_rmx, &saved_nrt->rt_rmx);
saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
saved_nrt->rt_rmx.rmx_locks |=
(rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
- rtunref(saved_nrt);
saved_nrt->rt_genmask = genmask;
+ rtunref(saved_nrt);
}
break;
case RTM_DELETE:
- error = rtrequest_locked(RTM_DELETE, dst, gate, netmask,
- rtm->rtm_flags, &saved_nrt);
+ error = rtrequest_scoped_locked(RTM_DELETE, dst,
+ gate, netmask, rtm->rtm_flags, &saved_nrt, ifscope);
if (error == 0) {
rt = saved_nrt;
goto report;
case RTM_GET:
case RTM_CHANGE:
case RTM_LOCK:
- if ((rnh = rt_tables[dst->sa_family]) == 0) {
+ if ((rnh = rt_tables[dst->sa_family]) == NULL)
senderr(EAFNOSUPPORT);
- } else if ((rt = (struct rtentry *)
- rnh->rnh_lookup(dst, netmask, rnh)) != NULL)
- rtref(rt);
- else
+
+ /*
+ * Lookup the best match based on the key-mask pair;
+ * callee adds a reference and checks for root node.
+ */
+ rt = rt_lookup(TRUE, dst, netmask, rnh, ifscope);
+ if (rt == NULL)
senderr(ESRCH);
+
switch(rtm->rtm_type) {
case RTM_GET: {
* equivalent to the code found at this very spot
* in BSD.
*/
- rt_setif(rt, ifpaddr, ifaaddr, gate);
+ rt_setif(rt, ifpaddr, ifaaddr, gate,
+ ifscope);
#endif
rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
* Set route's interface given ifpaddr, ifaaddr, and gateway.
*/
static void
-rt_setif(
- struct rtentry *rt,
- struct sockaddr *Ifpaddr,
- struct sockaddr *Ifaaddr,
- struct sockaddr *Gate)
+rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr,
+ struct sockaddr *Gate, unsigned int ifscope)
{
struct ifaddr *ifa = 0;
struct ifnet *ifp = 0;
if (use_routegenid)
route_generation++;
- /* new gateway could require new ifaddr, ifp;
- flags may also be different; ifp may be specified
- by ll sockaddr when protocol address is ambiguous */
- if (Ifpaddr && (ifa = ifa_ifwithnet(Ifpaddr)) &&
+ /*
+ * New gateway could require new ifaddr, ifp; flags may also
+ * be different; ifp may be specified by ll sockaddr when
+ * protocol address is ambiguous.
+ */
+ if (Ifpaddr && (ifa = ifa_ifwithnet_scoped(Ifpaddr, ifscope)) &&
(ifp = ifa->ifa_ifp) && (Ifaaddr || Gate)) {
- ifafree(ifa);
- ifa = ifaof_ifpforaddr(Ifaaddr ? Ifaaddr : Gate,
- ifp);
- }
- else
- {
+ ifafree(ifa);
+ ifa = ifaof_ifpforaddr(Ifaaddr ? Ifaaddr : Gate, ifp);
+ } else {
if (ifa) {
ifafree(ifa);
ifa = 0;
if (Ifpaddr && (ifp = if_withname(Ifpaddr)) ) {
if (Gate) {
ifa = ifaof_ifpforaddr(Gate, ifp);
- }
- else {
+ } else {
ifnet_lock_shared(ifp);
ifa = TAILQ_FIRST(&ifp->if_addrhead);
ifaref(ifa);
ifnet_lock_done(ifp);
}
- }
- else if (Ifaaddr && (ifa = ifa_ifwithaddr(Ifaaddr))) {
+ } else if (Ifaaddr &&
+ (ifa = ifa_ifwithaddr_scoped(Ifaaddr, ifscope))) {
ifp = ifa->ifa_ifp;
- }
- else if (Gate && (ifa = ifa_ifwithroute_locked(rt->rt_flags,
- rt_key(rt), Gate))) {
+ } else if (Gate &&
+ (ifa = ifa_ifwithroute_scoped_locked(rt->rt_flags,
+ rt_key(rt), Gate, ifscope))) {
ifp = ifa->ifa_ifp;
}
}
if (ifa) {
struct ifaddr *oifa = rt->rt_ifa;
if (oifa != ifa) {
- if (oifa && oifa->ifa_rtrequest)
- oifa->ifa_rtrequest(RTM_DELETE,
- rt, Gate);
+ if (oifa && oifa->ifa_rtrequest)
+ oifa->ifa_rtrequest(RTM_DELETE, rt, Gate);
rtsetifa(rt, ifa);
- rt->rt_ifp = ifp;
- rt->rt_rmx.rmx_mtu = ifp->if_mtu;
- if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
+ rt->rt_ifp = ifp;
+ /*
+ * If this is the (non-scoped) default route, record
+ * the interface index used for the primary ifscope.
+ */
+ if (rt_inet_default(rt, rt_key(rt)))
+ set_primary_ifscope(rt->rt_ifp->if_index);
+ rt->rt_rmx.rmx_mtu = ifp->if_mtu;
+ if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, Gate);
} else {
ifafree(ifa);
ifafree(ifa);
return;
}
- call_ifareq:
+call_ifareq:
/* XXX: to reset gateway to correct value, at RTM_CHANGE */
if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, Gate);
static int
sysctl_rtsock SYSCTL_HANDLER_ARGS
{
+#pragma unused(oidp)
int *name = (int *)arg1;
u_int namelen = arg2;
struct radix_node_head *rnh;
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
*/
in_ifscrub(ifp, ia, 1);
ifa = &ia->ia_ifa;
-#if CONFIG_FORCE_OUT_IFP
- // Cleanup any pdp hack related route
- if (ia->ia_route)
- {
- ia->ia_route->rt_flags &= ~RTF_UP;
- rtfree_locked(ia->ia_route);
- ia->ia_route = NULL;
- }
-#endif
lck_mtx_unlock(rt_mtx);
ifnet_lock_exclusive(ifp);
if_detach_ifa(ifp, ifa);
/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
sa_family_t sin_family;
in_port_t sin_port;
struct in_addr sin_addr;
- char sin_zero[8]; /* XXX bwg2001-004 */
+ char sin_zero[8];
};
#define INET_ADDRSTRLEN 16
#ifdef __APPLE__
#define IP_STRIPHDR 23 /* bool: drop receive of raw IP header */
#endif
-#define IP_RECVTTL 24 /* bool; receive reception TTL w/dgram */
+#define IP_RECVTTL 24 /* bool; receive reception TTL w/dgram */
+#define IP_BOUND_IF 25 /* set/get bound interface */
#define IP_FW_ADD 40 /* add a firewall rule to chain */
#define IP_TRAFFIC_MGT_BACKGROUND 65 /* int*; get background IO flags; set background IO */
#ifdef PRIVATE
-/* This is a hack, this is only a hack. */
-#define IP_FORCE_OUT_IFP 69 /* char ifname[] - send traffic on this interface */
+#define IP_FORCE_OUT_IFP 69 /* deprecated; use IP_BOUND_IF instead */
#endif
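A minimal user-space sketch of the replacement option (illustrative; the socket s, the helper name, error handling, and the interface name are assumptions), binding a socket's IPv4 traffic to one interface by index:

#include <net/if.h>		/* if_nametoindex() */
#include <netinet/in.h>
#include <sys/socket.h>

static int
bind_to_ifname(int s, const char *ifname)	/* hypothetical helper */
{
	unsigned int idx = if_nametoindex(ifname);

	if (idx == 0)
		return (-1);
	/* subsequent output on s is scoped to this interface */
	return (setsockopt(s, IPPROTO_IP, IP_BOUND_IF, &idx, sizeof (idx)));
}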
/* Background socket configuration flags */
/*
- * Copyright (c) 2004-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
const struct in_addr *addr,
int create,
int proxy,
- route_t *route)
+ route_t *route,
+ unsigned int ifscope)
{
struct sockaddr_inarp sin = {sizeof(sin), AF_INET, 0, {0}, {0}, 0, 0};
const char *why = NULL;
sin.sin_addr.s_addr = addr->s_addr;
sin.sin_other = proxy ? SIN_PROXY : 0;
-
- *route = rtalloc1_locked((struct sockaddr*)&sin, create, 0);
+
+ *route = rtalloc1_scoped_locked((struct sockaddr*)&sin,
+ create, 0, ifscope);
if (*route == NULL)
return ENETUNREACH;
if (why && create && log_arp_warnings) {
char tmp[MAX_IPv4_STR_LEN];
- log(LOG_DEBUG, "arplookup %s failed: %s\n",
+ log(LOG_DEBUG, "arplookup link#%d %s failed: %s\n", ifscope,
inet_ntop(AF_INET, addr, tmp, sizeof(tmp)), why);
}
if ((route->rt_flags & RTF_UP) == 0) {
/* route is down, find a new one */
- hint = route = rtalloc1_locked(net_dest, 1, 0);
+ hint = route = rtalloc1_scoped_locked(net_dest,
+ 1, 0, route->rt_ifp->if_index);
if (hint) {
rtunref(hint);
}
if (route->rt_gwroute != 0)
rtfree_locked(route->rt_gwroute);
- route->rt_gwroute = rtalloc1_locked(route->rt_gateway, 1, 0);
+ route->rt_gwroute = rtalloc1_scoped_locked(
+ route->rt_gateway, 1, 0,
+ route->rt_ifp->if_index);
if (route->rt_gwroute == 0) {
lck_mtx_unlock(rt_mtx);
return EHOSTUNREACH;
* route and link layer information.
*/
if (route == NULL || route->rt_llinfo == NULL)
- result = arp_lookup_route(&net_dest->sin_addr, 1, 0, &route);
+ result = arp_lookup_route(&net_dest->sin_addr, 1, 0, &route,
+ ifp->if_index);
if (result || route == NULL || route->rt_llinfo == NULL) {
char tmp[MAX_IPv4_STR_LEN];
/*
* Look up the routing entry. If it doesn't exist and we are the
- * target, go ahead and create one.
+ * target, and the sender isn't 0.0.0.0, go ahead and create one.
*/
- error = arp_lookup_route(&sender_ip->sin_addr, (target_ip->sin_addr.s_addr ==
- best_ia->ia_addr.sin_addr.s_addr), 0, &route);
+ error = arp_lookup_route(&sender_ip->sin_addr,
+ (target_ip->sin_addr.s_addr == best_ia->ia_addr.sin_addr.s_addr &&
+ sender_ip->sin_addr.s_addr != 0), 0, &route, ifp->if_index);
if (error || route == 0 || route->rt_gateway == 0) {
if (arpop != ARPOP_REQUEST) {
goto respond;
* Verify this ARP probe doesn't conflict with an IPv4LL we know of
* on another interface.
*/
- error = arp_lookup_route(&target_ip->sin_addr, 0, 0, &route);
+ error = arp_lookup_route(&target_ip->sin_addr, 0, 0,
+ &route, ifp->if_index);
if (error == 0 && route && route->rt_gateway) {
gateway = SDL(route->rt_gateway);
if (route->rt_ifp != ifp && gateway->sdl_alen != 0
/* don't create entry if link-local address and link-local is disabled */
if (!IN_LINKLOCAL(ntohl(sender_ip->sin_addr.s_addr))
|| (ifp->if_eflags & IFEF_ARPLL) != 0) {
- error = arp_lookup_route(&sender_ip->sin_addr, 1, 0, &route);
+ error = arp_lookup_route(&sender_ip->sin_addr,
+ 1, 0, &route, ifp->if_index);
if (error == 0 && route != NULL && route->rt_gateway != NULL) {
created_announcement = 1;
}
if (target_ip->sin_addr.s_addr != best_ia->ia_addr.sin_addr.s_addr) {
/* Find a proxy route */
- error = arp_lookup_route(&target_ip->sin_addr, 0, SIN_PROXY, &route);
+ error = arp_lookup_route(&target_ip->sin_addr, 0, SIN_PROXY,
+ &route, ifp->if_index);
if (error || route == NULL) {
/* We don't have a route entry indicating we should use proxy */
}
/* See if we have a route to the target ip before we proxy it */
- route = rtalloc1_locked((const struct sockaddr*)target_ip, 0, 0);
+ route = rtalloc1_scoped_locked(
+ (const struct sockaddr *)target_ip, 0, 0,
+ ifp->if_index);
if (!route) {
lck_mtx_unlock(rt_mtx);
return 0;
struct ip iphdr; /* capsule IP header, host byte ordered */
int proto, error;
u_int8_t tos;
+ struct ip_out_args ipoa = { IFSCOPE_NONE };
if (sin_src == NULL || sin_dst == NULL ||
sin_src->sin_family != AF_INET ||
#endif
}
- error = ip_output(m, NULL, &sc->gif_ro, 0, NULL, NULL);
+ error = ip_output(m, NULL, &sc->gif_ro, IP_OUTARGS, NULL, &ipoa);
return(error);
}
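The same ip_out_args pattern sketched in isolation (hypothetical; m, ro and ifp are assumptions): callers wanting scoped output pass IP_OUTARGS together with an ip_out_args carrying the interface index, replacing the removed CONFIG_FORCE_OUT_IFP route hack:

	struct ip_out_args ipoa = { ifp->if_index };
	int error;

	/* IP_OUTARGS tells ip_output() that the ipoa argument is valid */
	error = ip_output(m, NULL, &ro, IP_OUTARGS, NULL, &ipoa);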
sin.sin_family = AF_INET;
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_addr = ip.ip_src;
- rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL);
+ lck_mtx_lock(rt_mtx);
+ rt = rtalloc1_scoped_locked((struct sockaddr *)&sin, 0, 0,
+ m->m_pkthdr.rcvif->if_index);
+ lck_mtx_unlock(rt_mtx);
if (!rt || rt->rt_ifp != m->m_pkthdr.rcvif) {
#if 0
log(LOG_WARNING, "%s: packet from 0x%x dropped "
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
}
#ifdef __APPLE_API_PRIVATE
+static void
in_pcb_conflict_post_msg(u_int16_t port)
{
/*
return (0);
}
-#if CONFIG_FORCE_OUT_IFP
-/*
- * pdp_context_route_locked is losely based on rtalloc_ign_locked with
- * the hope that it can be used anywhere rtalloc_ign_locked is.
- */
-__private_extern__ void
-pdp_context_route_locked(ifnet_t ifp, struct route *ro)
-{
- struct in_ifaddr *ia;
- struct rtentry *rt;
-
- if ((rt = ro->ro_rt) != NULL) {
- if (rt->rt_ifp == ifp && rt->rt_flags & RTF_UP)
- return;
-
- rtfree_locked(rt);
- ro->ro_rt = NULL;
- }
-
- if (ifp == NULL)
- return;
-
- /* Find the first IP address, we will use a fake route off of that */
- TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
- if (ia->ia_ifp == ifp)
- break;
- }
-
- /* Hrmm no IP addresses here :( */
- if (ia == NULL)
- return;
-
- rt = ia->ia_route;
- if (rt == NULL) {
- struct sockaddr *ifa = ia->ia_ifa.ifa_addr;
-
- /* Allocate and set up a fake route */
- if ((rt = rte_alloc()) == NULL)
- return;
-
- bzero(rt, sizeof(*rt));
- rt->rt_flags = RTF_UP | RTF_STATIC;
- if (rt_setgate(rt, ifa, ifa) != 0) {
- rte_free(rt);
- return;
- }
- /*
- * Explicitly zero the key so that:
- * rt_tables[rt_key(rt)->sa_family] == rt_tables[0] == NULL
- */
- bzero(rt_key(rt), ifa->sa_len);
-
- rtsetifa(rt, &ia->ia_ifa);
- rt->rt_ifp = rt->rt_ifa->ifa_ifp;
-
- /* Take a reference for the ia pointer to this */
- ia->ia_route = rt;
- rtref(rt);
-
- /*
- * One more rtentry floating around that is not
- * linked to the routing table.
- */
- (void) OSIncrementAtomic((SInt32 *)&rttrash);
- }
- rt->generation_id = route_generation;
- rtref(rt); /* increment the reference count */
- ro->ro_rt = rt;
-}
-#endif
-
/*
* Transform old in_pcbconnect() into an inner subroutine for new
* in_pcbconnect(): Do some validity-checking on the remote
}
if (inp->inp_laddr.s_addr == INADDR_ANY) {
struct route *ro;
+ unsigned int ifscope;
ia = (struct in_ifaddr *)0;
+ ifscope = (inp->inp_flags & INP_BOUND_IF) ?
+ inp->inp_boundif : IFSCOPE_NONE;
/*
* If route is known or can be allocated now,
* our src addr is taken from the i/f, else punt.
ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
sin->sin_addr;
-#if CONFIG_FORCE_OUT_IFP
- /* If the socket has requested a specific interface, use that address */
- if (inp->pdp_ifp != NULL) {
- pdp_context_route_locked(inp->pdp_ifp, ro);
- }
- else
-#endif /* CONFIG_FORCE_OUT_IFP */
- rtalloc_ign_locked(ro, 0UL);
+ rtalloc_scoped_ign_locked(ro, 0UL, ifscope);
}
/*
* If we found a route, use the address
sin->sin_port = 0;
ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
if (ia == 0) {
- ia = ifatoia(ifa_ifwithnet(sintosa(sin)));
+ ia = ifatoia(ifa_ifwithnet_scoped(sintosa(sin),
+ ifscope));
}
sin->sin_port = fport;
if (ia == 0) {
so->so_saved_pcb = (caddr_t) inp;
so->so_pcb = 0;
inp->inp_socket = 0;
- inp->reserved[0] = (u_int32_t)so;
#if CONFIG_MACF_NET
mac_inpcb_label_destroy(inp);
#endif
in_pcbremlists(inp);
inp->inp_socket = 0;
- inp->reserved[0] = (u_int32_t) so;
zfree(pcbinfo->ipi_zone, inp);
pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; /* restores dummypcb */
}
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#else
void *inpcb_mtx;
#endif
- u_int32_t reserved[4]; /* future use (some already used) */
+ unsigned int inp_boundif; /* interface scope for INP_BOUND_IF */
+ u_int32_t inp_reserved[3]; /* reserved for future use */
#if CONFIG_MACF_NET
struct label *inp_label; /* MAC label */
#endif
-#if CONFIG_FORCE_OUT_IFP
-#ifdef _KERN_SYS_KERNELTYPES_H_
- ifnet_t pdp_ifp;
-#else
- void *pdp_ifp;
-#endif /* _KERN_SYS_KERNELTYPES_H_ */
-#endif /* CONFIG_EMBEDDED */
#if CONFIG_IP_EDGEHOLE
u_int32_t inpcb_edgehole_flags;
u_int32_t inpcb_edgehole_mask;
#define INP_RECVTTL 0x1000
#define INP_UDP_NOCKSUM 0x2000 /* Turn off outbound UDP checksum */
+#define INP_BOUND_IF 0x4000 /* bind socket to an ifindex */
#define IN6P_IPV6_V6ONLY 0x008000 /* restrict AF_INET6 socket for v6 */
void in_pcbremlists(struct inpcb *inp);
int in_pcb_ckeckstate(struct inpcb *, int, int);
void inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat);
-#if CONFIG_FORCE_OUT_IFP
-void pdp_context_route_locked(ifnet_t ifp, struct route *ro);
-#endif
#endif /* KERNEL */
#endif /* KERNEL_PRIVATE */
/*
- * Copyright (c) 2000,2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
static void in_rtqtimo(void *rock);
#endif
+static struct radix_node *in_matroute_args(void *, struct radix_node_head *,
+ rn_matchf_t *f, void *);
+
#define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */
/*
* Find out if it is because of an
* ARP entry and delete it if so.
*/
- rt2 = rtalloc1_locked((struct sockaddr *)sin, 0,
- RTF_CLONING | RTF_PRCLONING);
+ rt2 = rtalloc1_scoped_locked(rt_key(rt), 0,
+ RTF_CLONING | RTF_PRCLONING, sa_get_ifscope(rt_key(rt)));
if (rt2) {
if (rt2->rt_flags & RTF_LLINFO &&
rt2->rt_flags & RTF_HOST &&
return ret;
}
+/*
+ * Validate (unexpire) an expiring AF_INET route.
+ */
+struct radix_node *
+in_validate(struct radix_node *rn)
+{
+ struct rtentry *rt = (struct rtentry *)rn;
+
+	/* Is this the first reference? */
+ if (rt != NULL && rt->rt_refcnt == 0 && (rt->rt_flags & RTPRF_OURS)) {
+ rt->rt_flags &= ~RTPRF_OURS;
+ rt->rt_rmx.rmx_expire = 0;
+ }
+ return (rn);
+}
+
+/*
+ * Similar to in_matroute_args except without the leaf-matching parameters.
+ */
+static struct radix_node *
+in_matroute(void *v_arg, struct radix_node_head *head)
+{
+ return (in_matroute_args(v_arg, head, NULL, NULL));
+}
+
/*
* This code is the inverse of in_clsroute: on first reference, if we
* were managing the route, stop doing so and set the expiration timer
* back off again.
*/
static struct radix_node *
-in_matroute(void *v_arg, struct radix_node_head *head)
+in_matroute_args(void *v_arg, struct radix_node_head *head,
+ rn_matchf_t *f, void *w)
{
- struct radix_node *rn = rn_match(v_arg, head);
- struct rtentry *rt = (struct rtentry *)rn;
+ struct radix_node *rn = rn_match_args(v_arg, head, f, w);
- if(rt && rt->rt_refcnt == 0) { /* this is first reference */
- if(rt->rt_flags & RTPRF_OURS) {
- rt->rt_flags &= ~RTPRF_OURS;
- rt->rt_rmx.rmx_expire = 0;
- }
- }
- return rn;
+ return (in_validate(rn));
}
static int rtq_reallyold = 60*60;
rnh = *head;
rnh->rnh_addaddr = in_addroute;
rnh->rnh_matchaddr = in_matroute;
+ rnh->rnh_matchaddr_args = in_matroute_args;
rnh->rnh_close = in_clsroute;
in_rtqtimo(rnh); /* kick off timeout first time */
return 1;
/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
struct sockaddr_in ia_dstaddr; /* reserve space for broadcast addr */
#define ia_broadaddr ia_dstaddr
struct sockaddr_in ia_sockmask; /* reserve space for general netmask */
-#if CONFIG_FORCE_OUT_IFP
- struct rtentry *ia_route; /* PDP context hack - a faux route we can use */
-#endif
};
#endif /* PRIVATE */
int in_control(struct socket *, u_long, caddr_t, struct ifnet *,
struct proc *);
void in_rtqdrain(void);
+extern struct radix_node *in_validate(struct radix_node *);
void ip_input(struct mbuf *);
int in_ifadown(struct ifaddr *ifa, int);
void in_ifscrub(struct ifnet *, struct in_ifaddr *, int);
/* Reinject packet into the system as incoming or outgoing */
if (!sin || sin->sin_addr.s_addr == 0) {
+ struct ip_out_args ipoa = { IFSCOPE_NONE };
+
/*
* Don't allow both user specified and setsockopt options,
* and don't allow packet length sizes that will crash
error = ip_output(m,
inp->inp_options, &inp->inp_route,
(so->so_options & SO_DONTROUTE) |
- IP_ALLOWBROADCAST | IP_RAWOUTPUT,
- inp->inp_moptions, NULL);
+ IP_ALLOWBROADCAST | IP_RAWOUTPUT | IP_OUTARGS,
+ inp->inp_moptions, &ipoa);
socket_lock(so, 0);
} else {
struct ifaddr *ifa;
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
(void)ip_output(m, NULL, NULL, pkt->flags, NULL, NULL);
if (tmp_rt.ro_rt) {
rtfree(tmp_rt.ro_rt);
+ tmp_rt.ro_rt = NULL;
}
break ;
}
pkt->dn_dst = fwa->dst;
pkt->flags = fwa->flags;
+ if (fwa->ipoa != NULL)
+ pkt->ipoa = *(fwa->ipoa);
}
if (q->head == NULL)
q->head = m;
struct m_tag *tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, NULL); \
if (tag) { \
struct dn_pkt_tag *n = (struct dn_pkt_tag *)(tag+1); \
- if (n->ro.ro_rt) \
+ if (n->ro.ro_rt) { \
rtfree(n->ro.ro_rt); \
+ n->ro.ro_rt = NULL; \
+ } \
} \
m_tag_delete(_m, tag); \
m_freem(_m); \
/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* processing requirements.
*/
#ifdef KERNEL
+#include <netinet/ip_var.h> /* for ip_out_args */
+
struct dn_pkt_tag {
struct ip_fw *rule; /* matching rule */
int dn_dir; /* action when packet comes out. */
struct sockaddr_in *dn_dst ;
struct route ro; /* route, for ip_output. MUST COPY */
int flags ; /* flags, for ip_output (IPv6 ?) */
+ struct ip_out_args ipoa; /* output args, for ip_output. MUST COPY */
};
#else
struct dn_pkt;
LIST_REMOVE(ipf, ipf_next);
ipflow_addstats(ipf);
rtfree(ipf->ipf_ro.ro_rt);
+ ipf->ipf_ro.ro_rt = NULL;
return ipf;
}
/* note: called under the ip_mutex lock */
LIST_REMOVE(ipf, ipf_next);
ipflow_addstats(ipf);
rtfree(ipf->ipf_ro.ro_rt);
+ ipf->ipf_ro.ro_rt = NULL;
ipf->ipf_uses = ipf->ipf_last_uses = 0;
ipf->ipf_errors = ipf->ipf_dropped = 0;
}
ip_rtaddr(ip->ip_dst, &sro);
m->m_flags |= M_SKIP_FIREWALL;
ip_output_list(m, 0, NULL, &sro, 0, NULL, NULL);
- if (sro.ro_rt)
+ if (sro.ro_rt) {
RTFREE(sro.ro_rt);
+ sro.ro_rt = NULL;
+ }
}
/*
struct route *ro; /* for dummynet */
struct sockaddr_in *dst; /* for dummynet */
int flags; /* for dummynet */
+ struct ip_out_args *ipoa; /* for dummynet */
struct ipfw_flow_id f_id; /* grabbed from IP header */
u_int16_t divert_rule; /* divert cookie */
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
}
#endif
icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
- rtredirect((struct sockaddr *)&icmpsrc,
- (struct sockaddr *)&icmpdst,
- (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
- (struct sockaddr *)&icmpgw, (struct rtentry **)0);
+ rtredirect(m->m_pkthdr.rcvif, (struct sockaddr *)&icmpsrc,
+ (struct sockaddr *)&icmpdst, NULL, RTF_GATEWAY | RTF_HOST,
+ (struct sockaddr *)&icmpgw, NULL);
pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc);
#if IPSEC
key_sa_routechange((struct sockaddr *)&icmpsrc);
int hlen;
struct icmp *icp;
struct route ro;
+ struct ip_out_args ipoa = { IFSCOPE_NONE };
+
+ if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL)
+ ipoa.ipoa_ifscope = m->m_pkthdr.rcvif->if_index;
hlen = IP_VHL_HL(ip->ip_vhl) << 2;
m->m_data += hlen;
}
#endif
bzero(&ro, sizeof ro);
- (void) ip_output(m, opts, &ro, 0, NULL, NULL);
- if (ro.ro_rt)
+ (void) ip_output(m, opts, &ro, IP_OUTARGS, NULL, &ipoa);
+ if (ro.ro_rt) {
rtfree(ro.ro_rt);
+ ro.ro_rt = NULL;
+ }
}
n_time
#endif
case IP_STRIPHDR:
case IP_RECVTTL:
+ case IP_BOUND_IF:
+#if CONFIG_FORCE_OUT_IFP
+ case IP_FORCE_OUT_IFP:
+#endif
error = rip_ctloutput(so, sopt);
break;
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
static int currentfrags = 0;
+#if CONFIG_SCOPEDROUTING
+int ip_doscopedroute = 1;
+#else
+int ip_doscopedroute = 0;
+#endif
+SYSCTL_INT(_net_inet_ip, OID_AUTO, scopedroute, CTLFLAG_RW,
+ &ip_doscopedroute, 0, "Enable IPv4 scoped routing");
+
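
For reference, the net.inet.ip.scopedroute knob declared above is reachable from user space through the standard sysctl interface. The following stand-alone sketch (not part of this change, purely illustrative) reads the value with sysctlbyname(3); the MIB name is taken directly from the SYSCTL_INT declaration.

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int enabled = 0;
	size_t len = sizeof (enabled);

	/* Read the current value of net.inet.ip.scopedroute */
	if (sysctlbyname("net.inet.ip.scopedroute", &enabled, &len,
	    NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("IPv4 scoped routing is %s\n", enabled ? "enabled" : "disabled");
	return (0);
}
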
/*
* XXX - Setting ip_checkinterface mostly implements the receive side of
* the Strong ES model described in RFC 1122, but since the routing table
panic("ip_input no HDR");
#endif
+#if DUMMYNET
if (args.rule) { /* dummynet already filtered us */
ip = mtod(m, struct ip *);
hlen = IP_VHL_HL(ip->ip_vhl) << 2;
inject_filter_ref = ipf_get_inject_filter(m);
goto iphack ;
}
+#endif /* DUMMYNET */
#endif /* IPFIREWALL */
/*
n_long dest;
struct in_addr pkt_dst;
struct ifnet *destifp;
- struct ifnet *rcvif = m->m_pkthdr.rcvif;
#if IPSEC
struct ifnet dummyifp;
#endif
- m->m_pkthdr.rcvif = NULL;
-
dest = 0;
/*
* Cache the destination address of the packet; this may be
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <sys/sysctl.h>
#include <net/if.h>
+#include <net/if_dl.h>
#include <net/route.h>
#include <netinet/in.h>
static int ip_setmoptions(struct sockopt *, struct ip_moptions **);
static void ip_out_cksum_stats(int, u_int32_t);
+static struct ifaddr *in_selectsrcif(struct ip *, struct route *, unsigned int);
+static void ip_bindif(struct inpcb *, unsigned int);
int ip_createmoptions(struct ip_moptions **imop);
int ip_addmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
SYSCTL_INT(_net_inet_ip, OID_AUTO, forge_ce, CTLFLAG_RW,
&forge_ce, 0, "Forge ECN CE");
#endif /* DEBUG */
+
+static int ip_select_srcif_debug = 0;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, select_srcif_debug, CTLFLAG_RW,
+ &ip_select_srcif_debug, 0, "log source interface selection debug info");
+
/*
* IP output. The packet in mbuf chain m contains a skeletal IP
* header (with len, off, ttl, proto, tos, src, dst).
struct route *ro,
int flags,
struct ip_moptions *imo,
- struct ifnet *ifp)
+ struct ip_out_args *ipoa)
{
int error;
- error = ip_output_list(m0, 0, opt, ro, flags, imo, ifp);
+ error = ip_output_list(m0, 0, opt, ro, flags, imo, ipoa);
return error;
}
struct route *ro,
int flags,
struct ip_moptions *imo,
-#if CONFIG_FORCE_OUT_IFP
- struct ifnet *pdp_ifp
-#else
- __unused struct ifnet *unused_ifp
-#endif
+ struct ip_out_args *ipoa
)
{
struct ip *ip, *mhip;
ipfilter_t inject_filter_ref = 0;
struct m_tag *tag;
struct route saved_route;
+ struct ip_out_args saved_ipoa;
struct mbuf * packetlist;
int pktcnt = 0;
-
+ unsigned int ifscope;
+ boolean_t select_srcif;
KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
args.eh = NULL;
args.rule = NULL;
args.divert_rule = 0; /* divert cookie */
+ args.ipoa = NULL;
/* Grab info from mtags prepended to the chain */
#if DUMMYNET
dst = dn_tag->dn_dst;
ifp = dn_tag->ifp;
flags = dn_tag->flags;
+ saved_ipoa = dn_tag->ipoa;
+ ipoa = &saved_ipoa;
m_tag_delete(m0, tag);
}
mtod(m, struct ip *)->ip_p);
#endif
+ /*
+ * Do not perform source interface selection when forwarding.
+ * At present the IP_OUTARGS flag implies a request for IP to
+ * perform source interface selection.
+ */
+ if (ip_doscopedroute &&
+ (flags & (IP_OUTARGS | IP_FORWARDING)) == IP_OUTARGS) {
+ select_srcif = TRUE;
+ ifscope = ipoa->ipoa_ifscope;
+ } else {
+ select_srcif = FALSE;
+ ifscope = IFSCOPE_NONE;
+ }
+
#if IPFIREWALL
if (args.rule != NULL) { /* dummynet already saw us */
ip = mtod(m, struct ip *);
rtfree_locked(ro->ro_rt);
ro->ro_rt = NULL;
}
- if (ro->ro_rt && ro->ro_rt->generation_id != route_generation)
+ /*
+ * If we're doing source interface selection, we may not
+ * want to use this route; only synch up the generation
+ * count otherwise.
+ */
+ if (!select_srcif && ro->ro_rt != NULL &&
+ ro->ro_rt->generation_id != route_generation)
ro->ro_rt->generation_id = route_generation;
}
if (ro->ro_rt == NULL) {
ifp = ia->ia_ifp;
ip->ip_ttl = 1;
isbroadcast = in_broadcast(dst->sin_addr, ifp);
+ } else if (IN_MULTICAST(ntohl(pkt_dst.s_addr)) &&
+ imo != NULL && imo->imo_multicast_ifp != NULL) {
+ /*
+ * Bypass the normal routing lookup for multicast
+ * packets if the interface is specified.
+ */
+ ifp = imo->imo_multicast_ifp;
+ isbroadcast = 0;
+ if (ia != NULL)
+ ifafree(&ia->ia_ifa);
+
+ /* Could use IFP_TO_IA instead but rt_mtx is already held */
+ for (ia = TAILQ_FIRST(&in_ifaddrhead);
+ ia != NULL && ia->ia_ifp != ifp;
+ ia = TAILQ_NEXT(ia, ia_link))
+ continue;
+
+ if (ia != NULL)
+ ifaref(&ia->ia_ifa);
} else {
+ boolean_t cloneok = FALSE;
+ /*
+ * Perform source interface selection; the source IP address
+ * must belong to one of the addresses of the interface used
+ * by the route. For performance reasons, do this only if
+ * there is no route, or if the routing table has changed,
+ * or if we haven't done source interface selection on this
+ * route (for this PCB instance) before.
+ */
+ if (select_srcif && ip->ip_src.s_addr != INADDR_ANY &&
+ (ro->ro_rt == NULL ||
+ ro->ro_rt->generation_id != route_generation ||
+ !(ro->ro_flags & ROF_SRCIF_SELECTED))) {
+ struct ifaddr *ifa;
-#if CONFIG_FORCE_OUT_IFP
- /* Check if this packet should be forced out a specific interface */
- if (ro->ro_rt == 0 && pdp_ifp != NULL) {
- pdp_context_route_locked(pdp_ifp, ro);
-
- if (ro->ro_rt == NULL) {
- OSAddAtomic(1, (UInt32*)&ipstat.ips_noroute);
- error = EHOSTUNREACH;
+ /* Find the source interface */
+ ifa = in_selectsrcif(ip, ro, ifscope);
+
+ /*
+ * If the source address is spoofed (in the case
+ * of IP_RAWOUTPUT), or if this is destined for
+ * local/loopback, just let it go out using the
+ * interface of the route. Otherwise, there's no
+ * interface having such an address, so bail out.
+ */
+ if (ifa == NULL && !(flags & IP_RAWOUTPUT) &&
+ ifscope != lo_ifp->if_index) {
+ error = EADDRNOTAVAIL;
lck_mtx_unlock(rt_mtx);
goto bad;
}
+
+ /*
+ * If the caller didn't explicitly specify the scope,
+ * pick it up from the source interface. If the cached
+ * route was wrong and was blown away as part of source
+ * interface selection, don't mask out RTF_PRCLONING
+ * since that route may have been allocated by the ULP,
+ * unless the IP header was created by the caller or
+ * the destination is IPv4 LLA. The check for the
+ * latter is needed because IPv4 LLAs are never scoped
+ * in the current implementation, and we don't want to
+ * replace the resolved IPv4 LLA route with one whose
+ * gateway points to that of the default gateway on
+ * the primary interface of the system.
+ */
+ if (ifa != NULL) {
+ if (ifscope == IFSCOPE_NONE)
+ ifscope = ifa->ifa_ifp->if_index;
+ ifafree(ifa);
+ cloneok = (!(flags & IP_RAWOUTPUT) &&
+ !(IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))));
+ }
}
-#endif
-
+
/*
* If this is the case, we probably don't want to allocate
* a protocol-cloned route since we didn't get one from the
* the link layer, as this is probably required in all cases
* for correct operation (as it is for ARP).
*/
-
- if (ro->ro_rt == 0) {
+ if (ro->ro_rt == NULL) {
unsigned long ign = RTF_PRCLONING;
/*
* We make an exception here: if the destination
* that allocate a route and those that don't. The
* RTF_BROADCAST route is important since we'd want
* to send out undirected IP broadcast packets using
- * link-level broadcast address.
+ * link-level broadcast address. Another exception
+ * is for ULP-created routes that got blown away by
+ * source interface selection (see above).
*
- * This exception will no longer be necessary when
+ * These exceptions will no longer be necessary when
* the RTF_PRCLONING scheme is no longer present.
*/
- if (dst->sin_addr.s_addr == INADDR_BROADCAST)
+ if (cloneok || dst->sin_addr.s_addr == INADDR_BROADCAST)
ign &= ~RTF_PRCLONING;
- rtalloc_ign_locked(ro, ign);
+ rtalloc_scoped_ign_locked(ro, ign, ifscope);
}
- if (ro->ro_rt == 0) {
+
+ if (ro->ro_rt == NULL) {
OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute);
error = EHOSTUNREACH;
lck_mtx_unlock(rt_mtx);
goto bad;
}
-
+
if (ia)
ifafree(&ia->ia_ifa);
ia = ifatoia(ro->ro_rt->rt_ifa);
}
#if DUMMYNET
if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
- /*
- * pass the pkt to dummynet. Need to include
- * pipe number, m, ifp, ro, dst because these are
- * not recomputed in the next pass.
- * All other parameters have been already used and
- * so they are not needed anymore.
- * XXX note: if the ifp or ro entry are deleted
- * while a pkt is in dummynet, we are in trouble!
- */
- args.ro = ro;
- args.dst = dst;
- args.flags = flags;
-
- error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT,
- &args);
- goto done;
+ /*
+ * pass the pkt to dummynet. Need to include
+ * pipe number, m, ifp, ro, dst because these are
+ * not recomputed in the next pass.
+ * All other parameters have been already used and
+ * so they are not needed anymore.
+ * XXX note: if the ifp or ro entry are deleted
+ * while a pkt is in dummynet, we are in trouble!
+ */
+ args.ro = ro;
+ args.dst = dst;
+ args.flags = flags;
+ if (flags & IP_OUTARGS)
+ args.ipoa = ipoa;
+
+ error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT,
+ &args);
+ goto done;
}
#endif /* DUMMYNET */
#if IPDIVERT
break;
#undef OPTSET
-#if CONFIG_FORCE_OUT_IFP
+#if CONFIG_FORCE_OUT_IFP
+ /*
+ * Apple private interface, similar to IP_BOUND_IF, except
+ * that the parameter is a NULL-terminated string containing
+	 * the name of the network interface; an empty string means
+ * unbind. Applications are encouraged to use IP_BOUND_IF
+ * instead, as that is the current "official" API.
+ */
case IP_FORCE_OUT_IFP: {
- char ifname[IFNAMSIZ];
- ifnet_t ifp;
-
+ char ifname[IFNAMSIZ];
+ unsigned int ifscope;
+
+ /* This option is settable only for IPv4 */
+ if (!(inp->inp_vflag & INP_IPV4)) {
+ error = EINVAL;
+ break;
+ }
+
/* Verify interface name parameter is sane */
if (sopt->sopt_valsize > sizeof(ifname)) {
error = EINVAL;
break;
}
-
+
/* Copy the interface name */
if (sopt->sopt_valsize != 0) {
- error = sooptcopyin(sopt, ifname, sizeof(ifname), sopt->sopt_valsize);
+ error = sooptcopyin(sopt, ifname,
+ sizeof (ifname), sopt->sopt_valsize);
if (error)
break;
}
-
- if (sopt->sopt_valsize == 0 || ifname[0] == 0) {
- // Set pdp_ifp to NULL
- inp->pdp_ifp = NULL;
-
- // Flush the route
- if (inp->inp_route.ro_rt) {
- rtfree(inp->inp_route.ro_rt);
- inp->inp_route.ro_rt = NULL;
+
+ if (sopt->sopt_valsize == 0 || ifname[0] == NULL) {
+ /* Unbind this socket from any interface */
+ ifscope = IFSCOPE_NONE;
+ } else {
+ ifnet_t ifp;
+
+ /* Verify name is NULL terminated */
+ if (ifname[sopt->sopt_valsize - 1] != NULL) {
+ error = EINVAL;
+ break;
}
-
- break;
- }
-
- /* Verify name is NULL terminated */
- if (ifname[sopt->sopt_valsize - 1] != 0) {
- error = EINVAL;
- break;
- }
-
- if (ifnet_find_by_name(ifname, &ifp) != 0) {
- error = ENXIO;
- break;
- }
-
- /* Won't actually free. Since we don't release this later, we should do it now. */
- ifnet_release(ifp);
-
- /* This only works for point-to-point interfaces */
- if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
- error = ENOTSUP;
- break;
+
+ /* Bail out if given bogus interface name */
+ if (ifnet_find_by_name(ifname, &ifp) != 0) {
+ error = ENXIO;
+ break;
+ }
+
+ /* Bind this socket to this interface */
+ ifscope = ifp->if_index;
+
+ /*
+ * Won't actually free; since we don't release
+ * this later, we should do it now.
+ */
+ ifnet_release(ifp);
}
-
- inp->pdp_ifp = ifp;
+ ip_bindif(inp, ifscope);
}
break;
#endif
}
#endif /* TRAFFIC_MGT */
+ /*
+ * On a multihomed system, scoped routing can be used to
+ * restrict the source interface used for sending packets.
+ * The socket option IP_BOUND_IF binds a particular AF_INET
+ * socket to an interface such that data sent on the socket
+ * is restricted to that interface. This is unlike the
+ * SO_DONTROUTE option where the routing table is bypassed;
+	 * therefore it allows for greater flexibility and control
+ * over the system behavior, and does not place any restriction
+ * on the destination address type (e.g. unicast, multicast,
+ * or broadcast if applicable) or whether or not the host is
+ * directly reachable. Note that in the multicast transmit
+ * case, IP_MULTICAST_IF takes precedence over IP_BOUND_IF,
+ * since the former practically bypasses the routing table;
+ * in this case, IP_BOUND_IF sets the default interface used
+ * for sending multicast packets in the absence of an explicit
+ * transmit interface set via IP_MULTICAST_IF.
+ */
+ case IP_BOUND_IF:
+ /* This option is settable only for IPv4 */
+ if (!(inp->inp_vflag & INP_IPV4)) {
+ error = EINVAL;
+ break;
+ }
+
+ error = sooptcopyin(sopt, &optval, sizeof (optval),
+ sizeof (optval));
+
+ if (error)
+ break;
+
+ ip_bindif(inp, optval);
+ break;
+
default:
error = ENOPROTOOPT;
break;
}
#endif /* TRAFFIC_MGT */
+ case IP_BOUND_IF:
+ if (inp->inp_flags & INP_BOUND_IF)
+ optval = inp->inp_boundif;
+ error = sooptcopyout(sopt, &optval, sizeof (optval));
+ break;
+
default:
error = ENOPROTOOPT;
break;
m_freem(copym);
}
}
+
+/*
+ * Given a source IP address (and route, if available), determine the best
+ * interface to send the packet from.
+ */
+static struct ifaddr *
+in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
+{
+ struct ifaddr *ifa = NULL;
+ struct sockaddr src = { sizeof (struct sockaddr_in), AF_INET, { 0, } };
+ struct ifnet *rt_ifp;
+ char ip_src[16], ip_dst[16];
+
+ if (ip_select_srcif_debug) {
+ (void) inet_ntop(AF_INET, &ip->ip_src.s_addr, ip_src,
+ sizeof (ip_src));
+ (void) inet_ntop(AF_INET, &ip->ip_dst.s_addr, ip_dst,
+ sizeof (ip_dst));
+ }
+
+ lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
+
+ ((struct sockaddr_in *)&src)->sin_addr.s_addr = ip->ip_src.s_addr;
+ rt_ifp = (ro->ro_rt != NULL) ? ro->ro_rt->rt_ifp : NULL;
+
+ /*
+ * Given the source IP address, find a suitable source interface
+ * to use for transmission; if the caller has specified a scope,
+ * optimize the search by looking at the addresses only for that
+ * interface. This is still suboptimal, however, as we need to
+ * traverse the per-interface list.
+ */
+ if (ifscope != IFSCOPE_NONE || ro->ro_rt != NULL) {
+ unsigned int scope = ifscope;
+
+ /*
+ * If no scope is specified and the route is stale (pointing
+		 * to a defunct interface), use the current primary interface;
+ * this happens when switching between interfaces configured
+ * with the same IP address. Otherwise pick up the scope
+ * information from the route; the ULP may have looked up a
+ * correct route and we just need to verify it here and mark
+ * it with the ROF_SRCIF_SELECTED flag below.
+ */
+ if (scope == IFSCOPE_NONE) {
+ scope = rt_ifp->if_index;
+ if (scope != get_primary_ifscope() &&
+ ro->ro_rt->generation_id != route_generation)
+ scope = get_primary_ifscope();
+ }
+
+ ifa = ifa_ifwithaddr_scoped(&src, scope);
+
+ if (ip_select_srcif_debug && ifa != NULL) {
+ if (ro->ro_rt != NULL) {
+ printf("%s->%s ifscope %d->%d ifa_if %s%d "
+ "ro_if %s%d\n", ip_src, ip_dst, ifscope,
+ scope, ifa->ifa_ifp->if_name,
+ ifa->ifa_ifp->if_unit, rt_ifp->if_name,
+ rt_ifp->if_unit);
+ } else {
+ printf("%s->%s ifscope %d->%d ifa_if %s%d\n",
+ ip_src, ip_dst, ifscope, scope,
+ ifa->ifa_ifp->if_name,
+ ifa->ifa_ifp->if_unit);
+ }
+ }
+ }
+
+ /*
+ * Slow path; search for an interface having the corresponding source
+ * IP address if the scope was not specified by the caller, and:
+ *
+ * 1) There currently isn't any route, or,
+ * 2) The interface used by the route does not own that source
+ * IP address; in this case, the route will get blown away
+ * and we'll do a more specific scoped search using the newly
+ * found interface.
+ */
+ if (ifa == NULL && ifscope == IFSCOPE_NONE) {
+ ifa = ifa_ifwithaddr(&src);
+
+ if (ip_select_srcif_debug && ifa != NULL) {
+ printf("%s->%s ifscope %d ifa_if %s%d\n",
+ ip_src, ip_dst, ifscope, ifa->ifa_ifp->if_name,
+ ifa->ifa_ifp->if_unit);
+ }
+ }
+
+ /*
+ * If there is a non-loopback route with the wrong interface, or if
+ * there is no interface configured with such an address, blow it
+ * away. Except for local/loopback, we look for one with a matching
+ * interface scope/index.
+ */
+ if (ro->ro_rt != NULL &&
+ (ifa == NULL || (ifa->ifa_ifp != rt_ifp && rt_ifp != lo_ifp) ||
+ !(ro->ro_rt->rt_flags & RTF_UP))) {
+ if (ip_select_srcif_debug) {
+ if (ifa != NULL) {
+ printf("%s->%s ifscope %d ro_if %s%d != "
+ "ifa_if %s%d (cached route cleared)\n",
+ ip_src, ip_dst, ifscope, rt_ifp->if_name,
+ rt_ifp->if_unit, ifa->ifa_ifp->if_name,
+ ifa->ifa_ifp->if_unit);
+ } else {
+ printf("%s->%s ifscope %d ro_if %s%d "
+ "(no ifa_if found)\n",
+ ip_src, ip_dst, ifscope, rt_ifp->if_name,
+ rt_ifp->if_unit);
+ }
+ }
+
+ rtfree_locked(ro->ro_rt);
+ ro->ro_rt = NULL;
+ ro->ro_flags &= ~ROF_SRCIF_SELECTED;
+
+ /*
+ * If the destination is IPv4 LLA and the route's interface
+ * doesn't match the source interface, then the source IP
+ * address is wrong; it most likely belongs to the primary
+ * interface associated with the IPv4 LL subnet. Drop the
+ * packet rather than letting it go out and return an error
+ * to the ULP. This actually applies not only to IPv4 LL
+		 * but also to other shared subnets; for now we explicitly
+		 * test only for the former case and save the latter for the future.
+ */
+ if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) &&
+ !IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) && ifa != NULL) {
+ ifafree(ifa);
+ ifa = NULL;
+ }
+ }
+
+ if (ip_select_srcif_debug && ifa == NULL) {
+ printf("%s->%s ifscope %d (neither ro_if/ifa_if found)\n",
+ ip_src, ip_dst, ifscope);
+ }
+
+ /*
+ * If there is a route, mark it accordingly. If there isn't one,
+ * we'll get here again during the next transmit (possibly with a
+ * route) and the flag will get set at that point. For IPv4 LLA
+ * destination, mark it only if the route has been fully resolved;
+ * otherwise we want to come back here again when the route points
+	 * to the interface on which the ARP reply arrives.
+ */
+ if (ro->ro_rt != NULL && (!IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) ||
+ (ro->ro_rt->rt_gateway->sa_family == AF_LINK &&
+ SDL(ro->ro_rt->rt_gateway)->sdl_alen != 0))) {
+ ro->ro_flags |= ROF_SRCIF_SELECTED;
+ ro->ro_rt->generation_id = route_generation;
+ }
+
+ return (ifa);
+}
+
+/*
+ * Handler for setting IP_FORCE_OUT_IFP or IP_BOUND_IF socket option.
+ */
+static void
+ip_bindif(struct inpcb *inp, unsigned int ifscope)
+{
+ /*
+ * A zero interface scope value indicates an "unbind".
+ * Otherwise, take in whatever value the app desires;
+ * the app may already know the scope (or force itself
+ * to such a scope) ahead of time before the interface
+ * gets attached. It doesn't matter either way; any
+ * route lookup from this point on will require an
+ * exact match for the embedded interface scope.
+ */
+ inp->inp_boundif = ifscope;
+ if (inp->inp_boundif == IFSCOPE_NONE)
+ inp->inp_flags &= ~INP_BOUND_IF;
+ else
+ inp->inp_flags |= INP_BOUND_IF;
+
+ lck_mtx_lock(rt_mtx);
+ /* Blow away any cached route in the PCB */
+ if (inp->inp_route.ro_rt != NULL) {
+ rtfree_locked(inp->inp_route.ro_rt);
+ inp->inp_route.ro_rt = NULL;
+ }
+ lck_mtx_unlock(rt_mtx);
+}
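
To make the intent of the two socket options handled above concrete, here is a minimal user-space sketch (not part of this change) that binds an AF_INET socket to an interface by index with IP_BOUND_IF. It assumes the IP_BOUND_IF constant is exported to user space through <netinet/in.h> and, per the handler above, takes an int-sized interface index, where 0 (IFSCOPE_NONE in the kernel) unbinds; IP_FORCE_OUT_IFP would instead take the NUL-terminated interface name. The helper name and the choice of a UDP socket are illustrative only.

#include <sys/socket.h>
#include <netinet/in.h>
#include <net/if.h>		/* if_nametoindex() */
#include <stdio.h>
#include <unistd.h>

int
bind_socket_to_ifname(const char *ifname)
{
	int s, idx;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
		perror("socket");
		return (-1);
	}
	if ((idx = (int)if_nametoindex(ifname)) == 0) {
		perror("if_nametoindex");
		close(s);
		return (-1);
	}
	/* Scope all traffic on this socket to the given interface. */
	if (setsockopt(s, IPPROTO_IP, IP_BOUND_IF, &idx, sizeof (idx)) < 0) {
		perror("setsockopt(IP_BOUND_IF)");
		close(s);
		return (-1);
	}
	return (s);
}

Unbinding is the same call with the index set to 0.
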
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#define IP_NOIPSEC 0x4 /* No IPSec processing */
#define IP_ROUTETOIF SO_DONTROUTE /* bypass routing tables (0x0010) */
#define IP_ALLOWBROADCAST SO_BROADCAST /* can send broadcast packets (0x0020) */
+#define IP_OUTARGS 0x100 /* has ancillary output info */
struct ip;
struct inpcb;
struct route;
struct sockopt;
+/*
+ * Extra information passed to ip_output when IP_OUTARGS is set.
+ */
+struct ip_out_args {
+ unsigned int ipoa_ifscope; /* interface scope */
+};
+
extern struct ipstat ipstat;
#if !defined(RANDOM_IP_ID) || RANDOM_IP_ID == 0
extern u_short ip_id; /* ip packet ctr, for ids */
extern u_long (*ip_mcast_src)(int);
extern int rsvp_on;
extern struct pr_usrreqs rip_usrreqs;
+extern int ip_doscopedroute;
int ip_ctloutput(struct socket *, struct sockopt *sopt);
void ip_drain(void);
void ip_init(void) __attribute__((section("__TEXT, initcode")));
extern int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
struct ip_moptions *);
-int ip_output(struct mbuf *,
- struct mbuf *, struct route *, int, struct ip_moptions *, struct ifnet *);
-int ip_output_list(struct mbuf *, int,
- struct mbuf *, struct route *, int, struct ip_moptions *, struct ifnet *);
+extern int ip_output(struct mbuf *, struct mbuf *, struct route *, int,
+ struct ip_moptions *, struct ip_out_args *);
+extern int ip_output_list(struct mbuf *, int, struct mbuf *, struct route *,
+ int, struct ip_moptions *, struct ip_out_args *);
struct in_ifaddr *
ip_rtaddr(struct in_addr, struct route *);
void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
error = ip_output(m, NULL, &ro, IP_ALLOWBROADCAST | IP_RAWOUTPUT, imo, NULL);
/* Release the route */
- if (ro.ro_rt)
+ if (ro.ro_rt) {
rtfree(ro.ro_rt);
+ ro.ro_rt = NULL;
+ }
return error;
}
error = ip6_output(m, NULL, &ro, 0, im6o, NULL, 0);
/* Release the route */
- if (ro.ro_rt)
+ if (ro.ro_rt) {
rtfree(ro.ro_rt);
+ ro.ro_rt = NULL;
+ }
return error;
}
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
register struct ip *ip;
register struct inpcb *inp = sotoinpcb(so);
int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST;
+ struct ip_out_args ipoa;
+
+ /* If socket was bound to an ifindex, tell ip_output about it */
+ ipoa.ipoa_ifscope = (inp->inp_flags & INP_BOUND_IF) ?
+ inp->inp_boundif : IFSCOPE_NONE;
+ flags |= IP_OUTARGS;
/*
* If the user handed us a complete IP packet, use it.
#if CONFIG_IP_EDGEHOLE
ip_edgehole_mbuf_tag(inp, m);
#endif
-
-#if CONFIG_FORCE_OUT_IFP
- return (ip_output_list(m, 0, inp->inp_options, &inp->inp_route, flags,
- inp->inp_moptions, inp->pdp_ifp));
-#else
- return (ip_output_list(m, 0, inp->inp_options, &inp->inp_route, flags,
- inp->inp_moptions, NULL));
-#endif
+ return (ip_output(m, inp->inp_options, &inp->inp_route, flags,
+ inp->inp_moptions, &ipoa));
}
#if IPFIREWALL
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
struct inpcbinfo tcbinfo;
static void tcp_dooptions(struct tcpcb *,
- u_char *, int, struct tcphdr *, struct tcpopt *);
+ u_char *, int, struct tcphdr *, struct tcpopt *, unsigned int);
static void tcp_pulloutofband(struct socket *,
struct tcphdr *, struct mbuf *, int);
static int tcp_reass(struct tcpcb *, struct tcphdr *, int *,
#endif
struct m_tag *fwd_tag;
u_char ip_ecn = IPTOS_ECN_NOTECT;
+ unsigned int ifscope;
+
+ /*
+	 * Record the interface on which this segment arrived; this does not
+ * affect normal data output (for non-detached TCP) as it provides a
+ * hint about which route and interface to use for sending in the
+ * absence of a PCB, when scoped routing (and thus source interface
+	 * selection) is enabled.
+ */
+ if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL)
+ ifscope = m->m_pkthdr.rcvif->if_index;
+ else
+ ifscope = IFSCOPE_NONE;
/* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */
fwd_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL);
ip->ip_dst, th->th_dport, 1, m->m_pkthdr.rcvif);
}
+ /*
+ * Use the interface scope information from the PCB for outbound
+ * segments. If the PCB isn't present and if scoped routing is
+	 * enabled, tcp_respond will use the scope of the interface on
+	 * which the segment arrived.
+ */
+ if (inp != NULL && (inp->inp_flags & INP_BOUND_IF))
+ ifscope = inp->inp_boundif;
#if IPSEC
if (ipsec_bypass == 0) {
#if INET6
struct inpcb *oinp = sotoinpcb(so);
#endif /* INET6 */
int ogencnt = so->so_gencnt;
+ unsigned int head_ifscope;
+
+ /* Get listener's bound-to-interface, if any */
+ head_ifscope = (inp->inp_flags & INP_BOUND_IF) ?
+ inp->inp_boundif : IFSCOPE_NONE;
#if !IPSEC
/*
*/
dropsocket++;
inp = (struct inpcb *)so->so_pcb;
+
+ /*
+ * Inherit INP_BOUND_IF from listener; testing if
+ * head_ifscope is non-zero is sufficient, since it
+ * can only be set to a non-zero value earlier if
+ * the listener has such a flag set.
+ */
+#if INET6
+ if (head_ifscope != IFSCOPE_NONE && !isipv6) {
+#else
+ if (head_ifscope != IFSCOPE_NONE) {
+#endif /* INET6 */
+ inp->inp_flags |= INP_BOUND_IF;
+ inp->inp_boundif = head_ifscope;
+ }
#if INET6
if (isipv6)
inp->in6p_laddr = ip6->ip6_dst;
* else do it below (after getting remote address).
*/
if (tp->t_state != TCPS_LISTEN && optp)
- tcp_dooptions(tp, optp, optlen, th, &to);
+ tcp_dooptions(tp, optp, optlen, th, &to, ifscope);
if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
if (to.to_flags & TOF_SCALE) {
tp->ts_recent_age = tcp_now;
}
if (to.to_flags & TOF_MSS)
- tcp_mss(tp, to.to_mss);
+ tcp_mss(tp, to.to_mss, ifscope);
if (tp->sack_enable) {
if (!(to.to_flags & TOF_SACK))
tp->sack_enable = 0;
tp->ts_recent = to.to_tsval;
}
+ /* Force acknowledgment if we received a FIN */
+
+ if (thflags & TH_FIN)
+ tp->t_flags |= TF_ACKNOW;
+
if (tlen == 0) {
if (SEQ_GT(th->th_ack, tp->snd_una) &&
SEQ_LEQ(th->th_ack, tp->snd_max) &&
FREE(sin, M_SONAME);
}
- tcp_dooptions(tp, optp, optlen, th, &to);
+ tcp_dooptions(tp, optp, optlen, th, &to, ifscope);
if (tp->sack_enable) {
if (!(to.to_flags & TOF_SACK))
soisdisconnected(so);
}
tp->t_state = TCPS_FIN_WAIT_2;
- goto drop;
+ /* fall through and make sure we also recognize data ACKed with the FIN */
}
+ tp->t_flags |= TF_ACKNOW;
break;
/*
add_to_time_wait(tp);
soisdisconnected(so);
}
+ tp->t_flags |= TF_ACKNOW;
break;
/*
* case PRU_RCVD). If a FIN has already been received on this
* connection then we just ignore the text.
*/
- if ((tlen || (thflags&TH_FIN)) &&
+ if ((tlen || (thflags & TH_FIN)) &&
TCPS_HAVERCVDFIN(tp->t_state) == 0) {
tcp_seq save_start = th->th_seq;
tcp_seq save_end = th->th_seq + tlen;
if (thflags & TH_ACK)
/* mtod() below is safe as long as hdr dropping is delayed */
tcp_respond(tp, mtod(m, void *), th, m, (tcp_seq)0, th->th_ack,
- TH_RST, m->m_pkthdr.rcvif);
+ TH_RST, ifscope);
else {
if (thflags & TH_SYN)
tlen++;
/* mtod() below is safe as long as hdr dropping is delayed */
tcp_respond(tp, mtod(m, void *), th, m, th->th_seq+tlen,
- (tcp_seq)0, TH_RST|TH_ACK, m->m_pkthdr.rcvif);
+ (tcp_seq)0, TH_RST|TH_ACK, ifscope);
}
/* destroy temporarily created socket */
if (dropsocket) {
}
static void
-tcp_dooptions(tp, cp, cnt, th, to)
+tcp_dooptions(tp, cp, cnt, th, to, input_ifscope)
/*
* Parse TCP options and place in tcpopt.
*/
int cnt;
struct tcphdr *th;
struct tcpopt *to;
+ unsigned int input_ifscope;
{
u_short mss = 0;
int opt, optlen;
}
}
if (th->th_flags & TH_SYN)
- tcp_mss(tp, mss); /* sets t_maxseg */
+ tcp_mss(tp, mss, input_ifscope); /* sets t_maxseg */
}
/*
*
*/
void
-tcp_mss(tp, offer)
+tcp_mss(tp, offer, input_ifscope)
struct tcpcb *tp;
int offer;
+ unsigned int input_ifscope;
{
register struct rtentry *rt;
struct ifnet *ifp;
else
#endif /* INET6 */
{
- rt = tcp_rtlookup(inp);
+ rt = tcp_rtlookup(inp, input_ifscope);
if (rt && (rt->rt_gateway->sa_family == AF_LINK ||
rt->rt_ifp->if_flags & IFF_LOOPBACK))
isnetlocal = TRUE;
rt = tcp_rtlookup6(tp->t_inpcb);
else
#endif /* INET6 */
- rt = tcp_rtlookup(tp->t_inpcb);
+ rt = tcp_rtlookup(tp->t_inpcb, IFSCOPE_NONE);
if (rt == NULL) {
lck_mtx_unlock(rt_mtx);
return (
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
int error = 0;
boolean_t chain;
boolean_t unlocked = FALSE;
+ struct inpcb *inp = tp->t_inpcb;
+ struct ip_out_args ipoa;
+
+ /* If socket was bound to an ifindex, tell ip_output about it */
+ ipoa.ipoa_ifscope = (inp->inp_flags & INP_BOUND_IF) ?
+ inp->inp_boundif : IFSCOPE_NONE;
+ flags |= IP_OUTARGS;
/* Make sure ACK/DELACK conditions are cleared before
* we unlock the socket.
*/
cnt = 0;
}
-#if CONFIG_FORCE_OUT_IFP
- error = ip_output_list(pkt, cnt, opt, &tp->t_inpcb->inp_route,
- flags, 0, tp->t_inpcb->pdp_ifp);
-#else
- error = ip_output_list(pkt, cnt, opt, &tp->t_inpcb->inp_route,
- flags, 0, NULL);
-#endif
+ error = ip_output_list(pkt, cnt, opt, &inp->inp_route,
+ flags, 0, &ipoa);
if (chain || error) {
/*
* If we sent down a chain then we are done since
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
tcp_seq ack,
tcp_seq seq,
int flags,
-#if CONFIG_FORCE_OUT_IFP
- ifnet_t ifp
-#else
- __unused ifnet_t ifp
-#endif
+ unsigned int ifscope
)
{
register int tlen;
struct ip6_hdr *ip6;
int isipv6;
#endif /* INET6 */
- int ipflags = 0;
#if INET6
isipv6 = IP_VHL_V(((struct ip *)ipgen)->ip_vhl) == 6;
#endif
#if INET6
if (isipv6) {
- (void)ip6_output(m, NULL, ro6, ipflags, NULL, NULL, 0);
+ (void)ip6_output(m, NULL, ro6, 0, NULL, NULL, 0);
if (ro6 == &sro6 && ro6->ro_rt) {
rtfree(ro6->ro_rt);
ro6->ro_rt = NULL;
} else
#endif /* INET6 */
{
-#if CONFIG_FORCE_OUT_IFP
- ifp = (tp && tp->t_inpcb) ? tp->t_inpcb->pdp_ifp :
- (ifp && (ifp->if_flags & IFF_POINTOPOINT) != 0) ? ifp : NULL;
-#endif
- (void) ip_output_list(m, 0, NULL, ro, ipflags, NULL, ifp);
+ struct ip_out_args ipoa = { ifscope };
+
+ (void) ip_output(m, NULL, ro, IP_OUTARGS, NULL, &ipoa);
+
if (ro == &sro && ro->ro_rt) {
rtfree(ro->ro_rt);
ro->ro_rt = NULL;
rt = tcp_rtlookup6(inp);
else
#endif /* INET6 */
- rt = tcp_rtlookup(inp);
+ rt = tcp_rtlookup(inp, IFSCOPE_NONE);
if (!rt || !rt->rt_rmx.rmx_mtu) {
tp->t_maxopd = tp->t_maxseg =
#if INET6
* to get the interface MTU.
*/
struct rtentry *
-tcp_rtlookup(inp)
+tcp_rtlookup(inp, input_ifscope)
struct inpcb *inp;
+ unsigned int input_ifscope;
{
struct route *ro;
struct rtentry *rt;
if (rt == NULL || !(rt->rt_flags & RTF_UP) || rt->generation_id != route_generation) {
/* No route yet, so try to acquire one */
if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ unsigned int ifscope;
+
ro->ro_dst.sa_family = AF_INET;
ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
inp->inp_faddr;
- rtalloc_ign_locked(ro, 0UL);
+
+ /*
+ * If the socket was bound to an interface, then
+ * the bound-to-interface takes precedence over
+ * the inbound interface passed in by the caller
+ * (if we get here as part of the output path then
+ * input_ifscope is IFSCOPE_NONE).
+ */
+ ifscope = (inp->inp_flags & INP_BOUND_IF) ?
+ inp->inp_boundif : input_ifscope;
+
+ rtalloc_scoped_ign_locked(ro, 0UL, ifscope);
rt = ro->ro_rt;
}
}
rt = tcp_rtlookup6(inp);
else
#endif /* INET6 */
- rt = tcp_rtlookup(inp);
+ rt = tcp_rtlookup(inp, IFSCOPE_NONE);
/* Make sure this is a host route and is up. */
if (rt == NULL ||
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
tcpstat.tcps_keepprobe++;
t_template = tcp_maketemplate(tp);
if (t_template) {
+ unsigned int ifscope;
+
+ if (tp->t_inpcb->inp_flags & INP_BOUND_IF)
+ ifscope = tp->t_inpcb->inp_boundif;
+ else
+ ifscope = IFSCOPE_NONE;
+
tcp_respond(tp, t_template->tt_ipgen,
&t_template->tt_t, (struct mbuf *)NULL,
- tp->rcv_nxt, tp->snd_una - 1, 0, NULL);
+ tp->rcv_nxt, tp->snd_una - 1, 0, ifscope);
(void) m_free(dtom(t_template));
}
tp->t_timer[TCPT_KEEP] = tcp_keepintvl;
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
if (error)
goto out;
tp->snd_wnd = TTCP_CLIENT_SND_WND;
- tcp_mss(tp, -1);
+ tcp_mss(tp, -1, IFSCOPE_NONE);
}
if (flags & PRUS_EOF) {
if (error)
goto out;
tp->snd_wnd = TTCP_CLIENT_SND_WND;
- tcp_mss(tp, -1);
+ tcp_mss(tp, -1, IFSCOPE_NONE);
}
tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
tp->t_force = 1;
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
tcp_gettaocache(struct inpcb *);
void tcp_init(void) __attribute__((section("__TEXT, initcode")));
void tcp_input(struct mbuf *, int);
-void tcp_mss(struct tcpcb *, int);
+void tcp_mss(struct tcpcb *, int, unsigned int);
int tcp_mssopt(struct tcpcb *);
void tcp_drop_syn_sent(struct inpcb *, int);
void tcp_mtudisc(struct inpcb *, int);
int tcp_output(struct tcpcb *);
void tcp_quench(struct inpcb *, int);
void tcp_respond(struct tcpcb *, void *,
- struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int, ifnet_t);
-struct rtentry *
- tcp_rtlookup(struct inpcb *);
+ struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int,
+ unsigned int);
+struct rtentry *tcp_rtlookup(struct inpcb *, unsigned int);
void tcp_setpersist(struct tcpcb *);
void tcp_slowtimo(void);
struct tcptemp *
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
struct sockaddr_in *ifaddr;
int error = 0, udp_dodisconnect = 0;
struct socket *so = inp->inp_socket;
- int soopts;
+ int soopts = 0;
struct mbuf *inpopts;
struct ip_moptions *mopts;
struct route ro;
+ struct ip_out_args ipoa;
KERNEL_DEBUG(DBG_FNC_UDP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
goto release;
}
+ lck_mtx_assert(inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
+
+ /* If socket was bound to an ifindex, tell ip_output about it */
+ ipoa.ipoa_ifscope = (inp->inp_flags & INP_BOUND_IF) ?
+ inp->inp_boundif : IFSCOPE_NONE;
+ soopts |= IP_OUTARGS;
+
/* If there was a routing change, discard cached route and check
* that we have a valid source address.
* Reacquire a new source address if INADDR_ANY was specified
*/
-
-#if 1
- lck_mtx_assert(inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
-#endif
-
if (inp->inp_route.ro_rt && inp->inp_route.ro_rt->generation_id != route_generation) {
if (ifa_foraddr(inp->inp_laddr.s_addr) == 0) { /* src address is gone */
if (inp->inp_flags & INP_INADDR_ANY)
m->m_pkthdr.socket_id = get_socket_id(inp->inp_socket);
inpopts = inp->inp_options;
- soopts = (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST));
+ soopts |= (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST));
mopts = inp->inp_moptions;
/* We don't want to cache the route for non-connected UDP */
socket_unlock(so, 0);
/* XXX jgraessley please look at XXX */
error = ip_output_list(m, 0, inpopts,
- udp_dodisconnect ? &ro : &inp->inp_route, soopts, mopts, NULL);
+ udp_dodisconnect ? &ro : &inp->inp_route, soopts, mopts, &ipoa);
socket_lock(so, 0);
if (udp_dodisconnect) {
/* Discard the cached route, if there is one */
- if (ro.ro_rt != NULL)
+ if (ro.ro_rt != NULL) {
rtfree(ro.ro_rt);
+ ro.ro_rt = NULL;
+ }
in_pcbdisconnect(inp);
inp->inp_laddr = origladdr; /* XXX rehash? */
}
if (!validated)
return;
+ /*
+	 * If the suggested MTU is less than IPV6_MMTU, we only need
+	 * to remember that it was for the above-mentioned "alwaysfrag"
+	 * case.
+ * Try to be as close to the spec as possible.
+ */
+ if (mtu < IPV6_MMTU)
+ mtu = IPV6_MMTU - 8;
+
+
bzero(&sin6, sizeof(sin6));
sin6.sin6_family = PF_INET6;
sin6.sin6_len = sizeof(struct sockaddr_in6);
*/
bzero(&ro, sizeof(ro));
src = in6_selectsrc(&sa6_src, NULL, NULL, &ro, NULL, &src_storage, &e);
- if (ro.ro_rt)
+ if (ro.ro_rt) {
rtfree(ro.ro_rt); /* XXX: we could use this */
+ ro.ro_rt = NULL;
+ }
if (src == NULL) {
nd6log((LOG_DEBUG,
"icmp6_reflect: source can't be determined: "
bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr));
bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
- rtredirect((struct sockaddr *)&sdst, (struct sockaddr *)&sgw,
- (struct sockaddr *)NULL, RTF_GATEWAY | RTF_HOST,
- (struct sockaddr *)&ssrc,
- (struct rtentry **)NULL);
+ rtredirect(ifp, (struct sockaddr *)&sdst,
+ (struct sockaddr *)&sgw, NULL, RTF_GATEWAY | RTF_HOST,
+ (struct sockaddr *)&ssrc, NULL);
}
/* finally update cached route in each socket via pfctlinput */
{
m_freem(inp->in6p_options);
ip6_freepcbopts(inp->in6p_outputopts);
ip6_freemoptions(inp->in6p_moptions);
- if (inp->in6p_route.ro_rt)
+ if (inp->in6p_route.ro_rt) {
rtfree(inp->in6p_route.ro_rt);
+ inp->in6p_route.ro_rt = NULL;
+ }
/* Check and free IPv4 related resources in case of mapped addr */
if (inp->inp_options)
(void)m_free(inp->inp_options);
/*
- * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
static void in6_mtutimo(void *rock);
extern int tvtohz(struct timeval *);
+static struct radix_node *in6_matroute_args(void *, struct radix_node_head *,
+ rn_matchf_t *, void *);
+
#define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */
/*
return ret;
}
+/*
+ * Similar to in6_matroute_args except without the leaf-matching parameters.
+ */
+static struct radix_node *
+in6_matroute(void *v_arg, struct radix_node_head *head)
+{
+ return (in6_matroute_args(v_arg, head, NULL, NULL));
+}
+
/*
* This code is the inverse of in6_clsroute: on first reference, if we
* were managing the route, stop doing so and set the expiration timer
* back off again.
*/
static struct radix_node *
-in6_matroute(void *v_arg, struct radix_node_head *head)
+in6_matroute_args(void *v_arg, struct radix_node_head *head,
+ rn_matchf_t *f, void *w)
{
- struct radix_node *rn = rn_match(v_arg, head);
+ struct radix_node *rn = rn_match_args(v_arg, head, f, w);
struct rtentry *rt = (struct rtentry *)rn;
if (rt && rt->rt_refcnt == 0) { /* this is first reference */
rt->rt_rmx.rmx_expire = 0;
}
}
- return rn;
+ return (rn);
}
SYSCTL_DECL(_net_inet6_ip6);
rnh = *head;
rnh->rnh_addaddr = in6_addroute;
rnh->rnh_matchaddr = in6_matroute;
+ rnh->rnh_matchaddr_args = in6_matroute_args;
rnh->rnh_close = in6_clsroute;
in6_rtqtimo(rnh); /* kick off timeout first time */
in6_mtutimo(rnh); /* kick off timeout first time */
/*
- * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
}
bcopy(&ti, ip6, sizeof(ti));
tcp_respond(NULL, ip6, (struct tcphdr *)(ip6 + 1),
- *m, ack, seq, flags, NULL);
+ *m, ack, seq, flags, IFSCOPE_NONE);
*m = NULL;
break;
}
lck_mtx_unlock(ip6_mutex);
if (ro == &ip6route && ro->ro_rt) { /* brace necessary for rtfree */
rtfree(ro->ro_rt);
+ ro->ro_rt = NULL;
} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
rtfree(ro_pmtu->ro_rt);
+ ro_pmtu->ro_rt = NULL;
}
#if IPSEC
}
ifp = ro.ro_rt->rt_ifp;
rtfree(ro.ro_rt);
+ ro.ro_rt = NULL;
}
} else
ifp = ifindex2ifnet[mreq->ipv6mr_interface];
* DKIOCISFORMATTED is media formatted?
* DKIOCISWRITABLE is media writable?
*
+ * DKIOCREQUESTIDLE idle media
* DKIOCDISCARD delete unused data
*
* DKIOCGETMAXBLOCKCOUNTREAD get maximum block count for reads
#define DKIOCISFORMATTED _IOR('d', 23, uint32_t)
#define DKIOCISWRITABLE _IOR('d', 29, uint32_t)
+#define DKIOCREQUESTIDLE _IO('d', 30)
#define DKIOCDISCARD _IOW('d', 31, dk_discard_t)
#define DKIOCGETMAXBLOCKCOUNTREAD _IOR('d', 64, uint64_t)
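
The new DKIOCREQUESTIDLE request carries no argument, so issuing it from user space is a plain ioctl(2) on a disk device node. A minimal sketch follows (not part of this change); the /dev/rdisk1 path is only an example and the call requires appropriate privileges.

#include <sys/ioctl.h>
#include <sys/disk.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	/* Example device node; substitute the disk you actually mean. */
	int fd = open("/dev/rdisk1", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return (1);
	}
	/* Ask the media to go idle; the request has no payload. */
	if (ioctl(fd, DKIOCREQUESTIDLE) == -1)
		perror("ioctl(DKIOCREQUESTIDLE)");
	close(fd);
	return (0);
}
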
#define DTRACE_INVOP_BCTR 6
#define DTRACE_INVOP_TAILJUMP 7
#endif
+
+
#endif /* __APPLE__ */
#ifdef __cplusplus
#define F_FLOCK 0x020 /* Use flock(2) semantics for lock */
#define F_POSIX 0x040 /* Use POSIX semantics for lock */
#define F_PROV 0x080 /* Non-coelesced provisional lock */
+#define F_WAKE1_SAFE 0x100 /* its safe to only wake one waiter */
#endif
/*
off_t lf_start; /* Byte # of the start of the lock */
off_t lf_end; /* Byte # of the end of the lock (-1=EOF) */
caddr_t lf_id; /* Id of the resource holding the lock */
+ uint32_t lf_waiters; /* count of waiters on this lock */
struct lockf **lf_head; /* Back pointer to the head of the locf list */
struct vnode *lf_vnode; /* Back pointer to the inode */
struct lockf *lf_next; /* Pointer to the next lock on this inode */
#if CONFIG_DTRACE
extern int lockstat_depth(void);
extern void lockstat_hot_patch(boolean_t);
-extern void dtrace_membar_producer(void);
/*
* Macros to record lockstat probes.
/* compatiblity with 4.3 */
#define m_copy(m, o, l) m_copym((m), (o), (l), M_DONTWAIT)
+#define MBSHIFT 20 /* 1MB */
+
#endif /* KERNEL_PRIVATE */
/*
#ifdef __APPLE_API_UNSTABLE
int vnode_isnamedstream(vnode_t);
+int vnode_isshadow(vnode_t);
#endif
enum vtype vnode_iftovt(int);
#define VAGE 0x001000 /* Insert vnode at head of free list */
#define VRAOFF 0x002000 /* read ahead disabled */
#define VNCACHEABLE 0x004000 /* vnode is allowed to be put back in name cache */
-#define VUINACTIVE 0x008000 /* UBC vnode is on inactive list */
+#if NAMEDSTREAMS
+#define VISSHADOW 0x008000 /* vnode is a shadow file */
+#endif
#define VSWAP 0x010000 /* vnode is being used as swapfile */
#define VTHROTTLED 0x020000 /* writes or pageouts have been throttled */
/* wakeup tasks waiting when count falls below threshold */
#endif
}
+int
+vnode_isshadow(
+#if NAMEDSTREAMS
+ vnode_t vp
+#else
+ __unused vnode_t vp
+#endif
+ )
+{
+#if NAMEDSTREAMS
+ return ((vp->v_flag & VISSHADOW) ? 1 : 0);
+#else
+ return (0);
+#endif
+}
+
/* TBD: set vnode_t to not cache data after it is consumed once; used for quota */
void
vnode_setnocache(vnode_t vp)
*/
if (vnode_isnamedstream(vp) &&
(vp->v_parent != NULLVP) &&
- ((vp->v_parent->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0) &&
+ (vnode_isshadow(vp)) &&
((vp->v_lflag & VL_TERMINATE) == 0)) {
vnode_recycle(vp);
}
}
if (phys_blksz != (size_t)jnl->jhdr->jhdr_size && jnl->jhdr->jhdr_size != 0) {
- printf("jnl: %s: open: phys_blksz %lu does not match journal header size %d\n",
- jdev_name, phys_blksz, jnl->jhdr->jhdr_size);
+ /*
+ * The volume has probably been resized (such that we had to adjust the
+ * logical sector size), or copied to media with a different logical
+ * sector size. If the journal is empty, then just switch to the
+ * current logical sector size. If the journal is not empty, then
+ * fail to open the journal.
+ */
+
+ if (jnl->jhdr->start == jnl->jhdr->end) {
+ int err;
+ printf("jnl: %s: open: changing journal header size from %d to %lu\n",
+ jdev_name, jnl->jhdr->jhdr_size, phys_blksz);
+ jnl->jhdr->jhdr_size = phys_blksz;
+ if (write_journal_header(jnl)) {
+ printf("jnl: %s: open: failed to update journal header size\n", jdev_name);
+ goto bad_journal;
+ }
+ } else {
+ printf("jnl: %s: open: phys_blksz %lu does not match journal header size %d, and journal is not empty!\n",
+ jdev_name, phys_blksz, jnl->jhdr->jhdr_size);
+ goto bad_journal;
+ }
}
if ( jnl->jhdr->start <= 0
}
switch (cnp->cn_nameiop) {
case DELETE:
- nsop = NS_DELETE;
+ if (cnp->cn_flags & CN_ALLOWRSRCFORK) {
+ nsop = NS_DELETE;
+ }
+ else {
+ error = EPERM;
+ goto bad;
+ }
break;
case CREATE:
- nsop = NS_CREATE;
+ if (cnp->cn_flags & CN_ALLOWRSRCFORK) {
+ nsop = NS_CREATE;
+ }
+ else {
+ error = EPERM;
+ goto bad;
+ }
break;
case LOOKUP:
/* Make sure our lookup of "/..namedfork/rsrc" is allowed. */
TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
if (vp->v_lflag & VNAMED_MOUNT)
panic("insmntque: vp already in mount vnode list");
- if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb))
- panic("insmntque: vp on the free list\n");
vp->v_lflag |= VNAMED_MOUNT;
mount_ref(mp, 1);
mount_unlock(mp);
/* Delete the shadow stream file before we reclaim its vnode */
if ((is_namedstream != 0) &&
(vp->v_parent != NULLVP) &&
- ((vp->v_parent->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0)) {
+ (vnode_isshadow(vp))) {
vnode_relenamedstream(vp->v_parent, vp, ctx);
}
#endif
if (param->vnfs_mp->mnt_kern_flag & MNTK_LOCK_LOCAL)
vp->v_flag |= VLOCKLOCAL;
if (insert) {
+ if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb))
+ panic("insmntque: vp on the free list\n");
+
/*
* enter in mount vnode list
*/
int need_event = 0;
int has_listeners = 0;
+#if NAMEDRSRCFORK
+ /* unlink or delete is allowed on rsrc forks and named streams */
+ ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
+#endif
+
ndp->ni_cnd.cn_flags |= LOCKPARENT;
cnp = &ndp->ni_cnd;
* since it may need to release the fs_nodelock on the dvp
*/
out:
+#if NAMEDRSRCFORK
+ /* recycle deleted rsrc fork to force reclaim on shadow file if necessary */
+ if ((vnode_isnamedstream(ndp->ni_vp)) &&
+ (ndp->ni_vp->v_parent != NULLVP) &&
+ (vnode_isshadow(ndp->ni_vp))) {
+ vnode_recycle(ndp->ni_vp);
+ }
+#endif
+
nameidone(ndp);
vnode_put(dvp);
vnode_put(vp);
*/
if (vnode_isnamedstream(nd.ni_vp) &&
(nd.ni_vp->v_parent != NULLVP) &&
- ((nd.ni_vp->v_parent->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0)) {
+ (vnode_isshadow(nd.ni_vp))) {
is_namedstream = 1;
vnode_ref(nd.ni_vp);
}
*/
if (vnode_isnamedstream(ndp->ni_vp) &&
(ndp->ni_vp->v_parent != NULLVP) &&
- ((ndp->ni_vp->v_parent->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0)) {
+ (vnode_isshadow(ndp->ni_vp))) {
is_namedstream = 1;
vnode_ref (ndp->ni_vp);
}
if ((error == 0) &&
(vp->v_flag & VISNAMEDSTREAM) &&
(vp->v_parent != NULLVP) &&
- !(vp->v_parent->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) &&
+ (vnode_isshadow(vp)) &&
(fp->f_flags & FP_WRITTEN)) {
(void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
}
bad:
ndp->ni_vp = NULL;
if (vp) {
- vnode_put(vp);
+#if NAMEDRSRCFORK
+ if ((vnode_isnamedstream(vp)) && (vp->v_parent != NULLVP) &&
+ (vnode_isshadow (vp))) {
+ vnode_recycle(vp);
+ }
+#endif
+ vnode_put(vp);
/*
* Check for a race against unlink. We had a vnode
* but according to vnode_authorize or VNOP_OPEN it
/* Sync data from resource fork shadow file if needed. */
if ((vp->v_flag & VISNAMEDSTREAM) &&
(vp->v_parent != NULLVP) &&
- !(vp->v_parent->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS)) {
+ (vnode_isshadow(vp))) {
if (flags & FWASWRITTEN) {
(void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
}
error = default_getnamedstream(vp, svpp, name, op, context);
if (error == 0) {
+ uint32_t streamflags = VISNAMEDSTREAM;
vnode_t svp = *svpp;
-
+
+ if ((vp->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0) {
+ streamflags |= VISSHADOW;
+ }
+
/* Tag the vnode. */
- vnode_lock(svp);
- svp->v_flag |= VISNAMEDSTREAM;
+ vnode_lock_spin(svp);
+ svp->v_flag |= streamflags;
vnode_unlock(svp);
/* Make the file its parent.
* Note: This parent link helps us distinguish vnodes for
error = default_makenamedstream(vp, svpp, name, context);
if (error == 0) {
+ uint32_t streamflags = VISNAMEDSTREAM;
vnode_t svp = *svpp;
/* Tag the vnode. */
- vnode_lock(svp);
- svp->v_flag |= VISNAMEDSTREAM;
- vnode_unlock(svp);
+ if ((vp->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0) {
+ streamflags |= VISSHADOW;
+ }
+
+ /* Tag the vnode. */
+ vnode_lock_spin(svp);
+ svp->v_flag |= streamflags;
+ vnode_unlock(svp);
+
/* Make the file its parent.
* Note: This parent link helps us distinguish vnodes for
* shadow stream files from vnodes for resource fork on file
_sha1_loop:_SHA1Update
_sha1_result:_SHA1Final_r
_snprintf
-_sprintf
_sscanf
_strcasecmp
-_strcat
_strchr
_strcmp
-_strcpy
_STRDUP
_strlen
_strncasecmp
_OSCompareAndSwap64
_OSAddAtomic64
+_strcpy
+_strcat
+_sprintf
__ZN8OSObject19_RESERVEDOSObject31Ev
_bcopy_nc
_bzero_nc
-
+_strcpy
+_strcat
+_sprintf
$(SYMBOL_SET_BUILD): $(OBJPATH)/%.symbolset : %.exports %.$(ARCH_CONFIG_LC).exports $(OBJPATH)/allsymbols
$(_v)$(KEXT_CREATE_SYMBOL_SET) \
- $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) \
+ $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_ALL_)) \
-import $(OBJPATH)/allsymbols \
-export $*.exports \
-export $*.$(ARCH_CONFIG_LC).exports \
build_symbol_sets: $(SYMBOL_SET_BUILD)
$(_v)$(KEXT_CREATE_SYMBOL_SET) \
- $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) \
+ $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_ALL_)) \
-import $(OBJPATH)/allsymbols \
-export $(SRCROOT)/$(COMPONENT)/Libkern.exports \
-export $(SRCROOT)/$(COMPONENT)/Libkern.$(ARCH_CONFIG_LC).exports \
-9.6.0
+9.7.0
# The first line of this file contains the master version number for the kernel.
# All other instances of the kernel version in xnu are derived from this file.
_spltty
_splvm
_splx
-_sprintf
_sscanf
_stack_privilege
-_strcat
_strchr
_strcmp
-_strcpy
_strlen
_strncat
_strncmp
_smp_initialized
__ZN24IOBufferMemoryDescriptor20initWithPhysicalMaskEP4taskmyyy
__ZN24IOBufferMemoryDescriptor22inTaskWithPhysicalMaskEP4taskmyy
+_strcat
+_strcpy
+_sprintf
_scc
_rc4_crypt
_rc4_init
+_strcat
+_strcpy
+_sprintf
kIOMemoryPurgeable = 0x00000040,
kIOMemorySharingTypeMask = 0x000f0000,
kIOMemoryUnshared = 0x00000000,
- kIOMemoryKernelUserShared = 0x00010000
+ kIOMemoryKernelUserShared = 0x00010000,
+ // shared IOMemoryDescriptor options for IOBufferMemoryDescriptor:
+ kIOBufferDescriptorMemoryFlags = kIOMemoryDirectionMask
+#ifdef XNU_KERNEL_PRIVATE
+ | kIOMemoryAutoPrepare
+#endif
+ | kIOMemoryThreadSafe
};
#define _IOBUFFERMEMORYDESCRIPTOR_INTASKWITHOPTIONS_ 1
kIOMemoryAsReference = 0x00000100,
kIOMemoryBufferPageable = 0x00000400,
kIOMemoryDontMap = 0x00000800,
+#ifdef XNU_KERNEL_PRIVATE
+ kIOMemoryRedirected = 0x00004000,
+ kIOMemoryPreparedReadOnly = 0x00008000,
+#endif
kIOMemoryPersistent = 0x00010000,
- kIOMemoryThreadSafe = 0x00020000
+#ifdef XNU_KERNEL_PRIVATE
+ kIOMemoryReserved6156215 = 0x00020000,
+#endif
+ kIOMemoryThreadSafe = 0x00100000, // Shared with Buffer MD
};
#define kIOMapperNone ((IOMapper *) -1)
// might be created by IOMemoryDescriptor::withAddress(), but there should be
// no need to reference as anything but a generic IOMemoryDescriptor *.
-// Also these flags should not overlap with the options to
-// IOMemoryDescriptor::initWithRanges(... IOOptionsBits options);
-
-enum {
- kIOMemoryPreparedReadOnly = 0x00008000,
-};
-
class IOGeneralMemoryDescriptor : public IOMemoryDescriptor
{
OSDeclareDefaultStructors(IOGeneralMemoryDescriptor);
#include <IOKit/IOLib.h>
#include <IOKit/IOMapper.h>
#include <IOKit/IOBufferMemoryDescriptor.h>
+#include <libkern/OSDebug.h>
#include "IOKitKernelInternal.h"
#include "IOCopyMapper.h"
range.length = 0;
_ranges.v64 = ⦥
- // Grab the direction and the Auto Prepare bits from the Buffer MD options
- iomdOptions |= options & (kIOMemoryDirectionMask | kIOMemoryAutoPrepare);
+ // Grab IOMD bits from the Buffer MD options
+ iomdOptions |= (options & kIOBufferDescriptorMemoryFlags);
if ((options & (kIOMemorySharingTypeMask | kIOMapCacheMask)) && (alignment < page_size))
alignment = page_size;
name = moduleName->getCStringNoCopy();
k_info = kmod_lookupbyname_locked((char *)name);
if ( k_info && (k_info->reference_count < 1) ) {
+ record_kext_unload(k_info->id);
if ( k_info->stop &&
!((ret = k_info->stop(k_info, 0)) == kIOReturnSuccess) ) {
uncompressedSize ? ((int) ((compressedSize * 100ULL) / uncompressedSize)) : 0,
sum1, sum2);
+ if (vars->fileVars->io)
+ (void) IOHibernatePollerIODone(vars->fileVars, false);
+
if (pollerOpen)
IOHibernatePollerClose(vars->fileVars, kIOPolledBeforeSleepState);
gIOSystemMapper = mapper = IOMapper::gSystem;
}
+ // Temp binary compatibility for kIOMemoryThreadSafe
+ if (kIOMemoryReserved6156215 & options)
+ {
+ options &= ~kIOMemoryReserved6156215;
+ options |= kIOMemoryThreadSafe;
+ }
// Remove the dynamic internal use flags from the initial setting
options &= ~(kIOMemoryPreparedReadOnly);
_flags = options;
return (err);
}
-enum {
- kIOMemoryRedirected = 0x00010000
-};
-
IOReturn IOMemoryDescriptor::handleFault(
void * _pager,
vm_map_t addressMap,
return kIOReturnNoResources;
abstime = inAbstime;
- if ( enabled && AbsoluteTime_to_scalar(&abstime) && workLoop )
+ if ( enabled && AbsoluteTime_to_scalar(&inAbstime) && AbsoluteTime_to_scalar(&abstime) && workLoop )
{
if (reserved)
{
reserved->workLoop = workLoop;
reserved->calloutGeneration++;
if (thread_call_enter1_delayed((thread_call_t) calloutEntry,
- (void *) reserved->calloutGeneration, abstime))
+ (void *) reserved->calloutGeneration, inAbstime))
{
release();
workLoop->release();
}
}
else
- thread_call_enter_delayed((thread_call_t) calloutEntry, abstime);
+ thread_call_enter_delayed((thread_call_t) calloutEntry, inAbstime);
}
return kIOReturnSuccess;
machPort = (IOMachPort *) dict->getObject( (const OSSymbol *) obj );
if( machPort) {
- destroyed = (machPort->mscount == *mscount);
+ destroyed = (machPort->mscount <= *mscount);
if( destroyed)
dict->removeObject( (const OSSymbol *) obj );
else
| kdp-reenter Schedule reentry into the debugger and continue.
| kdp-reboot Restart remote target
|
+| zstack Print zalloc caller stack (zone leak debugging)
+| findoldest Find oldest zone leak debugging record
+| countpcs Print how often a pc occurs in the zone leak log
+|
+|
| Type "help <macro>" for more specific help on a particular macro.
| Type "show user <macro>" to see what the macro is really doing.
end
define showcurrentthreads
set $kgm_prp = (struct processor *)processor_list
while $kgm_prp != 0
+ printf "Processor 0x%08x State %d (cpu_id %x)\n", $kgm_prp, ($kgm_prp)->state, ($kgm_prp)->cpu_num
if ($kgm_prp)->active_thread != 0
set $kgm_actp = ($kgm_prp)->active_thread
showtaskheader
define showcurrentstacks
set $kgm_prp = processor_list
while $kgm_prp != 0
+ printf "Processor 0x%08x State %d (cpu_id %x)\n", $kgm_prp, ($kgm_prp)->state, ($kgm_prp)->cpu_num
if ($kgm_prp)->active_thread != 0
set $kgm_actp = ($kgm_prp)->active_thread
showtaskheader
set $kgm_obj = (OSObject *) $arg1
set $kgm_vt = *((void **) $arg1)
+ if ($kgm_mtype == 12)
+ set $kgm_vt = $kgm_vt - 2 * sizeof(void *)
+ end
+
if ($kgm_show_object_addrs)
printf "`object %p, vt ", $arg1
output /a (unsigned) $kgm_vt
printf " <object %p, ", $kgm_re
printf "vtable "
set $kgm_vt = (unsigned) *(void**) $kgm_re
+ if ($kgm_mtype == 12)
+ set $kgm_vt = $kgm_vt - 2 * sizeof(void *)
+ end
output /a $kgm_vt
if ($kgm_vt != _ZTV15IORegistryEntry)
| For page-tables in <pmap> translate <virtual_address> to physical address.
end
+define zstack
+ set $index = $arg0
+
+ if (log_records == 0)
+ set $count = 0
+ printf "Zone logging not enabled. Add 'zlog=<zone name>' to boot-args.\n"
+ else
+ if ($argc == 2)
+ set $count = $arg1
+ else
+ set $count = 1
+ end
+ end
+
+ while ($count)
+ printf "\n--------------- "
+
+ if (zrecords[$index].z_opcode == 1)
+ printf "ALLOC "
+ else
+ printf "FREE "
+ end
+
+ printf " 0x%x : index %d : ztime %d -------------\n", zrecords[$index].z_element, $index, zrecords[$index].z_time
+
+ set $frame = 0
+
+ while ($frame < 15)
+ set $frame_pc = zrecords[$index].z_pc[$frame]
+
+ if ($frame_pc == 0)
+ loop_break
+ end
+
+ x/i $frame_pc
+ set $frame = $frame + 1
+ end
+
+ set $index = $index + 1
+ set $count = $count - 1
+ end
+end
+
+document zstack
+Syntax: (gdb) zstack <index> [<count>]
+| Zone leak debugging: print the stack trace of log element at <index>.
+| If a <count> is supplied, it prints <count> log elements starting at <index>.
+|
+| The suggested usage is to look at indexes below zcurrent and look for common stack traces.
+| The stack trace that occurs the most is probably the cause of the leak. Find the pc of the
+| function calling into zalloc and use the countpcs kgmacro to find out how often that pc occurs in the log.
+| The pc occurring in a high percentage of records is most likely the source of the leak.
+|
+| The findoldest kgmacro is also useful for leak debugging since it identifies the oldest record
+| in the log, which may indicate the leaker.
+end
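A hypothetical invocation, for orientation only: "zstack 1500 10" prints ten records starting at the made-up index 1500; whichever caller pc recurs across those traces is then fed to countpcs. The index, count, and any pc used this way are illustrative values, not taken from a real log.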
+
+define findoldest
+ set $index = 0
+ set $count = log_records
+ set $cur_min = 2000000000
+ set $cur_index = 0
+
+ if (log_records == 0)
+ printf "Zone logging not enabled. Add 'zlog=<zone name>' to boot-args.\n"
+ else
+
+ while ($count)
+ if (zrecords[$index].z_element && zrecords[$index].z_time < $cur_min)
+ set $cur_index = $index
+ set $cur_min = zrecords[$index].z_time
+ end
+
+ set $count = $count - 1
+ set $index = $index + 1
+ end
+
+ printf "oldest record is at log index %d:\n", $cur_index
+ zstack $cur_index
+ end
+end
+
+document findoldest
+Syntax: (gdb) findoldest
+| Zone leak debugging: find and print the oldest record in the log. Note that this command
+| can take several minutes to run since it uses linear search.
+|
+| Once it prints a stack trace, find the pc of the caller above all the zalloc, kalloc and
+| IOKit layers. Then use the countpcs kgmacro to see how often this caller has allocated
+| memory. A caller with a high percentage of records in the log is probably the leaker.
+end
+
+define countpcs
+ set $target_pc = $arg0
+ set $index = 0
+ set $count = log_records
+ set $found = 0
+
+ if (log_records == 0)
+ printf "Zone logging not enabled. Add 'zlog=<zone name>' to boot-args.\n"
+ else
+
+ while ($count)
+ set $frame = 0
+
+ if (zrecords[$index].z_element != 0)
+ while ($frame < 15)
+ if (zrecords[$index].z_pc[$frame] == $target_pc)
+ set $found = $found + 1
+ set $frame = 15
+ end
+
+ set $frame = $frame + 1
+ end
+ end
+
+ set $index = $index + 1
+ set $count = $count - 1
+ end
+
+ printf "occurred %d times in log (%d%c of records)\n", $found, ($found * 100) / zrecorded, '%'
+ end
+end
+
+document countpcs
+Syntax: (gdb) countpcs <pc>
+| Zone leak debugging: search the log and print a count of all log entries that contain the given <pc>
+| in the stack trace. This is useful for verifying a suspected <pc> as being the source of
+| the leak. If a high percentage of the log entries contain the given <pc>, then it's most
+| likely the source of the leak. Note that this command can take several minutes to run.
+end
+
+define findelem
+ set $fe_index = zcurrent
+ set $fe_count = log_records
+ set $fe_elem = $arg0
+ set $fe_prev_op = -1
+
+ if (log_records == 0)
+ printf "Zone logging not enabled. Add 'zlog=<zone name>' to boot-args.\n"
+ end
+
+ while ($fe_count)
+ if (zrecords[$fe_index].z_element == $fe_elem)
+ zstack $fe_index
+
+ if (zrecords[$fe_index].z_opcode == $fe_prev_op)
+			printf "*************** DOUBLE OP! *********************\n"
+ end
+
+ set $fe_prev_op = zrecords[$fe_index].z_opcode
+ end
+
+ set $fe_count = $fe_count - 1
+ set $fe_index = $fe_index + 1
+
+ if ($fe_index >= log_records)
+ set $fe_index = 0
+ end
+ end
+end
+
+document findelem
+Syntax: (gdb) findelem <elem addr>
+| Zone corruption debugging: search the log and print out the stack traces for all log entries that
+| refer to the given zone element. When the kernel panics due to a corrupted zone element, get the
+| element address and use this macro. This will show you the stack traces of all logged zalloc and
+| zfree operations which tells you who touched the element in the recent past. This also makes
+| double-frees readily apparent.
+end
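A hypothetical end-to-end pass with these macros, using made-up values throughout: run findoldest to locate the stalest record, note the caller pc printed above the zalloc/kalloc frames (say 0x003f8e2c), run "countpcs 0x003f8e2c" to see what share of the log it accounts for, and, if a panic instead reported a corrupted element at, say, 0x1a2b3c40, run "findelem 0x1a2b3c40" to see every logged alloc and free that touched that element.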
INSTINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS}
-
EXPINC_SUBDIRS = \
libkern \
uuid
SETUP_SUBDIRS = conf
-COMP_SUBDIRS = conf
+COMP_SUBDIRS = conf kmod
INST_SUBDIRS = kmod
classes->release();
if (0 == checkClass) {
+ record_kext_unload(ki->id);
OSRuntimeUnloadCPP(ki, 0); // call destructors
ret = kmod_destroy(host_priv_self(), ki->id);
didUnload = true;
export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
include $(MakeInc_cmd)
include $(MakeInc_def)
INSTOBJROOT = $(OBJROOT)/$(INSTALL_TYPE)_$(ARCH_CONFIG)/$(COMPONENT)/kmod
endif
-
KMOD_CFILES = c_start.c c_stop.c
KMODCPP_CFILES = cplus_start.c cplus_stop.c
ALL_OFILES = $(KMOD_OFILES) $(KMODCPP_OFILES)
$(ALL_OFILES): %.o : %.c
- ${KCC} -c ${CFLAGS} ${${join $@,_CFLAGS}} ${INCFLAGS} ${${join $@,_INCFLAGS}} -o $(COMPOBJROOT)/$(*F).o $<
+ @echo CC $@
+ $(_v)${KCC} -c ${CFLAGS} ${${join $@,_CFLAGS}} ${INCFLAGS} ${${join $@,_INCFLAGS}} -o $(COMPOBJROOT)/$(*F).o $<
$(COMPOBJROOT)/$(KMOD_NAME).a: $(KMOD_OFILES)
- libtool -static -o $@ $^
+ @echo LIBTOOL $@
+ $(_v)libtool -static -o $@ $^
$(COMPOBJROOT)/$(KMODCPP_NAME).a: $(KMODCPP_OFILES)
- libtool -static -o $@ $^
+ @echo LIBTOOL $@
+ $(_v)libtool -static -o $@ $^
do_build_all: $(COMPOBJROOT)/$(KMOD_NAME).a $(COMPOBJROOT)/$(KMODCPP_NAME).a
$(INSTALL_DIR)/%.a: $(INSTOBJROOT)/%.a
- @allarchs=""; \
- for onearch in $(INSTALL_ARCHS); do \
- if [ $(MACHINE_CONFIG) = DEFAULT ] ; then \
- archdir=$(OBJROOT)/$(KERNEL_CONFIG)_$${onearch}/$(COMPONENT); \
- else \
- archdir=$(OBJROOT)/$(KERNEL_CONFIG)_$${onearch}_$(MACHINE_CONFIG)/$(COMPONENT); \
- fi; \
- if [ -e $${archdir}/kmod/$(*F).a ]; then \
- allarchs="$${allarchs} $${archdir}/kmod/$(*F).a"; \
- fi; \
- done; \
+ @echo Installing $< in $@;
$(RM) $@ || true; \
${MKDIR} $(INSTALL_DIR) $(SYMROOT); \
- cmd="lipo $${allarchs} -create -output $(SYMROOT)/$(*F).a"; \
- echo $$cmd; eval $$cmd; \
- cmd="install $(LIB_INSTALL_FLAGS) $(SYMROOT)/$(*F).a $@"; \
+ $(_v)if [ $(MACHINE_CONFIG) = DEFAULT ] ; then \
+ allarchs=""; \
+ for onearch in $(INSTALL_ARCHS); do \
+ archdir=$(OBJROOT)/$(KERNEL_CONFIG)_$${onearch}/$(COMPONENT); \
+ if [ -e $${archdir}/kmod/$(*F).a ]; then \
+ allarchs="$${allarchs} $${archdir}/kmod/$(*F).a"; \
+ fi; \
+ done; \
+ cmd="$(LIPO) $${allarchs} -create -output $(SYMROOT)/$(*F).a"; \
+ echo $$cmd; eval $$cmd; \
+ else \
+ my_counter=1; \
+ my_innercounter=1; \
+ outputfile=$(SYMROOT)/$(*F).a; \
+ for my_config in $(TARGET_CONFIGS_UC); do \
+ if [ $${my_counter} -eq 1 ]; then \
+ my_counter=2; \
+ my_kconfig=$${my_config}; \
+ elif [ $${my_counter} -eq 2 ]; then \
+ my_counter=3; \
+ my_aconfig=$${my_config}; \
+ else \
+ my_counter=1; \
+ inputfile=$(OBJROOT)/$${my_kconfig}_$${my_aconfig}_$${my_config}/$(COMPONENT)/kmod/$(*F).a; \
+ if [ -e $${inputfile} ]; then \
+ if [ $${my_innercounter} -eq 1 ]; then \
+ my_innercounter=2; \
+ cmd="$(LIPO) -create $${inputfile} -o $${outputfile}"; \
+ else \
+ cmd="$(LIPO) -create $${outputfile} $${inputfile} -o $${outputfile} || true"; \
+ fi; \
+ echo $$cmd; eval $$cmd; \
+ fi; \
+ fi; \
+ done; \
+ fi; \
+ cmd="$(INSTALL) $(LIB_INSTALL_FLAGS) $(SYMROOT)/$(*F).a $@"; \
echo $$cmd; eval $$cmd
-
do_build_install: $(INSTALL_DIR)/$(KMOD_NAME).a $(INSTALL_DIR)/$(KMODCPP_NAME).a
# include $(MakeInc_rule)
goto finish;
}
- if (0 == strcmp("com.apple.driver.AppleIntelCPUPowerManagement",
- incumbentName->getCStringNoCopy())) {
- /* Special rules. Always favor version 51.0.0 exactly at the
- * expense of all other versions newer or older.
- */
- if(0 == strcmp(incumbentVersionString->getCStringNoCopy(), "51.0.0")) {
- IOLog(VTYELLOW "Skipping duplicate extension \"%s\" with "
- " version (%s -> %s).\n" VTRESET,
- candidateName->getCStringNoCopy(),
- candidateVersionString->getCStringNoCopy(),
- incumbentVersionString->getCStringNoCopy());
- winner = incumbent;
- goto finish;
- } else if (0 == strcmp(candidateVersionString->getCStringNoCopy(), "51.0.0")) {
- IOLog(VTYELLOW "Skipping duplicate extension \"%s\" with "
- " version (%s -> %s).\n" VTRESET,
- candidateName->getCStringNoCopy(),
- incumbentVersionString->getCStringNoCopy(),
- candidateVersionString->getCStringNoCopy());
- winner = candidate;
- goto finish;
- }
- }
-
if (candidate_vers > incumbent_vers) {
IOLog(VTYELLOW "Replacing extension \"%s\" with newer version "
"(%s -> %s).\n" VTRESET,
RC_$(RC_ARCHS) = 1
.endif
NARCHS != echo $(RC_ARCHS) | wc -w
-LIBSYS = $(NEXT_ROOT)/usr/local/lib/system
+LIBSYS = $(SDKROOT)/usr/local/lib/system
NJOBS != perl -e '$$n = `/usr/sbin/sysctl -n hw.ncpu`; printf "%d\n", $$n < 2 ? 2 : ($$n * 1.5)'
BSDMAKE = bsdmake -f Makefile
BSDMAKEJ = $(BSDMAKE) -j $(NJOBS)
.ifdef ALTFRAMEWORKSPATH
PRIVINC = -F${ALTFRAMEWORKSPATH} -I${ALTFRAMEWORKSPATH}/System.framework/PrivateHeaders
.else
-PRIVINC = -I${NEXT_ROOT}/System/Library/Frameworks/System.framework/PrivateHeaders
+PRIVINC = -I${SDKROOT}/System/Library/Frameworks/System.framework/PrivateHeaders
.endif
CFLAGS += ${PRIVINC}
-CFLAGS += -no-cpp-precomp -force_cpusubtype_ALL
+.if empty(MACHINE_ARCH:Marm*)
+CFLAGS += -force_cpusubtype_ALL
+AINC= -force_cpusubtype_ALL
+.endif
+CFLAGS += -no-cpp-precomp
CFLAGS += -fno-common -pipe -Wmost -g
-AINC= -no-cpp-precomp -force_cpusubtype_ALL
+AINC+= -no-cpp-precomp
AINC+= -arch ${MACHINE_ARCH} -g
CLEANFILES+=tags
INSTALL_PIC_ARCHIVE= yes
# add version string
SRCS += libsyscall_version.c
libsyscall_version.c:
- ${NEXT_ROOT}/Developer/Makefiles/bin/version.pl Libsyscall > $@
+ ${SDKROOT}/Developer/Makefiles/bin/version.pl Libsyscall > $@
CFLAGS += -I${SYMROOT}
.include "${.CURDIR}/Makefile.inc"
KERNELFRAMEWORK = ${DESTDIR}/System/Library/Frameworks/Kernel.framework
PRIVKERNELHDRS = ${KERNELFRAMEWORK}/Versions/A/PrivateHeaders
-.if ${MACHINE_ARCH} == armv6
-ARCHDIR = arm
-.else
-ARCHDIR = ${MACHINE_ARCH}
-.endif
+ARCHDIR = ${MACHINE_ARCH:C/^armv.*$/arm/}
installhdrs-md: gen_md_mig_defs
mkdir -p ${INCDIR}/mach/${ARCHDIR}
# machine-dependent mach sources
-.if ${MACHINE_ARCH} == armv6
-ARCHDIR = arm
-.else
-ARCHDIR = ${MACHINE_ARCH}
-.endif
+ARCHDIR = ${MACHINE_ARCH:C/^armv.*$/arm/}
.if exists(${.CURDIR}/mach/${ARCHDIR}/Makefile.inc)
.include "${.CURDIR}/mach/${ARCHDIR}/Makefile.inc"
.endif
ARCH_FLAGS_I386 = -arch i386
ARCH_FLAGS_ARM = $($(addsuffix $(MACHINE_CONFIG),ARCH_FLAGS_ARM_))
+ARCH_FLAGS_ALL_PPC = $(ARCH_FLAGS_PPC)
+ARCH_FLAGS_ALL_I386 = $(ARCH_FLAGS_I386)
+ARCH_FLAGS_ALL_ARM = -arch arm
+
#
# Default CFLAGS
-fno-builtin -finline -msoft-float \
-fsigned-bitfields $(OTHER_CFLAGS)
+ifeq ($(BUILD_STABS),1)
+export CFLAGS_GEN += -gstabs+
+export BUILD_DWARF = 0
+export BUILD_STABS = 1
+else
+export CFLAGS_GEN += -gdwarf-2
+export BUILD_DWARF = 1
+export BUILD_STABS = 0
+endif
+
export CFLAGS_RELEASE =
export CFLAGS_DEVELOPMENT =
export CFLAGS_DEBUG =
export CFLAGS_PROFILE = -pg
-ifeq ($(BUILD_STABS),1)
-export CFLAGS_PPC = -Dppc -DPPC -D__PPC__ -DPAGE_SIZE_FIXED \
- -mno-altivec -gstabs+ -force_cpusubtype_ALL
-export CFLAGS_I386 = -Di386 -DI386 -D__I386__ \
- -DPAGE_SIZE_FIXED -gstabs+ -force_cpusubtype_ALL
-export CFLAGS_ARM = -Darm -DARM -D__ARM__ -DPAGE_SIZE_FIXED \
- -fno-strict-aliasing -gstabs+ -fno-keep-inline-functions
-export BUILD_DWARF = 0
-export BUILD_STABS = 1
-else
export CFLAGS_PPC = -Dppc -DPPC -D__PPC__ -DPAGE_SIZE_FIXED \
- -mno-altivec -gdwarf-2 -force_cpusubtype_ALL
+ -mno-altivec -force_cpusubtype_ALL
export CFLAGS_I386 = -Di386 -DI386 -D__I386__ \
- -DPAGE_SIZE_FIXED -gdwarf-2 -force_cpusubtype_ALL
+ -DPAGE_SIZE_FIXED -force_cpusubtype_ALL
export CFLAGS_ARM = -Darm -DARM -D__ARM__ -DPAGE_SIZE_FIXED \
- -fno-strict-aliasing -gdwarf-2 -fno-keep-inline-functions
-export BUILD_DWARF = 1
-export BUILD_STABS = 0
+ -fno-strict-aliasing -fno-keep-inline-functions
+
+ifeq (-arch armv7,$(ARCH_FLAGS_ARM))
+CFLAGS_ARM += -mthumb
endif
ifeq (-arch armv6,$(ARCH_FLAGS_ARM))
CFLAGS_ARM += -mthumb
endif
ifeq (-arch armv5,$(ARCH_FLAGS_ARM))
-CFLAGS_ARM += -mthumb
+#CFLAGS_ARM += -mthumb # <rdar://problem/6174175>
endif
ifeq (-arch xscale,$(ARCH_FLAGS_ARM))
CFLAGS_ARM += -mthumb
export LDFLAGS_COMPONENT_PPC = $(COMP_LDFLAGS_COMPONENT_PPC) -force_cpusubtype_ALL
export LDFLAGS_COMPONENT_I386 = $(COMP_LDFLAGS_COMPONENT_i386)
-export LDFLAGS_COMPONENT_ARM = $(COMP_LDFLAGS_COMPONENT_ARM)
+export LDFLAGS_COMPONENT_ARM = $(COMP_LDFLAGS_COMPONENT_ARM) -Wl,-new_linker
export LDFLAGS_COMPONENT = $(LDFLAGS_COMPONENT_GEN) \
$($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) \
-Wl,-segaddr,__TEXT,0x111000
export LDFLAGS_KERNEL_ARM = \
+ -Wl,-new_linker \
-Wl,-segaddr,__HIB,0xC0000000 \
-Wl,-segaddr,__TEXT,0xC0008000
$(DSTROOT)$(INSTALL_FILE_DIR)mach.$(KERNEL_CONFIG_LC).$(MACHINE_CONFIG_LC): $(TARGET)/mach_kernel force_file_install
@echo Installing $< in $@;
- @if [ ! -e $(DSTROOT)$(INSTALL_FILE_DIR) ]; then \
+ $(_v)if [ ! -e $(DSTROOT)$(INSTALL_FILE_DIR) ]; then \
$(MKDIR) $(DSTROOT)$(INSTALL_FILE_DIR); \
fi; \
if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then \
echo >empty_file_$(notdir $@); \
lipo_arg="$(subst _empty_file, empty_file_$(notdir $@),$(foreach lipo_arch,$(INSTALL_ARCHS_LC), $(addprefix -arch , $(addsuffix _empty_file, $(lipo_arch)))))"; \
$(LIPO) $${lipo_arg} -create -output $@; \
- $(RM) $(RMFLAGS) empty_file_$(notdir $@); \
+ $(RM) $(RMFLAGS) empty_file_$(notdir $@); \
fi; \
$(LIPO) $@ -replace $(ARCH_CONFIG_LC) $< -o $@; \
+ fi; \
+ if [ $(BUILD_DWARF) -eq 1 ]; then \
+ if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then \
+ $(CP) -f $< $<.ctfsys; \
+ $(FIND) $(OBJPATH)/ -name \*.ctf -size 0 \
+ -exec $(RM) -rf {} \; ; \
+ $(CTFMERGE) -l xnu -o $<.ctfsys \
+ $(OBJPATH)/*/$(KERNEL_CONFIG)/*.*o.ctf || true; \
+ $(INSTALL) $(FILE_INSTALL_FLAGS) $<.ctfsys $@.ctfsys; \
+ else \
+ if [ ! -e $@.ctfsys ]; then \
+ echo >empty_file_$(notdir $@); \
+ lipo_arg="$(subst _empty_file, empty_file_$(notdir $@),$(foreach lipo_arch,$(INSTALL_ARCHS_LC), $(addprefix -arch , $(addsuffix _empty_file, $(lipo_arch)))))"; \
+ $(LIPO) $${lipo_arg} -create -output $@.ctfsys;\
+ $(RM) $(RMFLAGS) empty_file_$(notdir $@);\
+ fi; \
+ $(FIND) $(OBJPATH)/ -name \*.ctf -size 0 \
+ -exec $(RM) -rf {} \; ; \
+ $(CP) -f $< $<.ctfsys; \
+ $(CTFMERGE) -l xnu -o $<.ctfsys \
+ $(OBJPATH)/*/$(KERNEL_CONFIG)/*.*o.ctf || true; \
+ $(LIPO) $@.ctfsys -replace $(ARCH_CONFIG_LC) \
+ $<.ctfsys -o $@.ctfsys; \
+ fi; \
fi
$(SYMROOT)$(INSTALL_FILE_DIR)mach.$(KERNEL_CONFIG_LC).$(MACHINE_CONFIG_LC): $(TARGET)/mach_kernel.sys force_file_install
@echo Installing $< in $@;
- @if [ ! -e $(SYMROOT)$(INSTALL_FILE_DIR) ]; then \
+ $(_v)if [ ! -e $(SYMROOT)$(INSTALL_FILE_DIR) ]; then \
$(MKDIR) $(SYMROOT)$(INSTALL_FILE_DIR); \
fi; \
if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then \
fi; \
if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then \
$(RM) $(RMFLAGS) $@; \
- $(INSTALL) $(FILE_INSTALL_FLAGS) $< $@; \
+ if [ $(MACHINE_CONFIG) = DEFAULT ]; then \
+ $(INSTALL) $(FILE_INSTALL_FLAGS) $< $@; \
+ fi; \
else \
if [ ! -e $@ ]; then \
echo >empty_file_$(notdir $@); \
-exec $(RM) -rf {} \; ; \
$(CTFMERGE) -l xnu -o $<.ctfsys \
$(OBJPATH)/*/$(KERNEL_CONFIG)/*.*o.ctf || true; \
- $(INSTALL) $(FILE_INSTALL_FLAGS) $<.ctfsys $(dir $@); \
+ if [ $(MACHINE_CONFIG) = DEFAULT ]; then \
+ $(INSTALL) $(FILE_INSTALL_FLAGS) $<.ctfsys $(dir $@); \
+ fi; \
else \
if [ ! -e $@.ctfsys ]; then \
echo >empty_file_$(notdir $@); \
osfmk/i386/commpage/bcopy_sse2.s standard
osfmk/i386/commpage/bcopy_sse3x.s standard
osfmk/i386/commpage/bcopy_sse3x_64.s standard
+osfmk/i386/commpage/bcopy_sse42.s standard
+osfmk/i386/commpage/bcopy_sse42_64.s standard
osfmk/i386/commpage/bzero_scalar.s standard
osfmk/i386/commpage/bzero_sse2.s standard
osfmk/i386/commpage/bzero_sse2_64.s standard
+osfmk/i386/commpage/bzero_sse42.s standard
+osfmk/i386/commpage/bzero_sse42_64.s standard
osfmk/i386/commpage/memset_pattern_sse2.s standard
osfmk/i386/commpage/memset_pattern_sse2_64.s standard
osfmk/i386/commpage/longcopy_sse3x.s standard
static unsigned panic_io_port;
static unsigned commit_paniclog_to_nvram;
+int debug_boot_arg;
+
void
machine_startup(void)
{
if (boot_arg & DB_PRT) disable_debug_output=FALSE;
if (boot_arg & DB_SLOG) systemLogDiags=TRUE;
if (boot_arg & DB_NMI) panicDebugging=TRUE;
- if (boot_arg & DB_LOG_PI_SCRN) logPanicDataToScreen=TRUE;
+ if (boot_arg & DB_LOG_PI_SCRN) logPanicDataToScreen=TRUE;
+ debug_boot_arg = boot_arg;
}
if (!PE_parse_boot_argn("nvram_paniclog", &commit_paniclog_to_nvram, sizeof (commit_paniclog_to_nvram)))
kmod_dump(&PC, 1);
panic_display_system_configuration();
+ panic_display_zprint();
+ dump_kext_info(&kdb_log);
+
/* Release print backtrace lock, to permit other callers in the
* event of panics on multiple processors.
*/
jmp LReverseShort // copy remaining 0..63 bytes and done
- COMMPAGE_DESCRIPTOR(bcopy_sse3x,_COMM_PAGE_BCOPY,kHasSSE2+kHasSupplementalSSE3+kCache64,0)
+ COMMPAGE_DESCRIPTOR(bcopy_sse3x,_COMM_PAGE_BCOPY,kHasSSE2+kHasSupplementalSSE3+kCache64,kHasSSE4_2)
// rdi = ptr to 1st dest byte not to move (aligned)
LDestAligned:
- movl %edx,%ecx // copy length
+ movq %rdx,%rcx // copy length
movl %esi,%eax // copy low half of source address
andl $63,%edx // get remaining bytes for LShort
andl $15,%eax // mask to low 4 bits of source address
- andl $-64,%ecx // get number of bytes we will copy in inner loop
+ andq $-64,%rcx // get number of bytes we will copy in inner loop
// We'd like to use lea with rip-relative addressing, but cannot in a .code64 block.
// lea LTable(%rip),%r8 // point to dispatch table
movq $(_COMM_PAGE_32_TO_64(_COMM_PAGE_BCOPY)),%r8 // work around 4586528
jmp LReverseShort // copy remaining 0..63 bytes and done
- COMMPAGE_DESCRIPTOR(bcopy_sse3x_64,_COMM_PAGE_BCOPY,kHasSSE2+kHasSupplementalSSE3+kCache64,0)
+ COMMPAGE_DESCRIPTOR(bcopy_sse3x_64,_COMM_PAGE_BCOPY,kHasSSE2+kHasSupplementalSSE3+kCache64,kHasSSE4_2)
--- /dev/null
+/*
+ * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <machine/cpu_capabilities.h>
+#include <machine/commpage.h>
+
+/*
+ * The bcopy/memcpy loops, tuned for Nehalem.
+ *
+ * The following #defines are tightly coupled to the u-architecture:
+ */
+
+#define kShort 80 // too short to bother with SSE (must be >=80)
+
+
+// void bcopy(const void *src, void *dst, size_t len);
+
+ .text
+ .align 5, 0x90
+Lbcopy_sse42: // void bcopy(const void *src, void *dst, size_t len)
+ pushl %ebp // set up a frame for backtraces
+ movl %esp,%ebp
+ pushl %esi
+ pushl %edi
+ movl 8(%ebp),%esi // get source ptr
+ movl 12(%ebp),%edi // get dest ptr
+ movl 16(%ebp),%ecx // get length
+ movl %edi,%edx
+ subl %esi,%edx // (dest - source)
+ cmpl %ecx,%edx // must move in reverse if (dest - source) < length
+ jb LReverseIsland
+ cmpl $(kShort),%ecx // long enough to bother with SSE?
+ jbe Lshort // no
+ jmp LNotShort
+
+//
+// void *memcpy(void *dst, const void *src, size_t len);
+// void *memmove(void *dst, const void *src, size_t len);
+//
+// NB: These need to be 32 bytes from bcopy():
+//
+
+ .align 5, 0x90
+Lmemcpy: // void *memcpy(void *dst, const void *src, size_t len)
+Lmemmove: // void *memmove(void *dst, const void *src, size_t len)
+ pushl %ebp // set up a frame for backtraces
+ movl %esp,%ebp
+ pushl %esi
+ pushl %edi
+ movl 8(%ebp),%edi // get dest ptr
+ movl 12(%ebp),%esi // get source ptr
+ movl 16(%ebp),%ecx // get length
+ movl %edi,%edx
+ subl %esi,%edx // (dest - source)
+ cmpl %ecx,%edx // must move in reverse if (dest - source) < length
+ jb LReverseIsland
+ cmpl $(kShort),%ecx // long enough to bother with SSE?
+ ja LNotShort // yes
+
+// Handle short forward copies. As the most common case, this is the fall-through path.
+// ecx = length (<= kShort)
+// esi = source ptr
+// edi = dest ptr
+
+Lshort:
+ movl %ecx,%edx // copy length
+ shrl $2,%ecx // get #doublewords
+ jz 3f
+2: // loop copying doublewords
+ movl (%esi),%eax
+ addl $4,%esi
+ movl %eax,(%edi)
+ addl $4,%edi
+ dec %ecx
+ jnz 2b
+3: // handle leftover bytes (0..3) in last word
+ andl $3,%edx // any leftover bytes?
+ jz Lexit
+4: // loop copying bytes
+ movb (%esi),%al
+ inc %esi
+ movb %al,(%edi)
+ inc %edi
+ dec %edx
+ jnz 4b
+Lexit:
+ movl 8(%ebp),%eax // get return value (dst ptr) for memcpy/memmove
+ popl %edi
+ popl %esi
+ popl %ebp
+ ret
+
+
+LReverseIsland: // keep the "jb" above a short branch...
+ jmp LReverse // ...because reverse moves are uncommon
+
+
+// Handle forward moves that are long enough to justify use of SSE.
+// First, 16-byte align the destination.
+// ecx = length (> kShort)
+// esi = source ptr
+// edi = dest ptr
+
+LNotShort:
+ movl %edi,%edx // copy destination
+ negl %edx
+ andl $15,%edx // get #bytes to align destination
+ jz LDestAligned // already aligned
+ subl %edx,%ecx // decrement length
+1: // loop copying 1..15 bytes
+ movb (%esi),%al
+ inc %esi
+ movb %al,(%edi)
+ inc %edi
+ dec %edx
+ jnz 1b
+
+// Destination is now aligned. Nehalem does a great job with unaligned SSE loads,
+// so we use MOVDQU rather than aligned loads and shifts. Since kShort>=80, we
+// know there is at least one 64-byte chunk to move.
+// When we enter the copy loops, the following registers are set up:
+// ecx = residual length (0..63)
+// edx = -(length to move), a multiple of 64
+// esi = ptr to 1st source byte not to move (unaligned)
+// edi = ptr to 1st dest byte not to move (aligned)
+
+LDestAligned:
+ movl %ecx,%edx // copy length
+ andl $63,%ecx // get remaining bytes for Lshort
+ andl $-64,%edx // get number of bytes we will copy in inner loop
+ addl %edx,%esi // point to 1st byte not copied
+ addl %edx,%edi
+ negl %edx // now generate offset to 1st byte to be copied
+ testl $15,%esi // source also aligned?
+ jnz LUnalignedLoop
+ jmp LAlignedLoop
+
+
+// Forward loop for aligned operands.
+
+ .align 4,0x90 // 16-byte align inner loops
+LAlignedLoop: // loop over 64-byte chunks
+ movdqa (%esi,%edx),%xmm0
+ movdqa 16(%esi,%edx),%xmm1
+ movdqa 32(%esi,%edx),%xmm2
+ movdqa 48(%esi,%edx),%xmm3
+
+ movdqa %xmm0,(%edi,%edx)
+ movdqa %xmm1,16(%edi,%edx)
+ movdqa %xmm2,32(%edi,%edx)
+ movdqa %xmm3,48(%edi,%edx)
+
+ addl $64,%edx
+ jnz LAlignedLoop
+
+ jmp Lshort // copy remaining 0..63 bytes and done
+
+
+// Forward loop for unaligned operands.
+
+ .align 4,0x90 // 16-byte align inner loops
+LUnalignedLoop: // loop over 64-byte chunks
+ movdqu (%esi,%edx),%xmm0
+ movdqu 16(%esi,%edx),%xmm1
+ movdqu 32(%esi,%edx),%xmm2
+ movdqu 48(%esi,%edx),%xmm3
+
+ movdqa %xmm0,(%edi,%edx)
+ movdqa %xmm1,16(%edi,%edx)
+ movdqa %xmm2,32(%edi,%edx)
+ movdqa %xmm3,48(%edi,%edx)
+
+ addl $64,%edx
+ jnz LUnalignedLoop
+
+ jmp Lshort // copy remaining 0..63 bytes and done
+
+
+// Reverse moves. They are only used with destructive overlap.
+// ecx = length
+// esi = source ptr
+// edi = dest ptr
+
+LReverse:
+ addl %ecx,%esi // point to end of strings
+ addl %ecx,%edi
+ cmpl $(kShort),%ecx // long enough to bother with SSE?
+ ja LReverseNotShort // yes
+
+// Handle reverse short copies.
+// ecx = length
+// esi = one byte past end of source
+// edi = one byte past end of dest
+
+LReverseShort:
+ movl %ecx,%edx // copy length
+ shrl $2,%ecx // #words
+ jz 3f
+1:
+ subl $4,%esi
+ movl (%esi),%eax
+ subl $4,%edi
+ movl %eax,(%edi)
+ dec %ecx
+ jnz 1b
+3:
+ andl $3,%edx // bytes?
+ jz 5f
+4:
+ dec %esi
+ movb (%esi),%al
+ dec %edi
+ movb %al,(%edi)
+ dec %edx
+ jnz 4b
+5:
+ movl 8(%ebp),%eax // get return value (dst ptr) for memcpy/memmove
+ popl %edi
+ popl %esi
+ popl %ebp
+ ret
+
+// Handle a reverse move long enough to justify using SSE.
+// ecx = length
+// esi = one byte past end of source
+// edi = one byte past end of dest
+
+LReverseNotShort:
+ movl %edi,%edx // copy destination
+ andl $15,%edx // get #bytes to align destination
+ je LReverseDestAligned // already aligned
+ subl %edx,%ecx // adjust length
+1: // loop copying 1..15 bytes
+ dec %esi
+ movb (%esi),%al
+ dec %edi
+ movb %al,(%edi)
+ dec %edx
+ jnz 1b
+
+// Destination is now aligned. Prepare for reverse loops.
+
+LReverseDestAligned:
+ movl %ecx,%edx // copy length
+ andl $63,%ecx // get remaining bytes for Lshort
+ andl $-64,%edx // get number of bytes we will copy in inner loop
+ subl %edx,%esi // point to endpoint of copy
+ subl %edx,%edi
+ testl $15,%esi // is source aligned too?
+ jnz LReverseUnalignedLoop // no
+
+LReverseAlignedLoop: // loop over 64-byte chunks
+ movdqa -16(%esi,%edx),%xmm0
+ movdqa -32(%esi,%edx),%xmm1
+ movdqa -48(%esi,%edx),%xmm2
+ movdqa -64(%esi,%edx),%xmm3
+
+ movdqa %xmm0,-16(%edi,%edx)
+ movdqa %xmm1,-32(%edi,%edx)
+ movdqa %xmm2,-48(%edi,%edx)
+ movdqa %xmm3,-64(%edi,%edx)
+
+ subl $64,%edx
+ jne LReverseAlignedLoop
+
+ jmp LReverseShort // copy remaining 0..63 bytes and done
+
+
+// Reverse, unaligned loop. LDDQU==MOVDQU on these machines.
+
+LReverseUnalignedLoop: // loop over 64-byte chunks
+ movdqu -16(%esi,%edx),%xmm0
+ movdqu -32(%esi,%edx),%xmm1
+ movdqu -48(%esi,%edx),%xmm2
+ movdqu -64(%esi,%edx),%xmm3
+
+ movdqa %xmm0,-16(%edi,%edx)
+ movdqa %xmm1,-32(%edi,%edx)
+ movdqa %xmm2,-48(%edi,%edx)
+ movdqa %xmm3,-64(%edi,%edx)
+
+ subl $64,%edx
+ jne LReverseUnalignedLoop
+
+ jmp LReverseShort // copy remaining 0..63 bytes and done
+
+
+ COMMPAGE_DESCRIPTOR(bcopy_sse42,_COMM_PAGE_BCOPY,kHasSSE4_2,0)
--- /dev/null
+/*
+ * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <machine/cpu_capabilities.h>
+#include <machine/commpage.h>
+
+/*
+ * The bcopy/memcpy loops, tuned for Nehalem. This is the 64-bit version.
+ *
+ * The following #defines are tightly coupled to the u-architecture:
+ */
+
+#define kShort 80 // too short to bother with SSE (must be >=80)
+
+
+// void bcopy(const void *src, void *dst, size_t len);
+
+ .text
+ .code64
+ .align 5, 0x90
+Lbcopy_sse42_64: // void bcopy(const void *src, void *dst, size_t len)
+ pushq %rbp // set up a frame for backtraces
+ movq %rsp,%rbp
+ movq %rsi,%rax // copy dest ptr
+ movq %rdi,%rsi // xchange source and dest ptrs
+ movq %rax,%rdi
+ subq %rsi,%rax // (dest - source)
+ cmpq %rdx,%rax // must move in reverse if (dest - source) < length
+ jb LReverseIsland
+ cmpq $(kShort),%rdx // long enough to bother with SSE?
+ jbe LShort // no
+ jmp LNotShort
+
+//
+// void *memcpy(void *dst, const void *src, size_t len);
+// void *memmove(void *dst, const void *src, size_t len);
+//
+// NB: These need to be 32 bytes from bcopy():
+//
+
+ .align 5, 0x90
+Lmemcpy: // void *memcpy(void *dst, const void *src, size_t len)
+Lmemmove: // void *memmove(void *dst, const void *src, size_t len)
+ pushq %rbp // set up a frame for backtraces
+ movq %rsp,%rbp
+ movq %rdi,%r11 // save return value here
+ movq %rdi,%rax
+ subq %rsi,%rax // (dest - source)
+ cmpq %rdx,%rax // must move in reverse if (dest - source) < length
+ jb LReverseIsland
+ cmpq $(kShort),%rdx // long enough to bother with SSE?
+ ja LNotShort // yes
+
+// Handle short forward copies. As the most common case, this is the fall-through path.
+// rdx = length (<= kShort)
+// rsi = source ptr
+// rdi = dest ptr
+
+LShort:
+ movl %edx,%ecx // copy length using 32-bit operation
+ shrl $2,%ecx // get #doublewords
+ jz 3f
+2: // loop copying doublewords
+ movl (%rsi),%eax
+ addq $4,%rsi
+ movl %eax,(%rdi)
+ addq $4,%rdi
+ decl %ecx
+ jnz 2b
+3: // handle leftover bytes (0..3) in last word
+ andl $3,%edx // any leftover bytes?
+ jz 5f
+4: // loop copying bytes
+ movb (%rsi),%al
+ incq %rsi
+ movb %al,(%rdi)
+ incq %rdi
+ decl %edx
+ jnz 4b
+5:
+ movq %r11,%rax // get return value (dst ptr) for memcpy/memmove
+ popq %rbp
+ ret
+
+
+LReverseIsland: // keep the "jb" above a short branch...
+ jmp LReverse // ...because reverse moves are uncommon
+
+
+// Handle forward moves that are long enough to justify use of SSE.
+// First, 16-byte align the destination.
+// rdx = length (> kShort)
+// rsi = source ptr
+// rdi = dest ptr
+
+LNotShort:
+ movl %edi,%ecx // copy low half of destination ptr
+ negl %ecx
+ andl $15,%ecx // get #bytes to align destination
+ jz LDestAligned // already aligned
+ subl %ecx,%edx // decrement length
+1: // loop copying 1..15 bytes
+ movb (%rsi),%al
+ inc %rsi
+ movb %al,(%rdi)
+ inc %rdi
+ dec %ecx
+ jnz 1b
+
+
+// Destination is now aligned. Nehalem does a great job with unaligned SSE loads,
+// so we use MOVDQU rather than aligned loads and shifts. Since kShort>=80, we
+// know there is at least one 64-byte chunk to move.
+// When we enter the copy loops, the following registers are set up:
+// rdx = residual length (0..63)
+// rcx = -(length to move), a multiple of 64 less than 2GB
+// rsi = ptr to 1st source byte not to move (unaligned)
+// rdi = ptr to 1st dest byte not to move (aligned)
+
+LDestAligned:
+ movq %rdx,%rcx // copy length
+ andl $63,%edx // get remaining bytes for LShort
+ andq $-64,%rcx // get number of bytes we will copy in inner loop
+ addq %rcx,%rsi // point to 1st byte not copied
+ addq %rcx,%rdi
+ negq %rcx // now generate offset to 1st byte to be copied
+ testl $15,%esi // source also aligned?
+ jnz LUnalignedLoop
+ jmp LAlignedLoop
+
+
+// Forward loop for aligned operands.
+
+ .align 4,0x90 // 16-byte align inner loops
+LAlignedLoop: // loop over 64-byte chunks
+ movdqa (%rsi,%rcx),%xmm0
+ movdqa 16(%rsi,%rcx),%xmm1
+ movdqa 32(%rsi,%rcx),%xmm2
+ movdqa 48(%rsi,%rcx),%xmm3
+
+ movdqa %xmm0,(%rdi,%rcx)
+ movdqa %xmm1,16(%rdi,%rcx)
+ movdqa %xmm2,32(%rdi,%rcx)
+ movdqa %xmm3,48(%rdi,%rcx)
+
+ addq $64,%rcx
+ jnz LAlignedLoop
+
+ jmp LShort // copy remaining 0..63 bytes and done
+
+
+// Forward loop for unaligned operands.
+
+ .align 4,0x90 // 16-byte align inner loops
+LUnalignedLoop: // loop over 64-byte chunks
+ movdqu (%rsi,%rcx),%xmm0
+ movdqu 16(%rsi,%rcx),%xmm1
+ movdqu 32(%rsi,%rcx),%xmm2
+ movdqu 48(%rsi,%rcx),%xmm3
+
+ movdqa %xmm0,(%rdi,%rcx)
+ movdqa %xmm1,16(%rdi,%rcx)
+ movdqa %xmm2,32(%rdi,%rcx)
+ movdqa %xmm3,48(%rdi,%rcx)
+
+ addq $64,%rcx
+ jnz LUnalignedLoop
+
+ jmp LShort // copy remaining 0..63 bytes and done
+
+
+// Reverse moves. These are only used with destructive overlap.
+// rdx = length
+// rsi = source ptr
+// rdi = dest ptr
+
+LReverse:
+ addq %rdx,%rsi // point to end of strings
+ addq %rdx,%rdi
+ cmpq $(kShort),%rdx // long enough to bother with SSE?
+ ja LReverseNotShort // yes
+
+// Handle reverse short copies.
+// edx = length (<= kShort)
+// rsi = one byte past end of source
+// rdi = one byte past end of dest
+
+LReverseShort:
+ movl %edx,%ecx // copy length
+ shrl $3,%ecx // #quadwords
+ jz 3f
+1:
+ subq $8,%rsi
+ movq (%rsi),%rax
+ subq $8,%rdi
+ movq %rax,(%rdi)
+ decl %ecx
+ jnz 1b
+3:
+ andl $7,%edx // bytes?
+ jz 5f
+4:
+ decq %rsi
+ movb (%rsi),%al
+ decq %rdi
+ movb %al,(%rdi)
+ decl %edx
+ jnz 4b
+5:
+ movq %r11,%rax // get return value (dst ptr) for memcpy/memmove
+ popq %rbp
+ ret
+
+// Handle a reverse move long enough to justify using SSE.
+// rdx = length (> kShort)
+// rsi = one byte past end of source
+// rdi = one byte past end of dest
+
+LReverseNotShort:
+ movl %edi,%ecx // copy destination
+ andl $15,%ecx // get #bytes to align destination
+ jz LReverseDestAligned // already aligned
+ subq %rcx,%rdx // adjust length
+1: // loop copying 1..15 bytes
+ decq %rsi
+ movb (%rsi),%al
+ decq %rdi
+ movb %al,(%rdi)
+ decl %ecx
+ jnz 1b
+
+// Destination is now aligned. Prepare for reverse loops.
+
+LReverseDestAligned:
+ movq %rdx,%rcx // copy length
+ andl $63,%edx // get remaining bytes for LReverseShort
+ andq $-64,%rcx // get number of bytes we will copy in inner loop
+ subq %rcx,%rsi // point to endpoint of copy
+ subq %rcx,%rdi
+ testl $15,%esi // is source aligned too?
+ jnz LReverseUnalignedLoop // no
+
+LReverseAlignedLoop: // loop over 64-byte chunks
+ movdqa -16(%rsi,%rcx),%xmm0
+ movdqa -32(%rsi,%rcx),%xmm1
+ movdqa -48(%rsi,%rcx),%xmm2
+ movdqa -64(%rsi,%rcx),%xmm3
+
+ movdqa %xmm0,-16(%rdi,%rcx)
+ movdqa %xmm1,-32(%rdi,%rcx)
+ movdqa %xmm2,-48(%rdi,%rcx)
+ movdqa %xmm3,-64(%rdi,%rcx)
+
+ subq $64,%rcx
+ jne LReverseAlignedLoop
+
+ jmp LReverseShort // copy remaining 0..63 bytes and done
+
+
+// Reverse, unaligned loop. LDDQU==MOVDQU on these machines.
+
+LReverseUnalignedLoop: // loop over 64-byte chunks
+ movdqu -16(%rsi,%rcx),%xmm0
+ movdqu -32(%rsi,%rcx),%xmm1
+ movdqu -48(%rsi,%rcx),%xmm2
+ movdqu -64(%rsi,%rcx),%xmm3
+
+ movdqa %xmm0,-16(%rdi,%rcx)
+ movdqa %xmm1,-32(%rdi,%rcx)
+ movdqa %xmm2,-48(%rdi,%rcx)
+ movdqa %xmm3,-64(%rdi,%rcx)
+
+ subq $64,%rcx
+ jne LReverseUnalignedLoop
+
+ jmp LReverseShort // copy remaining 0..63 bytes and done
+
+
+ COMMPAGE_DESCRIPTOR(bcopy_sse42_64,_COMM_PAGE_BCOPY,kHasSSE4_2,0)
jmp Lshort
- COMMPAGE_DESCRIPTOR(bzero_sse2,_COMM_PAGE_BZERO,kHasSSE2,0)
+ COMMPAGE_DESCRIPTOR(bzero_sse2,_COMM_PAGE_BZERO,kHasSSE2,kHasSSE4_2)
jmp Lshort
- COMMPAGE_DESCRIPTOR(bzero_sse2_64,_COMM_PAGE_BZERO,kHasSSE2,0)
+ COMMPAGE_DESCRIPTOR(bzero_sse2_64,_COMM_PAGE_BZERO,kHasSSE2,kHasSSE4_2)
--- /dev/null
+/*
+ * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <machine/cpu_capabilities.h>
+#include <machine/commpage.h>
+
+/*
+ * Bzero, tuned for processors with SSE4.2 and 64-byte cache lines, ie Nehalem.
+ * We don't actually use SSE4.2, but rather use it to identify Nehalem.
+ *
+ * We do not use nontemporal operations, but use MOVDQA in preference to REP/STOS.
+ *
+ * This routine is also used for memset(p,0,n), which is a common case
+ * since gcc sometimes silently maps bzero() into memset(). As a result,
+ * we always load the original ptr into %eax before returning.
+ */
+
+#define kShort 80 // too short to bother with SSE (must be >=80)
+
+
+ .text
+ .align 5, 0x90
+Lbzero_sse42: // void bzero(void *b, size_t len);
+ pushl %ebp // set up a frame for backtraces
+ movl %esp,%ebp
+ pushl %edi
+ movl 8(%ebp),%edi // get ptr
+ movl 12(%ebp),%edx // get length
+
+ xorl %eax,%eax // set fill data to 0
+ cmpl $(kShort),%edx // long enough for SSE?
+ jg LNotShort // yes
+
+// Here for short operands or the end of long ones.
+// %edx = length
+// %edi = ptr
+// %eax = zero
+
+Lshort:
+ cmpl $12,%edx // long enough to word align?
+ jge 3f // yes
+ test %edx,%edx // length==0?
+ jz 6f
+1:
+ movb %al,(%edi) // zero a byte
+ inc %edi
+ dec %edx
+ jnz 1b
+ jmp 6f
+2:
+ movb %al,(%edi) // zero a byte
+ inc %edi
+ dec %edx
+3:
+ test $3,%edi // is ptr doubleword aligned?
+ jnz 2b // no
+ movl %edx,%ecx // copy length
+ shrl $2,%edx // #doublewords to store
+4:
+ movl %eax,(%edi) // zero an aligned doubleword
+ addl $4,%edi
+ dec %edx
+ jnz 4b
+ andl $3,%ecx // mask down to #bytes at end (0..3)
+ jz 6f // none
+5:
+ movb %al,(%edi) // zero a byte
+ inc %edi
+ dec %ecx
+ jnz 5b
+6:
+ movl 8(%ebp),%eax // get return value in case this was a call of memset()
+ popl %edi
+ popl %ebp
+ ret
+
+
+// We will be using SSE, so align ptr.
+// %edx = length
+// %edi = ptr
+// %eax = zero
+
+LNotShort:
+ testl $3,%edi // 4-byte aligned?
+ jz 2f // yes
+ movb %al,(%edi) // zero another byte
+ incl %edi
+ decl %edx
+ jmp LNotShort
+1: // zero doublewords until 16-byte aligned
+ movl %eax,(%edi)
+ addl $4,%edi
+ subl $4,%edx
+2:
+ testl $15,%edi // 16-byte aligned?
+ jnz 1b // no
+
+
+// Destination is now 16-byte aligned. Prepare to loop over 64-byte chunks.
+// %edx = length
+// %edi = ptr
+// %eax = zero
+
+LDestAligned:
+ movl %edx,%ecx
+ andl $63,%edx // mask down to residual length (0..63)
+ andl $-64,%ecx // get #bytes we will zero in this loop
+ pxor %xmm0,%xmm0 // zero an SSE register
+ addl %ecx,%edi // increment ptr by length to move
+ negl %ecx // negate length to move
+ jmp 1f
+
+// Loop over 64-byte chunks, storing into cache.
+
+ .align 4,0x90 // keep inner loops 16-byte aligned
+1:
+ movdqa %xmm0,(%edi,%ecx)
+ movdqa %xmm0,16(%edi,%ecx)
+ movdqa %xmm0,32(%edi,%ecx)
+ movdqa %xmm0,48(%edi,%ecx)
+ addl $64,%ecx
+ jne 1b
+
+ jmp Lshort
+
+
+
+ COMMPAGE_DESCRIPTOR(bzero_sse42,_COMM_PAGE_BZERO,kHasSSE4_2,0)
--- /dev/null
+/*
+ * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <machine/cpu_capabilities.h>
+#include <machine/commpage.h>
+
+/*
+ * Bzero, tuned for processors with SSE4.2 and 64-byte cache lines, ie Nehalem.
+ * We don't actually use SSE4.2, but rather use it to identify Nehalem.
+ * This is the 64-bit version.
+ *
+ * We do not use nontemporal operations, but use MOVDQA in preference to REP/STOS.
+ *
+ * This routine is also used for memset(p,0,n), which is a common case
+ * since gcc sometimes silently maps bzero() into memset(). As a result,
+ * we always load the original ptr into %eax before returning.
+ */
+
+#define kShort 80 // too short to bother with SSE (must be >=80)
+
+
+ .text
+ .code64
+ .align 5, 0x90
+Lbzero_sse42_64: // void bzero(void *b, size_t len);
+ pushq %rbp // set up a frame for backtraces
+ movq %rsp,%rbp
+ xorl %eax,%eax // set fill data to 0
+ movq %rdi,%r11 // save original ptr as return value
+ cmpq $(kShort),%rsi // long enough for SSE?
+ jg LNotShort // yes
+
+// Here for short operands or the end of long ones.
+// %esi = length (<= kShort)
+// %rdi = ptr
+// %eax = zero
+
+Lshort:
+ cmpl $12,%esi // long enough to word align?
+ jge 3f // yes
+ test %esi,%esi // length==0?
+ jz 6f
+1:
+ movb %al,(%rdi) // zero a byte
+ incq %rdi
+ decl %esi
+ jnz 1b
+ jmp 6f
+2:
+ movb %al,(%rdi) // zero a byte
+ incq %rdi
+ decl %esi
+3:
+ testl $3,%edi // is ptr doubleword aligned?
+ jnz 2b // no
+ movl %esi,%ecx // copy length
+ shrl $2,%esi // #doublewords to store
+4:
+ movl %eax,(%rdi) // zero an aligned doubleword
+ addq $4,%rdi
+ decl %esi
+ jnz 4b
+ andl $3,%ecx // mask down to #bytes at end (0..3)
+ jz 6f // none
+5:
+ movb %al,(%rdi) // zero a byte
+ incq %rdi
+ decl %ecx
+ jnz 5b
+6:
+ movq %r11,%rax // set return value in case this was a call of memset()
+ popq %rbp
+ ret
+
+
+// We will be using SSE, so align ptr.
+// %rsi = length (> kShort)
+// %rdi = ptr
+// %eax = zero
+
+LNotShort:
+ testl $3,%edi // 4-byte aligned?
+ jz 2f // yes
+ movb %al,(%rdi) // zero another byte
+ incq %rdi
+ decq %rsi
+ jmp LNotShort
+1: // zero doublewords until 16-byte aligned
+ movl %eax,(%rdi)
+ addq $4,%rdi
+ subq $4,%rsi
+2:
+ testl $15,%edi // 16-byte aligned?
+ jnz 1b // no
+
+// Destination is now 16-byte aligned. Prepare to loop over 64-byte chunks.
+// %rsi = length (> (kShort-15))
+// %rdi = ptr (aligned)
+// %eax = zero
+
+LDestAligned:
+ movq %rsi,%rcx
+ andl $63,%esi // mask down to residual length (0..63)
+ andq $-64,%rcx // get #bytes we will zero in this loop
+ pxor %xmm0,%xmm0 // zero an SSE register
+ addq %rcx,%rdi // increment ptr by length to move
+ negq %rcx // negate length to move
+ jmp 1f
+
+// Loop over 64-byte chunks, storing into cache.
+
+ .align 4,0x90 // keep inner loops 16-byte aligned
+1:
+ movdqa %xmm0,(%rdi,%rcx)
+ movdqa %xmm0,16(%rdi,%rcx)
+ movdqa %xmm0,32(%rdi,%rcx)
+ movdqa %xmm0,48(%rdi,%rcx)
+ addq $64,%rcx
+ jne 1b
+
+ jmp Lshort
+
+
+ COMMPAGE_DESCRIPTOR(bzero_sse42_64,_COMM_PAGE_BZERO,kHasSSE4_2,0)
.long CPN(bit_test_and_clear_up)
.long CPN(bzero_scalar)
.long CPN(bzero_sse2)
+ .long CPN(bzero_sse42)
.long CPN(bcopy_scalar)
.long CPN(bcopy_sse2)
.long CPN(bcopy_sse3x)
+ .long CPN(bcopy_sse42)
.long CPN(memset_pattern_sse2)
.long CPN(longcopy_sse3x)
.long CPN(nanotime)
.long CPN(bit_test_and_clear_mp_64)
.long CPN(bit_test_and_clear_up_64)
.long CPN(bzero_sse2_64)
+ .long CPN(bzero_sse42_64)
.long CPN(bcopy_sse3x_64)
+ .long CPN(bcopy_sse42_64)
.long CPN(memset_pattern_sse2_64)
.long CPN(longcopy_sse3x_64)
.long CPN(nanotime_64)
testl %esi,%esi /* if being updated, loop until stable */
jz 0b
+ lfence
rdtsc /* get TSC in %edx:%eax */
lfence
testl %esi,%esi /* if generation is 0, data being changed */
jz 0b /* so loop until stable */
+ lfence
rdtsc /* get TSC in %edx:%eax */
+ lfence
subl _COMM_PAGE_NT_TSC_BASE,%eax
sbbl _COMM_PAGE_NT_TSC_BASE+4,%edx
movl _NT_GENERATION(%rsi),%r8d // get generation
testl %r8d,%r8d // if 0, data is being changed...
jz 1b // ...so loop until stable
+ lfence
rdtsc // edx:eax := tsc
lfence
shlq $32,%rdx // rax := ((edx << 32) | eax), ie 64-bit tsc
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* Data structures embedded in per-cpu data:
*/
typedef struct rtclock_timer {
- uint64_t deadline;
- boolean_t is_set;
- boolean_t has_expired;
+ queue_head_t queue;
+ uint64_t deadline;
+ boolean_t is_set;
+ boolean_t has_expired;
} rtclock_timer_t;
int cpu_subtype;
int cpu_threadtype;
int cpu_running;
- uint64_t rtclock_intr_deadline;
rtclock_timer_t rtclock_timer;
boolean_t cpu_is64bit;
task_map_t cpu_task_map;
#define DBG(x...)
#endif /* TOPO_DEBUG */
+void validate_topology(void);
+
#define bitmask(h,l) ((bit(h)|(bit(h)-1)) & ~(bit(l)-1))
#define bitfield(x,h,l) (((x) & bitmask(h,l)) >> l)
topoParms.nCoresSharingLLC = cpuinfo->core_count;
if (nCPUsSharing > cpuinfo->thread_count)
topoParms.nLCPUsSharingLLC = cpuinfo->thread_count;
-
-
- if (nCPUsSharing > cpuinfo->thread_count)
- topoParms.maxSharingLLC = cpuinfo->thread_count;
}
static void
/* NOT REACHED */
}
+/*
+ * Validates that the topology was built correctly. Must be called only
+ * after the complete topology is built and no other changes are being made.
+ */
+void
+validate_topology(void)
+{
+ x86_pkg_t *pkg;
+ x86_die_t *die;
+ x86_core_t *core;
+ x86_lcpu_t *lcpu;
+ uint32_t nDies;
+ uint32_t nCores;
+ uint32_t nCPUs;
+
+ /*
+ * XXX
+ *
+ * Right now this only works if the number of CPUs started is the total
+ * number of CPUs. However, when specifying cpus=n the topology is only
+ * partially constructed and the checks below will fail.
+ *
+ * We should *always* build the complete topology and only start the CPUs
+ * indicated by cpus=n. Until that happens, this code will not check the
+	 * topology if the number of cpus defined is less than that described by the
+ * topology parameters.
+ */
+ nCPUs = topoParms.nPackages * topoParms.nLThreadsPerPackage;
+ if (nCPUs > real_ncpus)
+ return;
+
+ pkg = x86_pkgs;
+ while (pkg != NULL) {
+ /*
+ * Make sure that the package has the correct number of dies.
+ */
+ nDies = 0;
+ die = pkg->dies;
+ while (die != NULL) {
+ if (die->package == NULL)
+ panic("Die(%d)->package is NULL",
+ die->pdie_num);
+ if (die->package != pkg)
+ panic("Die %d points to package %d, should be %d",
+ die->pdie_num, die->package->lpkg_num, pkg->lpkg_num);
+
+ DBG("Die(%d)->package %d\n",
+ die->pdie_num, pkg->lpkg_num);
+
+ /*
+ * Make sure that the die has the correct number of cores.
+ */
+			DBG("Die(%d)->cores: ", die->pdie_num);
+ nCores = 0;
+ core = die->cores;
+ while (core != NULL) {
+ if (core->die == NULL)
+ panic("Core(%d)->die is NULL",
+ core->pcore_num);
+ if (core->die != die)
+ panic("Core %d points to die %d, should be %d",
+ core->pcore_num, core->die->pdie_num, die->pdie_num);
+ nCores += 1;
+ DBG("%d ", core->pcore_num);
+ core = core->next_in_die;
+ }
+ DBG("\n");
+
+ if (nCores != topoParms.nLCoresPerDie)
+ panic("Should have %d Cores, but only found %d for Die %d",
+ topoParms.nLCoresPerDie, nCores, die->pdie_num);
+
+ /*
+ * Make sure that the die has the correct number of CPUs.
+ */
+ DBG("Die(%d)->lcpus: ", die->pdie_num);
+ nCPUs = 0;
+ lcpu = die->lcpus;
+ while (lcpu != NULL) {
+ if (lcpu->die == NULL)
+ panic("CPU(%d)->die is NULL",
+ lcpu->cpu_num);
+ if (lcpu->die != die)
+ panic("CPU %d points to die %d, should be %d",
+ lcpu->cpu_num, lcpu->die->pdie_num, die->pdie_num);
+ nCPUs += 1;
+ DBG("%d ", lcpu->cpu_num);
+ lcpu = lcpu->next_in_die;
+ }
+ DBG("\n");
+
+ if (nCPUs != topoParms.nLThreadsPerDie)
+ panic("Should have %d Threads, but only found %d for Die %d",
+ topoParms.nLThreadsPerDie, nCPUs, die->pdie_num);
+
+ nDies += 1;
+ die = die->next_in_pkg;
+ }
+
+ if (nDies != topoParms.nLDiesPerPackage)
+ panic("Should have %d Dies, but only found %d for package %d",
+ topoParms.nLDiesPerPackage, nDies, pkg->lpkg_num);
+
+ /*
+ * Make sure that the package has the correct number of cores.
+ */
+ nCores = 0;
+ core = pkg->cores;
+ while (core != NULL) {
+ if (core->package == NULL)
+ panic("Core(%d)->package is NULL",
+ core->pcore_num);
+ if (core->package != pkg)
+ panic("Core %d points to package %d, should be %d",
+ core->pcore_num, core->package->lpkg_num, pkg->lpkg_num);
+ DBG("Core(%d)->package %d\n",
+ core->pcore_num, pkg->lpkg_num);
+
+ /*
+ * Make sure that the core has the correct number of CPUs.
+ */
+ nCPUs = 0;
+ lcpu = core->lcpus;
+			DBG("Core(%d)->lcpus: ", core->pcore_num);
+ while (lcpu != NULL) {
+ if (lcpu->core == NULL)
+ panic("CPU(%d)->core is NULL",
+ lcpu->cpu_num);
+ if (lcpu->core != core)
+ panic("CPU %d points to core %d, should be %d",
+ lcpu->cpu_num, lcpu->core->pcore_num, core->pcore_num);
+ DBG("%d ", lcpu->cpu_num);
+ nCPUs += 1;
+ lcpu = lcpu->next_in_core;
+ }
+ DBG("\n");
+
+ if (nCPUs != topoParms.nLThreadsPerCore)
+ panic("Should have %d Threads, but only found %d for Core %d",
+ topoParms.nLThreadsPerCore, nCPUs, core->pcore_num);
+ nCores += 1;
+ core = core->next_in_pkg;
+ }
+
+ if (nCores != topoParms.nLCoresPerPackage)
+ panic("Should have %d Cores, but only found %d for package %d",
+ topoParms.nLCoresPerPackage, nCores, pkg->lpkg_num);
+
+ /*
+ * Make sure that the package has the correct number of CPUs.
+ */
+ nCPUs = 0;
+ lcpu = pkg->lcpus;
+ while (lcpu != NULL) {
+ if (lcpu->package == NULL)
+ panic("CPU(%d)->package is NULL",
+ lcpu->cpu_num);
+ if (lcpu->package != pkg)
+ panic("CPU %d points to package %d, should be %d",
+ lcpu->cpu_num, lcpu->package->lpkg_num, pkg->lpkg_num);
+ DBG("CPU(%d)->package %d\n",
+ lcpu->cpu_num, pkg->lpkg_num);
+ nCPUs += 1;
+ lcpu = lcpu->next_in_pkg;
+ }
+
+ if (nCPUs != topoParms.nLThreadsPerPackage)
+ panic("Should have %d Threads, but only found %d for package %d",
+ topoParms.nLThreadsPerPackage, nCPUs, pkg->lpkg_num);
+
+ pkg = pkg->next;
+ }
+}
+
#if TOPO_DEBUG
/*
* Prints out the topology
#define DBG(x...)
#endif
void debug_topology_print(void);
+void validate_topology(void);
__private_extern__ void qsort(
void * array,
#if TOPO_DEBUG
debug_topology_print();
#endif /* TOPO_DEBUG */
+ validate_topology();
ml_set_interrupts_enabled(istate);
DBG("cpu_topology_start() LLC is L%d\n", topoParms.LLCDepth + 1);
struct x86_die *die; /* die containing the logical cpu */
struct x86_pkg *package; /* package containing the logical cpu */
struct cpu_data *cpu; /* cpu_data structure */
+ uint32_t flags;
uint32_t cpu_num; /* cpu number */
uint32_t lnum; /* logical cpu number (within core) */
uint32_t pnum; /* physical cpu number */
#define X86CORE_FL_PRESENT 0x80000000 /* core is present */
#define X86CORE_FL_READY 0x40000000 /* core struct is init'd */
+#define X86CORE_FL_HAS_HPET 0x10000000 /* core has HPET assigned */
#define X86CORE_FL_HALTED 0x00008000 /* core is halted */
#define X86CORE_FL_IDLE 0x00004000 /* core is idle */
+#define X86CORE_FL_WAKEUP 0x00002000 /* wakeup is pending */
typedef struct x86_core
{
quad(cpuid_reg[ecx], cpuid_reg[edx]);
}
+ /* Fold in the Invariant TSC feature bit, if present */
+ if (max_extid >= 0x80000007) {
+ do_cpuid(0x80000007, cpuid_reg);
+ info_p->cpuid_extfeatures |=
+ cpuid_reg[edx] & CPUID_EXTFEATURE_TSCI;
+ }
+
+ /* Find the microcode version number a.k.a. signature a.k.a. BIOS ID */
+ info_p->cpuid_microcode_version =
+ (uint32_t) (rdmsr64(MSR_IA32_BIOS_SIGN_ID) >> 32);
+
+ if (info_p->cpuid_model == CPUID_MODEL_NEHALEM) {
+ /*
+ * For Nehalem, find the number of enabled cores and threads
+ * (which determines whether SMT/Hyperthreading is active).
+ */
+ uint64_t msr_core_thread_count = rdmsr64(MSR_CORE_THREAD_COUNT);
+ info_p->core_count = bitfield(msr_core_thread_count, 31, 16);
+ info_p->thread_count = bitfield(msr_core_thread_count, 15, 0);
+ }
+
if (info_p->cpuid_features & CPUID_FEATURE_MONITOR) {
/*
* Extract the Monitor/Mwait Leaf info:
{CPUID_EXTFEATURE_XD, "XD"},
{CPUID_EXTFEATURE_EM64T, "EM64T"},
{CPUID_EXTFEATURE_LAHF, "LAHF"},
+ {CPUID_EXTFEATURE_RDTSCP, "RDTSCP"},
+ {CPUID_EXTFEATURE_TSCI, "TSCI"},
{0, 0}
};
*/
#define CPUID_EXTFEATURE_SYSCALL _Bit(11) /* SYSCALL/sysret */
#define CPUID_EXTFEATURE_XD _Bit(20) /* eXecute Disable */
+#define CPUID_EXTFEATURE_RDTSCP _Bit(27) /* RDTSCP */
#define CPUID_EXTFEATURE_EM64T _Bit(29) /* Extended Mem 64 Technology */
#define CPUID_EXTFEATURE_LAHF _HBit(20) /* LAFH/SAHF instructions */
+/*
+ * The CPUID_EXTFEATURE_XXX values define 64-bit values
+ * returned in %ecx:%edx to a CPUID request with %eax of 0x80000007:
+ */
+#define CPUID_EXTFEATURE_TSCI _Bit(8) /* TSC Invariant */
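The kernel hunk above folds this bit into cpuid_extfeatures from CPUID leaf 0x80000007. As a standalone illustration of the same query (not part of the patch; the helper below and its output are assumptions for illustration), a user-space program can read the leaf directly and test EDX bit 8, which is the bit CPUID_EXTFEATURE_TSCI (_Bit(8)) names:

#include <stdint.h>
#include <stdio.h>

/* Minimal sketch, not part of the patch: query CPUID leaf 0x80000007 and
 * test EDX bit 8 (Invariant TSC), the bit CPUID_EXTFEATURE_TSCI refers to. */
static void cpuid(uint32_t leaf, uint32_t r[4])
{
	__asm__ volatile ("cpuid"
	    : "=a" (r[0]), "=b" (r[1]), "=c" (r[2]), "=d" (r[3])
	    : "a" (leaf), "c" (0));
}

int main(void)
{
	uint32_t r[4];

	cpuid(0x80000000, r);			/* highest extended leaf supported */
	if (r[0] < 0x80000007) {
		printf("leaf 0x80000007 not implemented\n");
		return 1;
	}
	cpuid(0x80000007, r);			/* power-management feature leaf */
	printf("Invariant TSC: %s\n", (r[3] & (1u << 8)) ? "yes" : "no");
	return 0;
}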
+
+
#define CPUID_CACHE_SIZE 16 /* Number of descriptor vales */
#define CPUID_CACHE_NULL 0x00 /* NULL */
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <mach/mach_types.h>
+#include <kern/timer_queue.h>
#include <kern/clock.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <i386/cpu_topology.h>
#include <i386/cpu_threads.h>
-/* XXX from <arch>/rtclock.c */
-clock_timer_func_t rtclock_timer_expire;
-
/*
* Event timer interrupt.
*
/* has a pending clock timer expired? */
if (mytimer->deadline <= abstime) { /* Have we expired the deadline? */
mytimer->has_expired = TRUE; /* Remember that we popped */
- mytimer->deadline = EndOfAllTime; /* Set timer request to the end of all time in case we have no more events */
- (*rtclock_timer_expire)(abstime); /* Process pop */
+ mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
mytimer->has_expired = FALSE;
}
}
/*
- * Set the clock deadline; called by the thread scheduler.
+ * Set the clock deadline.
*/
void etimer_set_deadline(uint64_t deadline)
{
}
splx(s);
}
+
+void etimer_timer_expire(void *arg);
+
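+/*
+ * etimer_timer_expire() is invoked as a cross-call (see i386_deactivate_cpu())
+ * to expire any due entries on this CPU's timer queue and re-arm the next
+ * hardware deadline via etimer_resync_deadlines().
+ */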
+void
+etimer_timer_expire(
+__unused void *arg)
+{
+ rtclock_timer_t *mytimer;
+ uint64_t abstime;
+ cpu_data_t *pp;
+ x86_lcpu_t *lcpu;
+
+ pp = current_cpu_datap();
+ lcpu = x86_lcpu();
+
+ mytimer = &pp->rtclock_timer;
+ abstime = mach_absolute_time();
+
+ mytimer->has_expired = TRUE;
+ mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
+ mytimer->has_expired = FALSE;
+
+ lcpu->rtcPop = EndOfAllTime;
+ etimer_resync_deadlines();
+}
+
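+/*
+ * timer_queue_assign() selects the queue a new timer deadline belongs on:
+ * the current CPU's queue if that CPU is running (pulling in the hardware
+ * deadline when the new entry is earlier), otherwise the master CPU's queue.
+ */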
+queue_t
+timer_queue_assign(
+ uint64_t deadline)
+{
+ cpu_data_t *cdp = current_cpu_datap();
+ rtclock_timer_t *timer;
+
+ if (cdp->cpu_running) {
+ timer = &cdp->rtclock_timer;
+
+ if (deadline < timer->deadline)
+ etimer_set_deadline(deadline);
+ }
+ else
+ timer = &cpu_datap(master_cpu)->rtclock_timer;
+
+ return (&timer->queue);
+}
+
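+/*
+ * timer_queue_cancel() is called with the deadline being removed and the
+ * queue's new earliest deadline; if the cancelled deadline was earlier than
+ * the new head of this CPU's queue, push the hardware deadline out to it.
+ */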
+void
+timer_queue_cancel(
+ queue_t queue,
+ uint64_t deadline,
+ uint64_t new_deadline)
+{
+ if (queue == &current_cpu_datap()->rtclock_timer.queue) {
+ if (deadline < new_deadline)
+ etimer_set_deadline(new_deadline);
+ }
+}
push %ebx
mov %edx,%edi
+ lfence
rdtsc /* read cyclecount into %edx:%eax */
lfence
addl %ecx,%eax /* fetch and timeout */
/*
* Here after spinning INNER_LOOP_COUNT times, check for timeout
*/
+ lfence
rdtsc /* cyclecount into %edx:%eax */
lfence
cmpl %ecx,%edx /* compare high-order 32-bits */
avail_start = first_avail;
mem_actual = sane_size;
-#define MEG (1024*1024)
+#define MEG (1024*1024ULL)
+#define GIG (1024*MEG)
/*
 * For user-visible memory size, round up to 128 MB - accounting for the various stolen memory
sane_size = (sane_size + 128 * MEG - 1) & ~((uint64_t)(128 * MEG - 1));
+#if defined(__i386__)
+#define K32_MAXMEM (32*GIG)
+ /*
+ * For K32 we cap memory at K32_MAXMEM (currently 32GB),
+ * unless overridden by the maxmem= boot-arg
+ * -- i.e. a non-zero maxmem argument to this function.
+ */
+ if (maxmem == 0 && sane_size > K32_MAXMEM) {
+ maxmem = K32_MAXMEM;
+ printf("Physical memory %lld bytes capped at %dGB for 32-bit kernel\n",
+ sane_size, (uint32_t) (K32_MAXMEM/GIG));
+ }
+#endif
/*
* if user set maxmem, reduce memory sizes
*/
/* TRUE if local APIC was enabled by the OS not by the BIOS */
static boolean_t lapic_os_enabled = FALSE;
+static boolean_t lapic_errors_masked = FALSE;
+static uint64_t lapic_last_master_error = 0;
+static uint64_t lapic_error_time_threshold = 0;
+static unsigned lapic_master_error_count = 0;
+static unsigned lapic_error_count_threshold = 5;
+static boolean_t lapic_dont_panic = FALSE;
+
+extern int debug_boot_arg;
+
/* Base vector for local APIC interrupt sources */
int lapic_interrupt_base = LAPIC_DEFAULT_INTERRUPT_BASE;
BOOL(LAPIC_READ(SVR)&LAPIC_SVR_ENABLE),
BOOL(!(LAPIC_READ(SVR)&LAPIC_SVR_FOCUS_OFF)),
LAPIC_READ(SVR) & LAPIC_SVR_MASK);
+ if (mca_is_cmci_present())
+ kprintf("LVT_CMCI: Vector 0x%02x [%s] %s %cmasked\n",
+ VEC(LVT_CMCI),
+ DM(LVT_CMCI),
+ DS(LVT_CMCI),
+ MASK(LVT_CMCI));
kprintf("LVT_TIMER: Vector 0x%02x %s %cmasked %s\n",
VEC(LVT_TIMER),
DS(LVT_TIMER),
LAPIC_WRITE(LVT_LINT0, value);
}
+ /* Error: masked */
+ LAPIC_WRITE(LVT_ERROR, LAPIC_READ(LVT_ERROR) | LAPIC_LVT_MASKED);
+
/* Timer: masked */
LAPIC_WRITE(LVT_TIMER, LAPIC_READ(LVT_TIMER) | LAPIC_LVT_MASKED);
/* Perfmon: masked */
LAPIC_WRITE(LVT_PERFCNT, LAPIC_READ(LVT_PERFCNT) | LAPIC_LVT_MASKED);
- /* Error: masked */
- LAPIC_WRITE(LVT_ERROR, LAPIC_READ(LVT_ERROR) | LAPIC_LVT_MASKED);
-
/* APIC software disabled */
LAPIC_WRITE(SVR, LAPIC_READ(SVR) & ~LAPIC_SVR_ENABLE);
{
int value;
+ if (lapic_error_time_threshold == 0 && cpu_number() == 0) {
+ nanoseconds_to_absolutetime(NSEC_PER_SEC >> 2, &lapic_error_time_threshold);
+ if (!PE_parse_boot_argn("lapic_dont_panic", &lapic_dont_panic, sizeof(lapic_dont_panic))) {
+ lapic_dont_panic = FALSE;
+ }
+ }
+
/* Set flat delivery model, logical processor id */
LAPIC_WRITE(DFR, LAPIC_DFR_FLAT);
LAPIC_WRITE(LDR, (get_cpu_number()) << LAPIC_LDR_SHIFT);
/* Thermal: unmasked */
LAPIC_WRITE(LVT_THERMAL, LAPIC_VECTOR(THERMAL));
- lapic_esr_clear();
+ /* CMCI, if available */
+ if (mca_is_cmci_present())
+ LAPIC_WRITE(LVT_CMCI, LAPIC_VECTOR(CMCI));
- LAPIC_WRITE(LVT_ERROR, LAPIC_VECTOR(ERROR));
+ if (((cpu_number() == master_cpu) && lapic_errors_masked == FALSE) ||
+ (cpu_number() != master_cpu)) {
+ lapic_esr_clear();
+ LAPIC_WRITE(LVT_ERROR, LAPIC_VECTOR(ERROR));
+ }
}
void
case LAPIC_TIMER_INTERRUPT:
case LAPIC_THERMAL_INTERRUPT:
case LAPIC_PERFCNT_INTERRUPT:
+ case LAPIC_CMCI_INTERRUPT:
lapic_intr_func[vector] = func;
break;
default:
lapic_interrupt(int interrupt, x86_saved_state_t *state)
{
int retval = 0;
+ int esr = -1;
interrupt -= lapic_interrupt_base;
if (interrupt < 0) {
switch(interrupt) {
case LAPIC_TIMER_INTERRUPT:
case LAPIC_THERMAL_INTERRUPT:
+ case LAPIC_PERFCNT_INTERRUPT:
case LAPIC_INTERPROCESSOR_INTERRUPT:
if (lapic_intr_func[interrupt] != NULL)
(void) (*lapic_intr_func[interrupt])(state);
if (interrupt == LAPIC_PERFCNT_INTERRUPT)
+ /* Clear interrupt masked */
LAPIC_WRITE(LVT_PERFCNT, LAPIC_VECTOR(PERFCNT));
_lapic_end_of_interrupt();
retval = 1;
break;
+ case LAPIC_CMCI_INTERRUPT:
+ if (lapic_intr_func[interrupt] != NULL)
+ (void) (*lapic_intr_func[interrupt])(state);
+ /* return 0 for the platform expert to handle */
+ break;
case LAPIC_ERROR_INTERRUPT:
+ /* We treat error interrupts on APs as fatal.
+ * The current interrupt steering scheme directs most
+ * external interrupts to the BSP (HPET interrupts being
+ * a notable exception); hence, such an error
+ * on an AP may signify LVT corruption (with "may" being
+ * the operative word). On the BSP, we adopt a more
+ * lenient approach, in the interests of enhancing
+ * debuggability and reducing fragility.
+ * If "lapic_error_count_threshold" error interrupts
+ * occur within "lapic_error_time_threshold" absolute
+ * time units, we mask the error vector and log. The
+ * error interrupts themselves are likely
+ * side effects of issues which are beyond the purview of
+ * the local APIC interrupt handler, however. The Error
+ * Status Register value (the illegal destination
+ * vector code is one observed in practice) indicates
+ * the immediate cause of the error.
+ */
+ esr = lapic_esr_read();
lapic_dump();
- panic("Local APIC error\n");
+
+ if ((debug_boot_arg && (lapic_dont_panic == FALSE)) ||
+ cpu_number() != master_cpu) {
+ panic("Local APIC error, ESR: %d\n", esr);
+ }
+
+ if (cpu_number() == master_cpu) {
+ uint64_t abstime = mach_absolute_time();
+ if ((abstime - lapic_last_master_error) < lapic_error_time_threshold) {
+ if (lapic_master_error_count++ > lapic_error_count_threshold) {
+ lapic_errors_masked = TRUE;
+ LAPIC_WRITE(LVT_ERROR, LAPIC_READ(LVT_ERROR) | LAPIC_LVT_MASKED);
+ printf("Local APIC: errors masked\n");
+ }
+ }
+ else {
+ lapic_last_master_error = abstime;
+ lapic_master_error_count = 0;
+ }
+ printf("Local APIC error on master CPU, ESR: %d, error count this run: %d\n", esr, lapic_master_error_count);
+ }
+
_lapic_end_of_interrupt();
retval = 1;
break;
#define LAPIC_TMR_BASE 0x00000180
#define LAPIC_IRR_BASE 0x00000200
#define LAPIC_ERROR_STATUS 0x00000280
+#define LAPIC_LVT_CMCI 0x000002F0
#define LAPIC_ICR 0x00000300
#define LAPIC_ICR_VECTOR_MASK 0x000FF
#define LAPIC_ICR_DM_MASK 0x00700
{
lapic_set_intr_func(LAPIC_VECTOR(THERMAL), func);
}
+static inline void lapic_set_cmci_func(i386_intr_func_t func)
+{
+ lapic_set_intr_func(LAPIC_VECTOR(CMCI), func);
+}
#ifdef MP_DEBUG
#define LAPIC_CPU_MAP_DUMP() lapic_cpu_map_dump()
static boolean_t mca_threshold_status_present = FALSE;
static boolean_t mca_extended_MSRs_present = FALSE;
static unsigned int mca_extended_MSRs_count = 0;
+static boolean_t mca_cmci_present = FALSE;
static ia32_mcg_cap_t ia32_mcg_cap;
decl_simple_lock_data(static, mca_lock);
mca_error_bank_count = ia32_mcg_cap.bits.count;
mca_control_MSR_present = ia32_mcg_cap.bits.mcg_ctl_p;
mca_threshold_status_present = ia32_mcg_cap.bits.mcg_tes_p;
+ mca_cmci_present = ia32_mcg_cap.bits.mcg_ext_corr_err_p;
if (family == 0x0F) {
mca_extended_MSRs_present = ia32_mcg_cap.bits.mcg_ext_p;
mca_extended_MSRs_count = ia32_mcg_cap.bits.mcg_ext_cnt;
}
}
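+/*
+ * Report whether the CPU advertises corrected machine-check interrupts
+ * (CMCI), initializing the MCA capability state on first use.
+ */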
+boolean_t
+mca_is_cmci_present(void)
+{
+ if (!mca_initialized)
+ mca_cpu_init();
+ return mca_cmci_present;
+}
+
void
mca_cpu_alloc(cpu_data_t *cdp)
{
bank->mca_mci_addr = (bank->mca_mci_status.bits.addrv)?
rdmsr64(IA32_MCi_ADDR(i)) : 0ULL;
}
+
+ /*
+ * If we're the first thread with MCA state, point our package to it
+ * and don't care about races
+ */
+ if (x86_package()->mca_state == NULL)
+ x86_package()->mca_state = mca_state;
}
void
kdb_printf(" %s\n", infop->cpuid_brand_string);
}
+static const char *mc8_memory_operation[] = {
+ [MC8_MMM_GENERIC] "generic",
+ [MC8_MMM_READ] "read",
+ [MC8_MMM_WRITE] "write",
+ [MC8_MMM_ADDRESS_COMMAND] "address/command",
+ [MC8_MMM_RESERVED] "reserved"
+};
+
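+/*
+ * Decode and print the bank 8 (memory controller) machine-check status,
+ * and its ADDR/MISC registers when valid, from a saved mca_state_t.
+ */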
+static void
+mca_dump_bank_mc8(mca_state_t *state, int i)
+{
+ mca_mci_bank_t *bank;
+ ia32_mci_status_t status;
+ struct ia32_mc8_specific mc8;
+ int mmm;
+
+ bank = &state->mca_error_bank[i];
+ status = bank->mca_mci_status;
+ mc8 = status.bits_mc8;
+ mmm = MIN(mc8.memory_operation, MC8_MMM_RESERVED);
+
+ kdb_printf(
+ " IA32_MC%d_STATUS(0x%x): 0x%016qx %svalid\n",
+ i, IA32_MCi_STATUS(i), status.u64, IF(!status.bits.val, "in"));
+ if (!status.bits.val)
+ return;
+
+ kdb_printf(
+ " Channel number: %d%s\n"
+ " Memory Operation: %s\n"
+ " Machine-specific error: %s%s%s%s%s%s%s%s\n"
+ " COR_ERR_CNT: %d\n",
+ mc8.channel_number,
+ IF(mc8.channel_number == 15, " (unknown)"),
+ mc8_memory_operation[mmm],
+ IF(mc8.read_ecc, "Read ECC"),
+ IF(mc8.ecc_on_a_scrub, "ECC on scrub"),
+ IF(mc8.write_parity, "Write parity"),
+ IF(mc8.redundant_memory, "Redundant memory"),
+ IF(mc8.sparing, "Sparing/Resilvering"),
+ IF(mc8.access_out_of_range, "Access out of Range"),
+ IF(mc8.address_parity, "Address Parity"),
+ IF(mc8.byte_enable_parity, "Byte Enable Parity"),
+ mc8.cor_err_cnt);
+ kdb_printf(
+ " Status bits:\n%s%s%s%s%s%s",
+ IF(status.bits.pcc, " Processor context corrupt\n"),
+ IF(status.bits.addrv, " ADDR register valid\n"),
+ IF(status.bits.miscv, " MISC register valid\n"),
+ IF(status.bits.en, " Error enabled\n"),
+ IF(status.bits.uc, " Uncorrected error\n"),
+ IF(status.bits.over, " Error overflow\n"));
+ if (status.bits.addrv)
+ kdb_printf(
+ " IA32_MC%d_ADDR(0x%x): 0x%016qx\n",
+ i, IA32_MCi_ADDR(i), bank->mca_mci_addr);
+ if (status.bits.miscv) {
+ ia32_mc8_misc_t mc8_misc;
+
+ mc8_misc.u64 = bank->mca_mci_misc;
+ kdb_printf(
+ " IA32_MC%d_MISC(0x%x): 0x%016qx\n"
+ " DIMM: %d\n"
+ " Channel: %d\n"
+ " Syndrome: 0x%x\n",
+ i, IA32_MCi_MISC(i), mc8_misc.u64,
+ mc8_misc.bits.dimm,
+ mc8_misc.bits.channel,
+ (int) mc8_misc.bits.syndrome);
+ }
+}
+
static const char *mca_threshold_status[] = {
[THRESHOLD_STATUS_NO_TRACKING] "No tracking",
[THRESHOLD_STATUS_GREEN] "Green",
kdb_printf("MCA error-reporting registers:\n");
for (i = 0; i < mca_error_bank_count; i++ ) {
+ if (i == 8) {
+ /*
+ * Fatal Memory Error
+ */
+
+ /* Dump MC8 for local package */
+ kdb_printf(" Package %d logged:\n",
+ x86_package()->ppkg_num);
+ mca_dump_bank_mc8(state, 8);
+
+ /* If there's other packages, report their MC8s */
+ x86_pkg_t *pkg;
+ uint64_t deadline;
+ for (pkg = x86_pkgs; pkg != NULL; pkg = pkg->next) {
+ if (pkg == x86_package())
+ continue;
+ deadline = mach_absolute_time() + LockTimeOut;
+ while (pkg->mca_state == NULL &&
+ mach_absolute_time() < deadline)
+ cpu_pause();
+ if (pkg->mca_state) {
+ kdb_printf(" Package %d logged:\n",
+ pkg->ppkg_num);
+ mca_dump_bank_mc8(pkg->mca_state, 8);
+ } else {
+ kdb_printf(" Package %d timed out!\n",
+ pkg->ppkg_num);
+ }
+ }
+ continue;
+ }
mca_dump_bank(state, i);
}
}
" control MSR present\n"),
IF(mca_threshold_status_present,
" threshold-based error status present\n"),
- "");
+ IF(mca_cmci_present,
+ " extended corrected memory error handling present\n"));
if (mca_extended_MSRs_present)
kdb_printf(
" %d extended MSRs present\n", mca_extended_MSRs_count);
uint64_t count :BITS(7,0);
uint64_t mcg_ctl_p :BIT1(8);
uint64_t mcg_ext_p :BIT1(9);
- uint64_t mcg_reserved1 :BIT1(10);
+ uint64_t mcg_ext_corr_err_p :BIT1(10);
uint64_t mcg_tes_p :BIT1(11);
- uint64_t mcg_reserved2 :BITS(15,12);
+ uint64_t mcg_ecms :BIT1(12);
+ uint64_t mcg_reserved2 :BITS(15,13);
uint64_t mcg_ext_cnt :BITS(23,16);
} bits;
uint64_t u64;
uint64_t over :BIT1(62);
uint64_t val :BIT1(63);
} bits;
- struct { /* Variant if threshold-based error status present: */
+ struct { /* Variant if threshold-based error status present: */
uint64_t mca_error :BITS(15,0);
uint64_t model_specific_error :BITS(31,16);
uint64_t other_information :BITS(52,32);
uint64_t over :BIT1(62);
uint64_t val :BIT1(63);
} bits_tes_p;
+ struct ia32_mc8_specific {
+ uint64_t channel_number :BITS(3,0);
+ uint64_t memory_operation :BITS(6,4);
+ uint64_t unused :BITS(15,7);
+ uint64_t read_ecc :BIT1(16);
+ uint64_t ecc_on_a_scrub :BIT1(17);
+ uint64_t write_parity :BIT1(18);
+ uint64_t redundant_memory :BIT1(19);
+ uint64_t sparing :BIT1(20);
+ uint64_t access_out_of_range :BIT1(21);
+ uint64_t address_parity :BIT1(23);
+ uint64_t byte_enable_parity :BIT1(24);
+ uint64_t reserved :BITS(37,25);
+ uint64_t cor_err_cnt :BITS(52,38);
+ } bits_mc8;
uint64_t u64;
} ia32_mci_status_t;
#define THRESHOLD_STATUS_YELLOW 2
#define THRESHOLD_STATUS_RESERVED 3
+/* MC8 memory operations encoding: */
+#define MC8_MMM_GENERIC 0
+#define MC8_MMM_READ 1
+#define MC8_MMM_WRITE 2
+#define MC8_MMM_ADDRESS_COMMAND 3
+#define MC8_MMM_RESERVED 4
+
+typedef union {
+ struct {
+ uint64_t reserved1 :BITS(15,0);
+ uint64_t dimm :BITS(17,16);
+ uint64_t channel :BITS(19,18);
+ uint64_t reserved2 :BITS(31,20);
+ uint64_t syndrome :BITS(63,32);
+ } bits;
+ uint64_t u64;
+} ia32_mc8_misc_t;
+
typedef uint64_t ia32_mci_addr_t;
typedef uint64_t ia32_mci_misc_t;
extern void mca_cpu_init(void);
extern void mca_dump(void);
extern void mca_check_save(void);
+extern boolean_t mca_is_cmci_present(void);
#endif /* _I386_MACHINE_CHECK_H_ */
#endif /* KERNEL_PRIVATE */
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
machine_signal_idle(
processor_t processor)
{
- cpu_interrupt(PROCESSOR_DATA(processor, slot_num));
+ cpu_interrupt(processor->cpu_num);
}
thread_t
movl S_ARG0, %ecx
+ lfence
rdtsc
lfence
pushl %esi /* save generation */
pushl RNT_SHIFT(%edi) /* save low 32 bits of tscFreq */
- rdtsc /* get TSC in %edx:%eax */
+ lfence
+ rdtsc /* get TSC in %edx:%eax */
+ lfence
subl RNT_TSC_BASE(%edi),%eax
sbbl RNT_TSC_BASE+4(%edi),%edx
#include <kern/kern_types.h>
#include <kern/startup.h>
+#include <kern/timer_queue.h>
#include <kern/processor.h>
#include <kern/cpu_number.h>
#include <kern/cpu_data.h>
static volatile int debugger_cpu = -1;
static void mp_cpus_call_action(void);
+static void mp_call_PM(void);
#if GPROF
/*
}
}
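+/*
+ * Parameters for start_cpu(): the target cpu/lapic being started, the cpu
+ * issuing the startup IPIs, and whether the Nehalem sequence is required.
+ */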
+typedef struct {
+ int target_cpu;
+ int target_lapic;
+ int starter_cpu;
+ boolean_t is_nehalem;
+} processor_start_info_t;
+
+static processor_start_info_t start_info;
+
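+/*
+ * Issue the INIT/STARTUP IPI sequence to the target lapic and wait for the
+ * target cpu to come up. Runs only on the designated starter cpu; Nehalem
+ * uses a shorter INIT delay and a single STARTUP IPI.
+ */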
+static void
+start_cpu(void *arg)
+{
+ int i = 1000;
+ processor_start_info_t *psip = (processor_start_info_t *) arg;
+
+ /* Ignore this if the current processor is not the starter */
+ if (cpu_number() != psip->starter_cpu)
+ return;
+
+ LAPIC_WRITE(ICRD, psip->target_lapic << LAPIC_ICRD_DEST_SHIFT);
+ LAPIC_WRITE(ICR, LAPIC_ICR_DM_INIT);
+ delay(psip->is_nehalem ? 100 : 10000);
+
+ LAPIC_WRITE(ICRD, psip->target_lapic << LAPIC_ICRD_DEST_SHIFT);
+ LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12));
+
+ if (!psip->is_nehalem) {
+ delay(200);
+ LAPIC_WRITE(ICRD, psip->target_lapic << LAPIC_ICRD_DEST_SHIFT);
+ LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12));
+ }
+
+#ifdef POSTCODE_DELAY
+ /* Wait much longer if postcodes are displayed for a delay period. */
+ i *= 10000;
+#endif
+ mp_wait_for_cpu_up(psip->target_cpu, i*100, 100);
+}
+
kern_return_t
intel_startCPU(
int slot_num)
{
-
- int i = 1000;
- int lapic = cpu_to_lapic[slot_num];
+ int lapic = cpu_to_lapic[slot_num];
+ boolean_t istate;
assert(lapic != -1);
else
cpu_desc_init(cpu_datap(slot_num), FALSE);
- /* Serialize use of the slave boot stack. */
+ /* Serialize use of the slave boot stack, etc. */
mutex_lock(&mp_cpu_boot_lock);
- mp_disable_preemption();
+ istate = ml_set_interrupts_enabled(FALSE);
if (slot_num == get_cpu_number()) {
- mp_enable_preemption();
+ ml_set_interrupts_enabled(istate);
mutex_unlock(&mp_cpu_boot_lock);
return KERN_SUCCESS;
}
- LAPIC_WRITE(ICRD, lapic << LAPIC_ICRD_DEST_SHIFT);
- LAPIC_WRITE(ICR, LAPIC_ICR_DM_INIT);
- delay(10000);
-
- LAPIC_WRITE(ICRD, lapic << LAPIC_ICRD_DEST_SHIFT);
- LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12));
- delay(200);
+ start_info.starter_cpu = cpu_number();
+ start_info.is_nehalem = (cpuid_info()->cpuid_model
+ == CPUID_MODEL_NEHALEM);
+ start_info.target_cpu = slot_num;
+ start_info.target_lapic = lapic;
- LAPIC_WRITE(ICRD, lapic << LAPIC_ICRD_DEST_SHIFT);
- LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12));
- delay(200);
-
-#ifdef POSTCODE_DELAY
- /* Wait much longer if postcodes are displayed for a delay period. */
- i *= 10000;
-#endif
- mp_wait_for_cpu_up(slot_num, i, 10000);
+ /*
+ * For Nehalem, perform the processor startup with all running
+ * processors rendezvous'ed. This is required during periods when
+ * the cache-disable bit is set for MTRR/PAT initialization.
+ */
+ if (start_info.is_nehalem)
+ mp_rendezvous_no_intrs(start_cpu, (void *) &start_info);
+ else
+ start_cpu((void *) &start_info);
- mp_enable_preemption();
+ ml_set_interrupts_enabled(istate);
mutex_unlock(&mp_cpu_boot_lock);
if (!cpu_datap(slot_num)->cpu_running) {
DBGLOG(cpu_handle,my_cpu,MP_CALL);
i_bit_clear(MP_CALL, my_word);
mp_cpus_call_action();
+ } else if (i_bit(MP_CALL_PM, my_word)) {
+ DBGLOG(cpu_handle,my_cpu,MP_CALL_PM);
+ i_bit_clear(MP_CALL_PM, my_word);
+ mp_call_PM();
}
} while (*my_word);
}
}
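+/*
+ * Power-management callout: the PM kext registers a handler with
+ * PM_interrupt_register(); cpu_PM_interrupt() invokes it directly on the
+ * local cpu or signals the target cpu with an MP_CALL_PM IPI.
+ */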
+static volatile void (*mp_PM_func)(void) = NULL;
+
+static void
+mp_call_PM(void)
+{
+ assert(!ml_get_interrupts_enabled());
+
+ if (mp_PM_func != NULL)
+ mp_PM_func();
+}
+
+void
+cpu_PM_interrupt(int cpu)
+{
+ assert(!ml_get_interrupts_enabled());
+
+ if (mp_PM_func != NULL) {
+ if (cpu == cpu_number())
+ mp_PM_func();
+ else
+ i386_signal_cpu(cpu, MP_CALL_PM, ASYNC);
+ }
+}
+
+void
+PM_interrupt_register(void (*fn)(void))
+{
+ mp_PM_func = fn;
+}
+
void
i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
{
simple_unlock(&x86_topo_lock);
}
+extern void etimer_timer_expire(void *arg);
+
void
i386_deactivate_cpu(void)
{
cdp->cpu_running = FALSE;
simple_unlock(&x86_topo_lock);
+ timer_queue_shutdown(&cdp->rtclock_timer.queue);
+ cdp->rtclock_timer.deadline = EndOfAllTime;
+ mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, etimer_timer_expire, NULL);
+
/*
 * In case a rendezvous/broadcast/call was initiated to this cpu
* before we cleared cpu_running, we must perform any actions due.
cause_ast_check(
processor_t processor)
{
- int cpu = PROCESSOR_DATA(processor, slot_num);
+ int cpu = processor->cpu_num;
if (cpu != cpu_number()) {
i386_signal_cpu(cpu, MP_AST, ASYNC);
void (*action_func)(void *),
void *arg);
+/*
+ * Power-management-specific SPI to:
+ * - register a callout function, and
+ * - request the callout (if registered) on a given cpu.
+ */
+extern void PM_interrupt_register(void (*fn)(void));
+extern void cpu_PM_interrupt(int cpu);
+
+
__END_DECLS
#if MP_DEBUG
/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
if (is_boot_cpu) {
assert(real_ncpus == 1);
- simple_lock_init(&cpu_lock, 0);
cdp = &cpu_data_master;
if (cdp->cpu_processor == NULL) {
+ simple_lock_init(&cpu_lock, 0);
cdp->cpu_processor = cpu_processor_alloc(TRUE);
cdp->cpu_pmap = pmap_cpu_alloc(TRUE);
cpu_desc_init(cdp, TRUE);
fast_syscall_init();
+ queue_init(&cdp->rtclock_timer.queue);
+ cdp->rtclock_timer.deadline = EndOfAllTime;
}
return cdp;
}
simple_unlock(&cpu_lock);
cdp->cpu_nanotime = &rtc_nanotime_info;
+ queue_init(&cdp->rtclock_timer.queue);
+ cdp->rtclock_timer.deadline = EndOfAllTime;
kprintf("cpu_data_alloc(%d) %p desc_table: %p "
"ldt: %p "
{
cpu_data_t *cdp = cpu_data_ptr[cpu];
cpu_desc_index_t *cdi = &cdp->cpu_desc_index;
- vm_offset_t phys_window;
+ vm_offset_t phys_window = cdp->cpu_physwindow_base;
- if (vm_allocate(kernel_map, &phys_window,
- PAGE_SIZE, VM_FLAGS_ANYWHERE)
+ if (phys_window == 0) {
+ if (vm_allocate(kernel_map, &phys_window,
+ PAGE_SIZE, VM_FLAGS_ANYWHERE)
!= KERN_SUCCESS)
- panic("cpu_physwindow_init: couldn't allocate phys map window");
+ panic("cpu_physwindow_init: "
+ "couldn't allocate phys map window");
- /*
- * make sure the page that encompasses the
- * pte pointer we're interested in actually
- * exists in the page table
- */
- pmap_expand(kernel_pmap, phys_window);
+ /*
+ * make sure the page that encompasses the
+ * pte pointer we're interested in actually
+ * exists in the page table
+ */
+ pmap_expand(kernel_pmap, phys_window);
- cdp->cpu_physwindow_base = phys_window;
- cdp->cpu_physwindow_ptep = vtopte(phys_window);
+ cdp->cpu_physwindow_base = phys_window;
+ cdp->cpu_physwindow_ptep = vtopte(phys_window);
+ }
cdi->cdi_gdt[sel_idx(PHYS_WINDOW_SEL)] = physwindow_desc_pattern;
cdi->cdi_gdt[sel_idx(PHYS_WINDOW_SEL)].offset = phys_window;
MP_CHUD,
MP_BROADCAST,
MP_CALL,
+ MP_CALL_PM,
MP_LAST
} mp_event_t;
"MP_CHUD", \
"MP_BROADCAST", \
"MP_CALL", \
+ "MP_CALL_PM", \
"MP_LAST" \
}
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
machine_thread_create(thread, thread->task);
/* If we're switching ourselves, reset the pcb addresses etc. */
- if (thread == current_thread())
- act_machine_switch_pcb(thread);
-
+ if (thread == current_thread()) {
+ if (current_cpu_datap()->cpu_active_cr3 != kernel_pmap->pm_cr3)
+ pmap_load_kernel_cr3();
+ act_machine_switch_pcb(thread);
+ }
enable_preemption();
}
goto out;
my_cpu->lcpu.state = LCPU_IDLE;
+ my_cpu->lcpu.flags |= X86CORE_FL_IDLE;
DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
MARK_CPU_IDLE(cpu_number());
*/
MARK_CPU_ACTIVE(cpu_number());
DBGLOG(cpu_handle, cpu_number(), MP_UNIDLE);
+ my_cpu->lcpu.flags &= ~(X86CORE_FL_IDLE | X86CORE_FL_WAKEUP);
my_cpu->lcpu.state = LCPU_RUN;
/*
{
boolean_t do_ipi;
+ cpu->lcpu.flags |= X86CORE_FL_WAKEUP;
if (pmInitDone
&& pmDispatch != NULL
&& pmDispatch->exitIdle != NULL)
else
do_ipi = TRUE;
+ if (do_ipi)
+ cpu->lcpu.flags &= ~X86CORE_FL_WAKEUP;
+
return(do_ipi);
}
}
}
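+/*
+ * Scheduler hooks for the PM kext: forward the run count and cpu
+ * availability queries to it when registered, otherwise fall back to the
+ * locally saved values.
+ */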
+static uint32_t saved_run_count = 0;
+
+void
+machine_run_count(uint32_t count)
+{
+ if (pmDispatch != NULL
+ && pmDispatch->pmSetRunCount != NULL)
+ pmDispatch->pmSetRunCount(count);
+ else
+ saved_run_count = count;
+}
+
+boolean_t
+machine_cpu_is_inactive(int cpu)
+{
+ if (pmDispatch != NULL
+ && pmDispatch->pmIsCPUUnAvailable != NULL)
+ return(pmDispatch->pmIsCPUUnAvailable(cpu_to_lcpu(cpu)));
+ else
+ return(FALSE);
+}
+
+static uint32_t
+pmGetSavedRunCount(void)
+{
+ return(saved_run_count);
+}
+
/*
* Returns the root of the package tree.
*/
return(cpu_datap(lcpu)->cpu_processor);
}
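+/*
+ * Resync timer deadlines on the given cpu on behalf of the PM kext,
+ * registering etimer_resync_deadlines() as the PM callout on first use.
+ */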
+static void
+pmReSyncDeadlines(int cpu)
+{
+ static boolean_t registered = FALSE;
+
+ if (!registered) {
+ PM_interrupt_register(&etimer_resync_deadlines);
+ registered = TRUE;
+ }
+
+ if ((uint32_t)cpu == current_cpu_datap()->lcpu.cpu_num)
+ etimer_resync_deadlines();
+ else
+ cpu_PM_interrupt(cpu);
+}
+
/*
* Called by the power management kext to register itself and to get the
* callbacks it might need into other kernel functions. This interface
pmCallBacks_t *callbacks)
{
if (callbacks != NULL && version == PM_DISPATCH_VERSION) {
- callbacks->setRTCPop = setPop;
- callbacks->resyncDeadlines = etimer_resync_deadlines;
- callbacks->initComplete= pmInitComplete;
- callbacks->GetLCPU = pmGetLogicalCPU;
- callbacks->GetCore = pmGetCore;
- callbacks->GetDie = pmGetDie;
- callbacks->GetPackage = pmGetPackage;
- callbacks->GetMyLCPU = pmGetMyLogicalCPU;
- callbacks->GetMyCore = pmGetMyCore;
- callbacks->GetMyDie = pmGetMyDie;
- callbacks->GetMyPackage= pmGetMyPackage;
- callbacks->GetPkgRoot = pmGetPkgRoot;
- callbacks->LockCPUTopology = pmLockCPUTopology;
- callbacks->GetHibernate = pmCPUGetHibernate;
- callbacks->LCPUtoProcessor = pmLCPUtoProcessor;
- callbacks->ThreadBind = thread_bind;
- callbacks->topoParms = &topoParms;
+ callbacks->setRTCPop = setPop;
+ callbacks->resyncDeadlines = pmReSyncDeadlines;
+ callbacks->initComplete = pmInitComplete;
+ callbacks->GetLCPU = pmGetLogicalCPU;
+ callbacks->GetCore = pmGetCore;
+ callbacks->GetDie = pmGetDie;
+ callbacks->GetPackage = pmGetPackage;
+ callbacks->GetMyLCPU = pmGetMyLogicalCPU;
+ callbacks->GetMyCore = pmGetMyCore;
+ callbacks->GetMyDie = pmGetMyDie;
+ callbacks->GetMyPackage = pmGetMyPackage;
+ callbacks->GetPkgRoot = pmGetPkgRoot;
+ callbacks->LockCPUTopology = pmLockCPUTopology;
+ callbacks->GetHibernate = pmCPUGetHibernate;
+ callbacks->LCPUtoProcessor = pmLCPUtoProcessor;
+ callbacks->ThreadBind = thread_bind;
+ callbacks->GetSavedRunCount = pmGetSavedRunCount;
+ callbacks->topoParms = &topoParms;
+ } else {
+ panic("Version mis-match between Kernel and CPU PM");
}
if (cpuFuncs != NULL) {
 * This value should be changed each time that pmDispatch_t or pmCallBacks_t
* changes.
*/
-#define PM_DISPATCH_VERSION 12
+#define PM_DISPATCH_VERSION 15
/*
* Dispatch table for functions that get installed when the power
void (*pmTimerStateRestore)(void);
kern_return_t (*exitHalt)(x86_lcpu_t *lcpu);
void (*markAllCPUsOff)(void);
+ void (*pmSetRunCount)(uint32_t count);
+ boolean_t (*pmIsCPUUnAvailable)(x86_lcpu_t *lcpu);
} pmDispatch_t;
typedef struct {
int (*setRTCPop)(uint64_t time);
- void (*resyncDeadlines)(void);
+ void (*resyncDeadlines)(int cpu);
void (*initComplete)(void);
x86_lcpu_t *(*GetLCPU)(int cpu);
x86_core_t *(*GetCore)(int cpu);
boolean_t (*GetHibernate)(int cpu);
processor_t (*LCPUtoProcessor)(int lcpu);
processor_t (*ThreadBind)(processor_t proc);
+ uint32_t (*GetSavedRunCount)(void);
x86_topology_parameters_t *topoParms;
} pmCallBacks_t;
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
vm_map_offset_t va;
va = pv_e->va;
- /*
- * first make sure any processor actively
- * using this pmap, flushes its TLB state
- */
-
- PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
/*
* Clear modify and/or reference bits.
pte = pmap_pte(pmap, va);
pmap_update_pte(pte, *pte, (*pte & ~bits));
-
+ /* Ensure all processors using this translation
+ * invalidate this TLB entry. The invalidation *must* follow
+ * the PTE update, to ensure that the TLB shadow of the
+ * 'D' bit (in particular) is synchronized with the
+ * updated PTE.
+ */
+ PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
}
pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
__asm__ volatile("wrmsr" : : "c" (msr), "a" (lo), "d" (hi))
#define rdtsc(lo,hi) \
- __asm__ volatile("rdtsc; lfence" : "=a" (lo), "=d" (hi))
+ __asm__ volatile("lfence; rdtsc; lfence" : "=a" (lo), "=d" (hi))
#define write_tsc(lo,hi) wrmsr(0x10, lo, hi)
static inline uint64_t rdtsc64(void)
{
uint64_t ret;
- __asm__ volatile("rdtsc; lfence" : "=A" (ret));
+ __asm__ volatile("lfence; rdtsc; lfence" : "=A" (ret));
+ return ret;
+}
+
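+/* Read the TSC with RDTSCP, returning the IA32_TSC_AUX value via *aux. */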
+static inline uint64_t rdtscp64(uint32_t *aux)
+{
+ uint64_t ret;
+ __asm__ volatile("rdtscp; mov %%ecx, %1"
+ : "=A" (ret), "=m" (*aux)
+ :
+ : "ecx");
return ret;
}
#define MSR_IA32_BIOS_SIGN_ID 0x08B
+#define MSR_FLEX_RATIO 0x194
+#define MSR_PLATFORM_INFO 0x0ce
+#define MSR_CORE_THREAD_COUNT 0x035
+
#endif /* _I386_PROC_REG_H_ */
void rtclock_intr(x86_saved_state_t *regs);
static uint64_t maxDec; /* longest interval our hardware timer can handle (nsec) */
-/* XXX this should really be in a header somewhere */
-extern clock_timer_func_t rtclock_timer_expire;
-
static void rtc_set_timescale(uint64_t cycles);
static uint64_t rtc_export_speed(uint64_t cycles);
info->numer = info->denom = 1;
}
-void
-clock_set_timer_func(
- clock_timer_func_t func)
-{
- if (rtclock_timer_expire == NULL)
- rtclock_timer_expire = func;
-}
-
/*
* Real-time clock device interrupt.
*/
0: movl RNT_GENERATION(%edi),%esi /* being updated? */ ; \
testl %esi,%esi ; \
jz 0b /* wait until done */ ; \
+ lfence ; \
rdtsc ; \
lfence ; \
subl RNT_TSC_BASE(%edi),%eax ; \
0: movl RNT_GENERATION(%rdi),%esi ; \
test %esi,%esi /* info updating? */ ; \
jz 0b /* - wait if so */ ; \
+ lfence ; \
rdtsc ; \
lfence ; \
shlq $32,%rdx ; \
cpuid_info()->cpuid_family);
}
- {
+ switch (cpuid_info()->cpuid_model) {
+ case CPUID_MODEL_NEHALEM: {
+ uint64_t cpu_mhz;
+ uint64_t msr_flex_ratio;
+ uint64_t msr_platform_info;
+
+ /* See if FLEX_RATIO is being used */
+ msr_flex_ratio = rdmsr64(MSR_FLEX_RATIO);
+ msr_platform_info = rdmsr64(MSR_PLATFORM_INFO);
+ flex_ratio_min = (uint32_t)bitfield(msr_platform_info, 47, 40);
+ flex_ratio_max = (uint32_t)bitfield(msr_platform_info, 15, 8);
+ /* No BIOS-programed flex ratio. Use hardware max as default */
+ tscGranularity = flex_ratio_max;
+ if (msr_flex_ratio & bit(16)) {
+ /* Flex Enabled: Use this MSR if less than max */
+ flex_ratio = (uint32_t)bitfield(msr_flex_ratio, 15, 8);
+ if (flex_ratio < flex_ratio_max)
+ tscGranularity = flex_ratio;
+ }
+
+ /* If EFI isn't configured correctly, use a constant
+ * value. See 6036811.
+ */
+ if (busFreq == 0)
+ busFreq = BASE_NHM_CLOCK_SOURCE;
+
+ cpu_mhz = tscGranularity * BASE_NHM_CLOCK_SOURCE;
+
+ kprintf("[NHM] Maximum Non-Turbo Ratio = [%d]\n",
+ (uint32_t)tscGranularity);
+ kprintf("[NHM] CPU: Frequency = %6d.%04dMhz\n",
+ (uint32_t)(cpu_mhz / Mega), (uint32_t)(cpu_mhz % Mega));
+ break;
+ }
+ default: {
uint64_t prfsts;
prfsts = rdmsr64(IA32_PERF_STS);
tscGranularity = (uint32_t)bitfield(prfsts, 44, 40);
N_by_2_bus_ratio = (prfsts & bit(46)) != 0;
+ }
}
if (busFreq != 0) {
#ifndef _I386_TSC_H_
#define _I386_TSC_H_
+#define BASE_NHM_CLOCK_SOURCE 139806638ULL
#define IA32_PERF_STS 0x198
extern uint64_t busFCvtt2n;
vmx_resume()
{
VMX_KPRINTF("vmx_resume\n");
+ vmx_init(); /* init VMX on CPU #0 */
if (vmx_use_count)
vmx_on();
}
/*
* extern void
* ipc_kmsg_send_always(ipc_kmsg_t);
- *
- * Unfortunately, to avoid warnings/lint about unused variables
- * when assertions are turned off, we need two versions of this.
*/
-#if MACH_ASSERT
-
#define ipc_kmsg_send_always(kmsg) \
-MACRO_BEGIN \
- mach_msg_return_t mr2; \
- \
- mr2 = ipc_kmsg_send((kmsg), MACH_SEND_ALWAYS, \
- MACH_MSG_TIMEOUT_NONE); \
- assert(mr == MACH_MSG_SUCCESS); \
-MACRO_END
-
-#else /* MACH_ASSERT */
-
-#define ipc_kmsg_send_always(kmsg) \
-MACRO_BEGIN \
- (void) ipc_kmsg_send((kmsg), MACH_SEND_ALWAYS, \
- MACH_MSG_TIMEOUT_NONE); \
-MACRO_END
-
-#endif /* MACH_ASSERT */
+ ipc_kmsg_send((kmsg), MACH_SEND_ALWAYS, MACH_MSG_TIMEOUT_NONE)
/* Allocate a kernel message */
imq_lock(mqueue);
if (!imq_full(mqueue) ||
- (option & MACH_SEND_ALWAYS) ||
- (MACH_MSGH_BITS_REMOTE(kmsg->ikm_header->msgh_bits) ==
- MACH_MSG_TYPE_PORT_SEND_ONCE)) {
+ (!imq_full_kernel(mqueue) &&
+ ((option & MACH_SEND_ALWAYS) ||
+ (MACH_MSGH_BITS_REMOTE(kmsg->ikm_header->msgh_bits) ==
+ MACH_MSG_TYPE_PORT_SEND_ONCE)))) {
mqueue->imq_msgcount++;
assert(mqueue->imq_msgcount > 0);
imq_unlock(mqueue);
splx(s);
return MACH_SEND_TIMED_OUT;
}
+ if (imq_full_kernel(mqueue)) {
+ imq_unlock(mqueue);
+ splx(s);
+ return MACH_SEND_NO_BUFFER;
+ }
mqueue->imq_fullwaiters = TRUE;
thread_lock(cur_thread);
if (option & MACH_SEND_TIMEOUT)
#define imq_held(mq) wait_queue_held(&(mq)->imq_wait_queue)
#define imq_full(mq) ((mq)->imq_msgcount >= (mq)->imq_qlimit)
+#define imq_full_kernel(mq) ((mq)->imq_msgcount >= MACH_PORT_QLIMIT_KERNEL)
extern int ipc_mqueue_full;
extern int ipc_mqueue_rcv;
ipc_port_t port,
mach_port_name_t name)
{
- kern_return_t kr;
-
- kr = mach_notify_port_deleted(port, name);
- if (kr != KERN_SUCCESS) {
- printf("dropped port-deleted (%p, 0x%x)\n", port, name);
- ipc_port_release_sonce(port);
- }
+ (void)mach_notify_port_deleted(port, name);
+ /* send-once right consumed */
}
/*
ipc_port_t port,
ipc_port_t right)
{
- kern_return_t kr;
-
- kr = mach_notify_port_destroyed(port, right);
- if (kr != KERN_SUCCESS) {
- printf("dropped port-destroyed (%p, %p)\n",
- port, right);
- ipc_port_release_sonce(port);
- ipc_port_release_receive(right);
- }
+ mach_notify_port_destroyed(port, right);
+ /* send-once and receive rights consumed */
}
/*
ipc_port_t port,
mach_port_mscount_t mscount)
{
- kern_return_t kr;
-
- kr = mach_notify_no_senders(port, mscount);
- if (kr != KERN_SUCCESS) {
- printf("dropped no-senders (%p, %u)\n", port, mscount);
- ipc_port_release_sonce(port);
- }
+ (void)mach_notify_no_senders(port, mscount);
+ /* send-once right consumed */
}
/*
ipc_notify_send_once(
ipc_port_t port)
{
- kern_return_t kr;
-
- kr = mach_notify_send_once(port);
- if (kr != KERN_SUCCESS) {
- printf("dropped send-once (%p)\n", port);
- ipc_port_release_sonce(port);
- }
+ (void)mach_notify_send_once(port);
+ /* send-once right consumed */
}
/*
ipc_port_t port,
mach_port_name_t name)
{
- kern_return_t kr;
-
- kr = mach_notify_dead_name(port, name);
- if (kr != KERN_SUCCESS) {
- printf("dropped dead-name (%p, 0x%x)\n", port, name);
- ipc_port_release_sonce(port);
- }
+ (void)mach_notify_dead_name(port, name);
+ /* send-once right consumed */
}
}
default:
- panic("ipc_right_clean: strange type");
+ panic("ipc_right_clean: strange type - 0x%x", type);
}
}
(reply_port->ip_receiver_name != rcv_name) ||
(reply_port->ip_pset_count != 0))
{
+ /* try to enqueue by sending with an immediate timeout */
ip_unlock(reply_port);
- ipc_kmsg_send_always(kmsg);
+ mr = ipc_kmsg_send(kmsg, MACH_SEND_TIMEOUT, 0);
+ if (mr != MACH_MSG_SUCCESS) {
+ ipc_kmsg_destroy(kmsg);
+ }
HOT(c_mmot_cold_052++);
goto slow_get_rcv_port;
}
* If there are messages on the port
* or other threads waiting for a message,
* we cannot directly receive the reply.
+ * Try to enqueue it by sending with an
+ * immediate timeout.
*/
if (!wait_queue_empty(&rcv_mqueue->imq_wait_queue) ||
(ipc_kmsg_queue_first(&rcv_mqueue->imq_messages) != IKM_NULL))
imq_unlock(rcv_mqueue);
splx(s);
ip_unlock(reply_port);
- ipc_kmsg_send_always(kmsg);
+ mr = ipc_kmsg_send(kmsg, MACH_SEND_TIMEOUT, 0);
+ if (mr != MACH_MSG_SUCCESS) {
+ ipc_kmsg_destroy(kmsg);
+ }
HOT(c_mmot_cold_053++);
goto slow_get_rcv_port;
}
/*
- * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
/*
* Check for preemption.
*/
- if (reasons & AST_PREEMPT) {
- processor_t myprocessor = current_processor();
+ if (reasons & AST_PREEMPT)
+ reasons = csw_check(current_processor());
- if (csw_needed(thread, myprocessor))
- reasons = AST_PREEMPT;
- else
- reasons = AST_NONE;
- }
if ( (reasons & AST_PREEMPT) &&
wait_queue_assert_possible(thread) ) {
counter(c_ast_taken_block++);
/*
* Context switch check.
*/
- if ((preempt = csw_check(thread, processor)) != AST_NONE)
+ if ((preempt = csw_check(processor)) != AST_NONE)
ast_on(preempt);
}
}
/*
- * Copyright (c) 1993-1995, 1999-2000 Apple Computer, Inc.
- * All rights reserved.
+ * Copyright (c) 1993-1995, 1999-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
- * Private declarations for thread-based callouts.
- *
- * HISTORY
- *
- * 10 July 1999 (debo)
- * Pulled into Mac OS X (microkernel).
- *
- * 3 July 1993 (debo)
- * Created.
+ * Declarations for generic call outs.
*/
#ifndef _KERN_CALL_ENTRY_H_
typedef struct call_entry {
queue_chain_t q_link;
+ queue_t queue;
call_entry_func_t func;
call_entry_param_t param0;
call_entry_param_t param1;
uint64_t deadline;
- enum {
- IDLE,
- PENDING,
- DELAYED } state;
} call_entry_data_t;
+typedef struct call_entry *call_entry_t;
+
+extern queue_t call_entry_enqueue_deadline(
+ call_entry_t entry,
+ queue_t queue,
+ uint64_t deadline);
+
+extern queue_t call_entry_enqueue_tail(
+ call_entry_t entry,
+ queue_t queue);
+
+extern queue_t call_entry_dequeue(
+ call_entry_t entry);
+
#define call_entry_setup(entry, pfun, p0) \
MACRO_BEGIN \
(entry)->func = (call_entry_func_t)(pfun); \
(entry)->param0 = (call_entry_param_t)(p0); \
- (entry)->state = IDLE; \
+ (entry)->queue = NULL; \
MACRO_END
#endif /* MACH_KERNEL_PRIVATE */
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#ifdef MACH_KERNEL_PRIVATE
+#include <kern/queue.h>
+
/*
* Clock operations list structure. Contains vectors to machine
* dependent clock routines.
*/
extern void clock_service_create(void);
-typedef void (*clock_timer_func_t)(
- uint64_t timestamp);
-
-extern void clock_set_timer_func(
- clock_timer_func_t func);
-
-extern void clock_set_timer_deadline(
- uint64_t deadline);
-
extern void clock_gettimeofday_set_commpage(
uint64_t abstime,
uint64_t epoch,
#include <kern/assert.h>
#include <kern/sched_prim.h>
#include <kern/misc_protos.h>
+#include <kern/clock.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>
#include <stdarg.h>
unsigned int return_on_panic = 0;
unsigned long panic_caller;
-char debug_buf[PAGE_SIZE];
-ppnum_t debug_buf_page;
-char *debug_buf_ptr;
-unsigned int debug_buf_size;
+#if CONFIG_EMBEDDED
+#define DEBUG_BUF_SIZE (PAGE_SIZE)
+#else
+#define DEBUG_BUF_SIZE (3 * PAGE_SIZE)
+#endif
+
+char debug_buf[DEBUG_BUF_SIZE];
+char *debug_buf_ptr = debug_buf;
+unsigned int debug_buf_size = sizeof(debug_buf);
static char model_name[64];
if (debug_buf_size != 0)
return;
debug_buf_ptr = debug_buf;
- debug_buf_size = PAGE_SIZE;
- debug_buf_page = pmap_find_phys(kernel_pmap,
- (addr64_t)(uintptr_t)debug_buf_ptr);
+ debug_buf_size = sizeof(debug_buf);
}
#if __i386__
kdb_printf("System model name: %s\n", model_name);
}
+static void panic_display_uptime(void) {
+ uint64_t uptime;
+ absolutetime_to_nanoseconds(mach_absolute_time(), &uptime);
+
+ kdb_printf("\nSystem uptime in nanoseconds: %llu\n", uptime);
+}
+
extern const char version[];
extern char osversion[];
(osversion[0] != 0) ? osversion : "Not yet set");
kdb_printf("\nKernel version:\n%s\n",version);
panic_display_model_name();
+ panic_display_uptime();
config_displayed = TRUE;
}
}
+extern zone_t first_zone;
+extern unsigned int num_zones, stack_total;
+
+#if defined(__i386__)
+extern unsigned int inuse_ptepages_count;
+#endif
+
+extern boolean_t panic_include_zprint;
+extern vm_size_t kalloc_large_total;
+
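+/*
+ * During panic, walk the zone list with fault-tolerant copies and report
+ * zones over 1MB along with kernel stack, page-table and large-kalloc usage.
+ */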
+__private_extern__ void panic_display_zprint()
+{
+ if(panic_include_zprint == TRUE) {
+
+ unsigned int i;
+ struct zone zone_copy;
+
+ if(first_zone!=NULL) {
+ if(ml_nofault_copy((vm_offset_t)first_zone, (vm_offset_t)&zone_copy, sizeof(struct zone)) == sizeof(struct zone)) {
+ for (i = 0; i < num_zones; i++) {
+ if(zone_copy.cur_size > (1024*1024)) {
+ kdb_printf("%.20s:%lu\n",zone_copy.zone_name,(uintptr_t)zone_copy.cur_size);
+ }
+
+ if(zone_copy.next_zone == NULL) {
+ break;
+ }
+
+ if(ml_nofault_copy((vm_offset_t)zone_copy.next_zone, (vm_offset_t)&zone_copy, sizeof(struct zone)) != sizeof(struct zone)) {
+ break;
+ }
+ }
+ }
+ }
+
+ kdb_printf("Kernel Stacks:%lu\n",(uintptr_t)(KERNEL_STACK_SIZE * stack_total));
+#if defined(__i386__)
+ kdb_printf("PageTables:%lu\n",(uintptr_t)(PAGE_SIZE * inuse_ptepages_count));
+#endif
+ kdb_printf("Kalloc.Large:%lu\n",(uintptr_t)kalloc_large_total);
+ }
+}
+
#if !MACH_KDP
static struct ether_addr kdp_current_mac_address = {{0, 0, 0, 0, 0, 0}};
unsigned int not_in_kdp = 1;
void unpackA(char *inbuf, uint32_t length);
void panic_display_system_configuration(void);
+void panic_display_zprint(void);
#endif /* MACH_KERNEL_PRIVATE */
uint64_t start, end, nsec;
vm_page_t m;
uint32_t pages = page_list->page_count;
- uint32_t count_zf = 0, count_throttled = 0, count_inactive = 0, count_active = 0;
+ uint32_t count_zf = 0, count_throttled = 0;
+ uint32_t count_inactive = 0, count_active = 0, count_speculative = 0;
uint32_t count_wire = pages;
uint32_t count_discard_active = 0;
uint32_t count_discard_inactive = 0;
uint32_t count_discard_purgeable = 0;
+ uint32_t count_discard_speculative = 0;
uint32_t i;
uint32_t bank;
hibernate_bitmap_t * bitmap;
queue_iterate( &vm_page_queue_zf,
m,
vm_page_t,
- pageq )
+ pageq )
{
if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
&& consider_discard(m))
hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
}
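+ /*
+ * Scan the speculative page queues the same way: clean pages may be
+ * marked for discard, the remainder are counted as speculative.
+ */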
+ for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
+ {
+ queue_iterate(&vm_page_queue_speculative[i].age_q,
+ m,
+ vm_page_t,
+ pageq)
+ {
+ if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
+ && consider_discard(m))
+ {
+ hibernate_page_bitset(page_list, TRUE, m->phys_page);
+ count_discard_speculative++;
+ }
+ else
+ count_speculative++;
+ count_wire--;
+ hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
+ }
+ }
+
queue_iterate( &vm_page_queue_active,
m,
vm_page_t,
absolutetime_to_nanoseconds(end - start, &nsec);
HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
- HIBLOG("pages %d, wire %d, act %d, inact %d, zf %d, throt %d, could discard act %d inact %d purgeable %d\n",
- pages, count_wire, count_active, count_inactive, count_zf, count_throttled,
- count_discard_active, count_discard_inactive, count_discard_purgeable);
+ HIBLOG("pages %d, wire %d, act %d, inact %d, spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d\n",
+ pages, count_wire, count_active, count_inactive, count_speculative, count_zf, count_throttled,
+ count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
- *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable;
+ *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative;
}
void
uint64_t start, end, nsec;
vm_page_t m;
vm_page_t next;
+ uint32_t i;
uint32_t count_discard_active = 0;
uint32_t count_discard_inactive = 0;
uint32_t count_discard_purgeable = 0;
+ uint32_t count_discard_speculative = 0;
clock_get_uptime(&start);
m = next;
}
+ for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
+ {
+ m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
+ while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
+ {
+ next = (vm_page_t) m->pageq.next;
+ if (hibernate_page_bittst(page_list, m->phys_page))
+ {
+ count_discard_speculative++;
+ discard_page(m);
+ }
+ m = next;
+ }
+ }
+
m = (vm_page_t) queue_first(&vm_page_queue_inactive);
while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
{
clock_get_uptime(&end);
absolutetime_to_nanoseconds(end - start, &nsec);
- HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d\n",
+ HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d\n",
nsec / 1000000ULL,
- count_discard_active, count_discard_inactive, count_discard_purgeable);
+ count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
case HOST_BASIC_INFO:
{
register host_basic_info_t basic_info;
- register int master_slot;
+ register int master_num;
/*
* Basic information about this host.
basic_info->memory_size = machine_info.memory_size;
basic_info->max_cpus = machine_info.max_cpus;
basic_info->avail_cpus = processor_avail_count;
- master_slot = PROCESSOR_DATA(master_processor, slot_num);
- basic_info->cpu_type = slot_type(master_slot);
- basic_info->cpu_subtype = slot_subtype(master_slot);
+ master_num = master_processor->cpu_num;
+ basic_info->cpu_type = slot_type(master_num);
+ basic_info->cpu_subtype = slot_subtype(master_num);
if (*count >= HOST_BASIC_INFO_COUNT) {
- basic_info->cpu_threadtype = slot_threadtype(master_slot);
+ basic_info->cpu_threadtype = slot_threadtype(master_num);
basic_info->physical_cpu = machine_info.physical_cpu;
basic_info->physical_cpu_max = machine_info.physical_cpu_max;
basic_info->logical_cpu = machine_info.logical_cpu;
* Nothing locked.
* Returns:
* MACH_MSG_SUCCESS Sent the message.
- * MACH_MSG_SEND_NO_BUFFER Destination port had inuse fixed bufer
* MACH_SEND_INVALID_DEST Bad destination port.
+ * MACH_MSG_SEND_NO_BUFFER Destination port had an in-use fixed buffer,
+ * or the destination is above the kernel queue limit
*/
mach_msg_return_t
return mr;
ipc_kmsg_copyin_from_kernel(kmsg);
- ipc_kmsg_send_always(kmsg);
- return MACH_MSG_SUCCESS;
+ mr = ipc_kmsg_send_always(kmsg);
+ if (mr != MACH_MSG_SUCCESS) {
+ ipc_kmsg_destroy(kmsg);
+ }
+
+ return mr;
}
mach_msg_return_t
ipc_kmsg_copyin_from_kernel(kmsg);
mr = ipc_kmsg_send(kmsg, option, timeout_val);
if (mr != MACH_MSG_SUCCESS) {
- ipc_kmsg_free(kmsg);
+ ipc_kmsg_destroy(kmsg);
}
return mr;
ipc_kmsg_copyin_from_kernel(kmsg);
- ipc_kmsg_send_always(kmsg);
+ mr = ipc_kmsg_send_always(kmsg);
+ if (mr != MACH_MSG_SUCCESS) {
+ ipc_kmsg_destroy(kmsg);
+ return mr;
+ }
for (;;) {
ipc_mqueue_t mqueue;
#include <mach/host_priv_server.h>
#include <mach/vm_map.h>
+#include <kern/clock.h>
#include <kern/kalloc.h>
#include <kern/kern_types.h>
#include <kern/thread.h>
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
+#include <mach/kext_panic_report.h>
+
/*
* XXX headers for which prototypes should be in a common include file;
* XXX see libsa/kext.cpp for why.
queue_head_t kmod_cmd_queue;
+/*******************************************************************************
+*******************************************************************************/
+#define KMOD_PANICLIST_SIZE (2 * PAGE_SIZE)
+
+char * unloaded_kext_paniclist = NULL;
+uint32_t unloaded_kext_paniclist_size = 0;
+uint32_t unloaded_kext_paniclist_length = 0;
+uint64_t last_loaded_timestamp = 0;
+
+char * loaded_kext_paniclist = NULL;
+uint32_t loaded_kext_paniclist_size = 0;
+uint32_t loaded_kext_paniclist_length = 0;
+uint64_t last_unloaded_timestamp = 0;
+
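+/*
+ * If scan_string begins with substring, append the optional marker and the
+ * single-character substitution to string_out, advance both indices, and
+ * return 1; otherwise return 0.
+ */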
+int substitute(
+ const char * scan_string,
+ char * string_out,
+ uint32_t * to_index,
+ uint32_t * from_index,
+ const char * substring,
+ char marker,
+ char substitution);
+
+/* identifier_out must be at least KMOD_MAX_NAME bytes.
+ */
+int substitute(
+ const char * scan_string,
+ char * string_out,
+ uint32_t * to_index,
+ uint32_t * from_index,
+ const char * substring,
+ char marker,
+ char substitution)
+{
+ uint32_t substring_length = strnlen(substring, KMOD_MAX_NAME - 1);
+
+ if (!strncmp(scan_string, substring, substring_length)) {
+ if (marker) {
+ string_out[(*to_index)++] = marker;
+ }
+ string_out[(*to_index)++] = substitution;
+ (*from_index) += substring_length;
+ return 1;
+ }
+ return 0;
+}
+
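+/*
+ * Compress a kext bundle identifier for the panic list using the prefix and
+ * substring substitution tables (from <mach/kext_panic_report.h>).
+ */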
+void compactIdentifier(
+ const char * identifier,
+ char * identifier_out,
+ char ** identifier_out_end);
+
+void compactIdentifier(
+ const char * identifier,
+ char * identifier_out,
+ char ** identifier_out_end)
+{
+ uint32_t from_index, to_index;
+ uint32_t scan_from_index = 0;
+ uint32_t scan_to_index = 0;
+ subs_entry_t * subs_entry = NULL;
+ int did_sub = 0;
+
+ from_index = to_index = 0;
+ identifier_out[0] = '\0';
+
+ /* Replace certain identifier prefixes with shorter @+character sequences.
+ */
+ for (subs_entry = &kext_identifier_prefix_subs[0];
+ subs_entry->substring && !did_sub;
+ subs_entry++) {
+
+ did_sub = substitute(identifier, identifier_out,
+ &scan_to_index, &scan_from_index,
+ subs_entry->substring, /* marker */ '\0', subs_entry->substitute);
+ }
+ did_sub = 0;
+
+ /* Now scan through the identifier looking for the common substrings
+ * and replacing them with shorter !+character sequences.
+ */
+ for (/* see above */;
+ scan_from_index < KMOD_MAX_NAME - 1 && identifier[scan_from_index];
+ /* see loop */) {
+
+ const char * scan_string = &identifier[scan_from_index];
+
+ did_sub = 0;
+
+ if (scan_from_index) {
+ for (subs_entry = &kext_identifier_substring_subs[0];
+ subs_entry->substring && !did_sub;
+ subs_entry++) {
+
+ did_sub = substitute(scan_string, identifier_out,
+ &scan_to_index, &scan_from_index,
+ subs_entry->substring, '!', subs_entry->substitute);
+ }
+ }
+
+ if (!did_sub) {
+ identifier_out[scan_to_index++] = identifier[scan_from_index++];
+ }
+ }
+
+ identifier_out[scan_to_index] = '\0';
+ if (identifier_out_end) {
+ *identifier_out_end = &identifier_out[scan_to_index];
+ }
+
+ return;
+}
+
+/* identPlusVers must be at least 2*KMOD_MAX_NAME in length.
+ */
+int assemble_identifier_and_version(
+ kmod_info_t * kmod_info,
+ char * identPlusVers);
+int assemble_identifier_and_version(
+ kmod_info_t * kmod_info,
+ char * identPlusVers)
+{
+ int result = 0;
+
+ compactIdentifier(kmod_info->name, identPlusVers, NULL);
+ result = strnlen(identPlusVers, KMOD_MAX_NAME - 1);
+ identPlusVers[result++] = '\t'; // increment for real char
+ identPlusVers[result] = '\0'; // don't increment for nul char
+ result = strlcat(identPlusVers, kmod_info->version, KMOD_MAX_NAME);
+
+ return result;
+}
+
+#define LAST_LOADED " - last loaded "
+#define LAST_LOADED_TS_WIDTH (16)
+
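+/*
+ * Append loaded kexts matching (or, with invertFlag, not matching) the given
+ * bundle-identifier prefix and library filter to the panic list buffer,
+ * noting the timestamp of the most recently loaded kext.
+ */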
+uint32_t save_loaded_kext_paniclist_typed(
+ const char * prefix,
+ int invertFlag,
+ int libsFlag,
+ char * paniclist,
+ uint32_t list_size,
+ uint32_t * list_length_ptr,
+ int (*printf_func)(const char *fmt, ...));
+uint32_t save_loaded_kext_paniclist_typed(
+ const char * prefix,
+ int invertFlag,
+ int libsFlag,
+ char * paniclist,
+ uint32_t list_size,
+ uint32_t * list_length_ptr,
+ int (*printf_func)(const char *fmt, ...))
+{
+ uint32_t result = 0;
+ int error = 0;
+ kmod_info_t * kmod_info;
+
+ for (kmod_info = kmod;
+ kmod_info && (*list_length_ptr + 1 < list_size);
+ kmod_info = kmod_info->next) {
+
+ int match;
+ char identPlusVers[2*KMOD_MAX_NAME];
+ uint32_t identPlusVersLength;
+ char timestampBuffer[17]; // enough for a uint64_t
+
+ if (!pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)kmod_info))) {
+ (*printf_func)("kmod scan stopped due to missing kmod page: %p\n",
+ kmod_info);
+ error = 1;
+ goto finish;
+ }
+
+ /* Skip all built-in/fake entries.
+ */
+ if (!kmod_info->address) {
+ continue;
+ }
+
+ /* Filter for kmod name (bundle identifier).
+ */
+ match = !strncmp(kmod_info->name, prefix, strnlen(prefix, KMOD_MAX_NAME));
+ if ((match && invertFlag) || (!match && !invertFlag)) {
+ continue;
+ }
+
+ /* Filter for libraries. This isn't a strictly correct check,
+ * but any kext that does have references to it has to be a library.
+ * A kext w/o references may or may not be a library.
+ */
+ if ((libsFlag == 0 && kmod_info->reference_count) ||
+ (libsFlag == 1 && !kmod_info->reference_count)) {
+
+ continue;
+ }
+
+ identPlusVersLength = assemble_identifier_and_version(kmod_info,
+ identPlusVers);
+ if (!identPlusVersLength) {
+ printf_func("error saving loaded kext info\n");
+ goto finish;
+ }
+
+ /* We're going to note the last-loaded kext in the list.
+ */
+ if (kmod_info == kmod) {
+ snprintf(timestampBuffer, sizeof(timestampBuffer), "%llu",
+ last_loaded_timestamp);
+ identPlusVersLength += sizeof(LAST_LOADED) - 1 +
+ strnlen(timestampBuffer, sizeof(timestampBuffer));
+ }
+
+ /* Adding 1 for the newline.
+ */
+ if (*list_length_ptr + identPlusVersLength + 1 >= list_size) {
+ goto finish;
+ }
+
+ *list_length_ptr = strlcat(paniclist, identPlusVers, list_size);
+ if (kmod_info == kmod) {
+ *list_length_ptr = strlcat(paniclist, LAST_LOADED, list_size);
+ *list_length_ptr = strlcat(paniclist, timestampBuffer, list_size);
+ }
+ *list_length_ptr = strlcat(paniclist, "\n", list_size);
+ }
+
+finish:
+ if (!error) {
+ if (*list_length_ptr + 1 <= list_size) {
+ result = list_size - (*list_length_ptr + 1);
+ }
+ }
+
+ return result;
+}
+
+void save_loaded_kext_paniclist(
+ int (*printf_func)(const char *fmt, ...));
+
+void save_loaded_kext_paniclist(
+ int (*printf_func)(const char *fmt, ...))
+{
+ char * newlist = NULL;
+ uint32_t newlist_size = 0;
+ uint32_t newlist_length = 0;
+
+ newlist_length = 0;
+ newlist_size = KMOD_PANICLIST_SIZE;
+ newlist = (char *)kalloc(newlist_size);
+
+ if (!newlist) {
+ printf_func("couldn't allocate kext panic log buffer\n");
+ goto finish;
+ }
+
+ newlist[0] = '\0';
+
+ // non-"com.apple." kexts
+ if (!save_loaded_kext_paniclist_typed("com.apple.", /* invert? */ 1,
+ /* libs? */ -1, newlist, newlist_size, &newlist_length,
+ printf_func)) {
+
+ goto finish;
+ }
+ // "com.apple." nonlibrary kexts
+ if (!save_loaded_kext_paniclist_typed("com.apple.", /* invert? */ 0,
+ /* libs? */ 0, newlist, newlist_size, &newlist_length,
+ printf_func)) {
+
+ goto finish;
+ }
+ // "com.apple." library kexts
+ if (!save_loaded_kext_paniclist_typed("com.apple.", /* invert? */ 0,
+ /* libs? */ 1, newlist, newlist_size, &newlist_length,
+ printf_func)) {
+
+ goto finish;
+ }
+
+ if (loaded_kext_paniclist) {
+ kfree(loaded_kext_paniclist, loaded_kext_paniclist_size);
+ }
+ loaded_kext_paniclist = newlist;
+ loaded_kext_paniclist_size = newlist_size;
+ loaded_kext_paniclist_length = newlist_length;
+
+finish:
+ return;
+}
+
+void save_unloaded_kext_paniclist(
+ kmod_info_t * kmod_info,
+ int (*printf_func)(const char *fmt, ...));
+void save_unloaded_kext_paniclist(
+ kmod_info_t * kmod_info,
+ int (*printf_func)(const char *fmt, ...))
+{
+ char * newlist = NULL;
+ uint32_t newlist_size = 0;
+ uint32_t newlist_length = 0;
+ char identPlusVers[2*KMOD_MAX_NAME];
+ uint32_t identPlusVersLength;
+
+ identPlusVersLength = assemble_identifier_and_version(kmod_info,
+ identPlusVers);
+ if (!identPlusVersLength) {
+ printf_func("error saving unloaded kext info\n");
+ goto finish;
+ }
+
+ newlist_length = identPlusVersLength;
+ newlist_size = newlist_length + 1;
+ newlist = (char *)kalloc(newlist_size);
+
+ if (!newlist) {
+ printf_func("couldn't allocate kext panic log buffer\n");
+ goto finish;
+ }
+
+ newlist[0] = '\0';
+
+ strlcpy(newlist, identPlusVers, newlist_size);
+
+ if (unloaded_kext_paniclist) {
+ kfree(unloaded_kext_paniclist, unloaded_kext_paniclist_size);
+ }
+ unloaded_kext_paniclist = newlist;
+ unloaded_kext_paniclist_size = newlist_size;
+ unloaded_kext_paniclist_length = newlist_length;
+
+finish:
+ return;
+}
+
+// proto is in header
+void record_kext_unload(kmod_t kmod_id)
+{
+ kmod_info_t * kmod_info = NULL;
+
+ mutex_lock(kmod_lock);
+
+ kmod_info = kmod_lookupbyid(kmod_id);
+ if (kmod_info) {
+ clock_get_uptime(&last_unloaded_timestamp);
+ save_unloaded_kext_paniclist(kmod_info, &printf);
+ }
+ mutex_unlock(kmod_lock);
+ return;
+}
+
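+/* Print the unloaded and loaded kext panic lists, skipping buffers whose
+ * backing pages are not resident. */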
+void dump_kext_info(int (*printf_func)(const char *fmt, ...))
+{
+ printf_func("unloaded kexts:\n");
+ if (unloaded_kext_paniclist && (pmap_find_phys(kernel_pmap, (addr64_t) (uintptr_t) unloaded_kext_paniclist))) {
+ printf_func("%.*s - last unloaded %llu\n",
+ unloaded_kext_paniclist_length, unloaded_kext_paniclist,
+ last_unloaded_timestamp);
+ } else {
+ printf_func("(none)\n");
+ }
+ printf_func("loaded kexts:\n");
+ if (loaded_kext_paniclist && (pmap_find_phys(kernel_pmap, (addr64_t) (uintptr_t) loaded_kext_paniclist)) && loaded_kext_paniclist[0]) {
+ printf_func("%.*s", loaded_kext_paniclist_length, loaded_kext_paniclist);
+ } else {
+ printf_func("(none)\n");
+ }
+ return;
+}
+
+/*******************************************************************************
+*******************************************************************************/
void
kmod_init(void)
{
int kmod_lookupidbyaddress_locked(vm_address_t addr)
{
kmod_info_t *k = 0;
-
+
mutex_lock(kmod_queue_lock);
k = kmod;
- if(NULL != k) {
- while (k) {
- if ((k->address <= addr) && ((k->address + k->size) > addr)) {
- break;
- }
- k = k->next;
- }
- mutex_unlock(kmod_queue_lock);
- } else {
- mutex_unlock(kmod_queue_lock);
- return -1;
- }
-
- if(NULL == k) {
- return -1;
- } else {
- return k->id;
- }
+ if(NULL != k) {
+ while (k) {
+ if ((k->address <= addr) && ((k->address + k->size) > addr)) {
+ break;
+ }
+ k = k->next;
+ }
+ mutex_unlock(kmod_queue_lock);
+ } else {
+ mutex_unlock(kmod_queue_lock);
+ return -1;
+ }
+
+ if(NULL == k) {
+ return -1;
+ } else {
+ return k->id;
+ }
}
kmod_info_t *
*id = info->id;
+ clock_get_uptime(&last_loaded_timestamp);
+ save_loaded_kext_paniclist(&printf);
+
mutex_unlock(kmod_lock);
#if DEBUG
k = k->next;
}
+ if (!fake) {
+ save_loaded_kext_paniclist(&printf);
+ }
+
mutex_unlock(kmod_lock);
return KERN_INVALID_ARGUMENT;
pset = processor->processor_set;
pset_lock(pset);
if (++pset->processor_count == 1)
- pset->low_pri = processor;
+ pset->low_pri = pset->low_count = processor;
enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
processor->state = PROCESSOR_RUNNING;
(void)hw_atomic_add(&processor_avail_count, 1);
return (KERN_SUCCESS);
}
- if (processor->state == PROCESSOR_IDLE) {
+ if (processor->state == PROCESSOR_IDLE)
remqueue(&pset->idle_queue, (queue_entry_t)processor);
- pset->idle_count--;
- }
else
if (processor->state == PROCESSOR_RUNNING)
remqueue(&pset->active_queue, (queue_entry_t)processor);
- else
- panic("processor_shutdown");
processor->state = PROCESSOR_SHUTDOWN;
processor_doshutdown(processor);
splx(s);
- cpu_exit_wait(PROCESSOR_DATA(processor, slot_num));
+ cpu_exit_wait(processor->cpu_num);
return (KERN_SUCCESS);
}
old_thread = machine_processor_shutdown(self, processor_offline, processor);
thread_dispatch(old_thread, self);
-
- /*
- * If we just shutdown another processor, move any
- * threads and timer call outs to the current processor.
- */
- if (processor != current_processor()) {
- processor_set_t pset = processor->processor_set;
-
- pset_lock(pset);
-
- if (processor->state == PROCESSOR_OFF_LINE || processor->state == PROCESSOR_SHUTDOWN) {
- timer_call_shutdown(processor);
- processor_queue_shutdown(processor);
- return;
- }
-
- pset_unlock(pset);
- }
}
/*
thread_dispatch(old_thread, new_thread);
- PMAP_DEACTIVATE_KERNEL(PROCESSOR_DATA(processor, slot_num));
+ PMAP_DEACTIVATE_KERNEL(processor->cpu_num);
pset = processor->processor_set;
pset_lock(pset);
processor->state = PROCESSOR_OFF_LINE;
if (--pset->processor_count == 0)
- pset->low_pri = PROCESSOR_NULL;
+ pset->low_pri = pset->low_count = PROCESSOR_NULL;
(void)hw_atomic_sub(&processor_avail_count, 1);
- pset_unlock(pset);
+ processor_queue_shutdown(processor);
+ /* pset lock dropped */
+
ml_cpu_down();
cpu_sleep();
extern int kdb_printf(const char *format, ...) __printflike(1,2);
+extern int kdb_log(const char *format, ...) __printflike(1,2);
+
extern void printf_init(void);
extern int snprintf(char *, size_t, const char *, ...) __printflike(3,4);
extern void consdebug_putc(char);
+extern void consdebug_log(char);
+
extern void cnputc(char);
extern int cngetc(void);
PE_kputc(c);
}
+
+void
+consdebug_log(char c)
+{
+ debug_putc(c);
+}
+
int
kdb_printf(const char *fmt, ...)
{
return 0;
}
+int
+kdb_log(const char *fmt, ...)
+{
+ va_list listp;
+
+ va_start(listp, fmt);
+ _doprnt(fmt, &listp, consdebug_log, 16);
+ va_end(listp);
+ return 0;
+}
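
kdb_log() mirrors kdb_printf() but routes each character through consdebug_log(), so the output goes only to the debug buffer rather than the console. A user-space analogue of the same varargs-to-character-sink pattern (vsnprintf stands in for the kernel's _doprnt(), and the buffer size is arbitrary):

#include <stdarg.h>
#include <stdio.h>

/* Per-character sink, playing the role consdebug_log() plays above. */
static void log_putc(char c)
{
	fputc(c, stderr);
}

/* Varargs front end that formats into a buffer and feeds each character
 * to the sink; _doprnt() formats in-kernel without an intermediate buffer. */
static int sink_log(const char *fmt, ...)
{
	char buf[256];
	va_list ap;
	int n;

	va_start(ap, fmt);
	n = vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	for (int i = 0; i < n && i < (int)sizeof(buf) - 1; i++)
		log_putc(buf[i]);
	return n;
}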
+
static void
copybyte(int c, void *arg)
{
/*
* Context switch check.
*/
- if ((preempt = csw_check(thread, processor)) != AST_NONE)
+ if ((preempt = csw_check(processor)) != AST_NONE)
ast_on(preempt);
else {
processor_set_t pset = processor->processor_set;
pset_lock(pset);
pset_pri_hint(pset, processor, processor->current_pri);
+ pset_count_hint(pset, processor, processor->runq.count);
pset_unlock(pset);
}
/*
* Initialize the given processor for the cpu
- * indicated by slot_num, and assign to the
+ * indicated by cpu_num, and assign to the
* specified processor set.
*/
void
processor_init(
- processor_t p,
- int slot_num,
- processor_set_t pset)
+ processor_t processor,
+ int cpu_num,
+ processor_set_t pset)
{
- run_queue_init(&p->runq);
-
- p->state = PROCESSOR_OFF_LINE;
- p->active_thread = p->next_thread = p->idle_thread = THREAD_NULL;
- p->processor_set = pset;
- p->current_pri = MINPRI;
- timer_call_setup(&p->quantum_timer, thread_quantum_expire, p);
- p->deadline = UINT64_MAX;
- p->timeslice = 0;
- p->processor_self = IP_NULL;
- simple_lock_init(&p->lock, 0);
- processor_data_init(p);
- PROCESSOR_DATA(p, slot_num) = slot_num;
- p->processor_list = NULL;
+ run_queue_init(&processor->runq);
+
+ processor->state = PROCESSOR_OFF_LINE;
+ processor->active_thread = processor->next_thread = processor->idle_thread = THREAD_NULL;
+ processor->processor_set = pset;
+ processor->current_pri = MINPRI;
+ processor->cpu_num = cpu_num;
+ timer_call_setup(&processor->quantum_timer, thread_quantum_expire, processor);
+ processor->deadline = UINT64_MAX;
+ processor->timeslice = 0;
+ processor->processor_self = IP_NULL;
+ simple_lock_init(&processor->lock, 0);
+ processor_data_init(processor);
+ processor->processor_list = NULL;
simple_lock(&processor_list_lock);
if (processor_list == NULL)
- processor_list = p;
+ processor_list = processor;
else
- processor_list_tail->processor_list = p;
- processor_list_tail = p;
+ processor_list_tail->processor_list = processor;
+ processor_list_tail = processor;
processor_count++;
simple_unlock(&processor_list_lock);
}
{
queue_init(&pset->active_queue);
queue_init(&pset->idle_queue);
- pset->idle_count = 0;
pset->processor_count = 0;
- pset->low_pri = PROCESSOR_NULL;
+ pset->low_pri = pset->low_count = PROCESSOR_NULL;
pset_lock_init(pset);
pset->pset_self = IP_NULL;
pset->pset_name_self = IP_NULL;
processor_info_t info,
mach_msg_type_number_t *count)
{
- register int slot_num, state;
+ register int cpu_num, state;
kern_return_t result;
if (processor == PROCESSOR_NULL)
return (KERN_INVALID_ARGUMENT);
- slot_num = PROCESSOR_DATA(processor, slot_num);
+ cpu_num = processor->cpu_num;
switch (flavor) {
return (KERN_FAILURE);
basic_info = (processor_basic_info_t) info;
- basic_info->cpu_type = slot_type(slot_num);
- basic_info->cpu_subtype = slot_subtype(slot_num);
+ basic_info->cpu_type = slot_type(cpu_num);
+ basic_info->cpu_subtype = slot_subtype(cpu_num);
state = processor->state;
if (state == PROCESSOR_OFF_LINE)
basic_info->running = FALSE;
else
basic_info->running = TRUE;
- basic_info->slot_num = slot_num;
+ basic_info->slot_num = cpu_num;
if (processor == master_processor)
basic_info->is_master = TRUE;
else
}
default:
- result = cpu_info(flavor, slot_num, info, count);
+ result = cpu_info(flavor, cpu_num, info, count);
if (result == KERN_SUCCESS)
*host = &realhost;
prev = thread_bind(processor);
thread_block(THREAD_CONTINUE_NULL);
- result = cpu_start(PROCESSOR_DATA(processor, slot_num));
+ result = cpu_start(processor->cpu_num);
thread_bind(prev);
if (processor->processor_self == IP_NULL)
ipc_processor_init(processor);
- result = cpu_start(PROCESSOR_DATA(processor, slot_num));
+ result = cpu_start(processor->cpu_num);
if (result != KERN_SUCCESS) {
s = splsched();
pset_lock(pset);
processor->state = PROCESSOR_OFF_LINE;
- timer_call_shutdown(processor);
pset_unlock(pset);
splx(s);
if (processor == PROCESSOR_NULL)
return(KERN_INVALID_ARGUMENT);
- return(cpu_control(PROCESSOR_DATA(processor, slot_num), info, count));
+ return(cpu_control(processor->cpu_num, info, count));
}
kern_return_t
struct processor_set {
queue_head_t active_queue; /* active processors */
queue_head_t idle_queue; /* idle processors */
- int idle_count;
- processor_t low_pri;
+ processor_t low_pri, low_count;
int processor_count;
processor_set_t processor_set; /* assigned set */
int current_pri; /* priority of current thread */
+ int cpu_num; /* platform numeric id */
timer_call_data_t quantum_timer; /* timer for quantum expiration */
uint64_t quantum_end; /* time when current quantum ends */
extern unsigned int processor_count;
decl_simple_lock_data(extern,processor_list_lock)
-extern processor_t master_processor;
+extern uint32_t processor_avail_count;
+
+extern processor_t master_processor;
/*
* Processor state is accessed by locking the scheduling lock
#define PROCESSOR_OFF_LINE 0 /* Not available */
#define PROCESSOR_SHUTDOWN 1 /* Going off-line */
#define PROCESSOR_START 2 /* Being started */
-#define PROCESSOR_IDLE 3 /* Idle */
-#define PROCESSOR_DISPATCHING 4 /* Dispatching (idle -> running) */
-#define PROCESSOR_RUNNING 5 /* Normal execution */
+#define PROCESSOR_INACTIVE 3 /* Inactive (unavailable) */
+#define PROCESSOR_IDLE 4 /* Idle (available) */
+#define PROCESSOR_DISPATCHING 5 /* Dispatching (idle -> active) */
+#define PROCESSOR_RUNNING 6 /* Normal execution */
extern processor_t current_processor(void);
if ((p) != (ps)->low_pri) { \
if ((pri) < (ps)->low_pri->current_pri) \
(ps)->low_pri = (p); \
+ else \
+ if ((ps)->low_pri->state < PROCESSOR_IDLE) \
+ (ps)->low_pri = (p); \
+ } \
+MACRO_END
+
+#define pset_count_hint(ps, p, cnt) \
+MACRO_BEGIN \
+ if ((p) != (ps)->low_count) { \
+ if ((cnt) < (ps)->low_count->runq.count) \
+ (ps)->low_count = (p); \
+ else \
+ if ((ps)->low_count->state < PROCESSOR_IDLE) \
+ (ps)->low_count = (p); \
} \
MACRO_END
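
The two hint macros keep per-pset shortcuts for thread placement: low_pri caches a processor running comparatively low-priority work, and the new low_count caches one with a comparatively short run queue. The renumbered processor states above matter here, because any state below PROCESSOR_IDLE (off-line, shutdown, start, or the new inactive state) makes a cached hint unusable and forces it to be replaced. A self-contained sketch of the count hint using stand-in types (the real fields live in the kernel headers):

struct cpu_stub  { int runq_count; int state; };
struct pset_stub { struct cpu_stub *low_count; };

#define STATE_IDLE 4	/* mirrors PROCESSOR_IDLE in the renumbered states */

static void
count_hint_sketch(struct pset_stub *ps, struct cpu_stub *p, int cnt)
{
	if (p != ps->low_count) {
		if (cnt < ps->low_count->runq_count)
			ps->low_count = p;	/* shorter run queue wins */
		else if (ps->low_count->state < STATE_IDLE)
			ps->low_count = p;	/* stale hint: CPU not schedulable */
	}
}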
extern void processor_init(
processor_t processor,
- int slot_num,
+ int cpu_num,
processor_set_t processor_set) __attribute__((section("__TEXT, initcode")));
extern kern_return_t processor_shutdown(
#define pset_deallocate(x)
#define pset_reference(x)
+extern void machine_run_count(
+ uint32_t count);
+
+extern boolean_t machine_cpu_is_inactive(
+ int num);
+
#else /* MACH_KERNEL_PRIVATE */
__BEGIN_DECLS
#endif /* MACH_KERNEL_PRIVATE */
-#ifdef XNU_KERNEL_PRIVATE
-
-extern uint32_t processor_avail_count;
-
-#endif
#endif /* _KERN_PROCESSOR_H_ */
/*
- * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
timer_init(&PROCESSOR_DATA(processor, idle_state));
timer_init(&PROCESSOR_DATA(processor, system_state));
timer_init(&PROCESSOR_DATA(processor, user_state));
-
- queue_init(&PROCESSOR_DATA(processor, timer_call_queue));
}
/*
- * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
unsigned int count;
} stack_cache;
- /* Pending timer callouts */
- queue_head_t timer_call_queue;
-
/* VM event counters */
vm_statistics_data_t vm_stat;
unsigned int avail;
} ikm_cache;
- int slot_num;
-
unsigned long page_grab_count;
int start_color;
void *free_pages;
/*
- * Copyright (c) 2000-2007 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
(thread)->realtime.computation: std_quantum; \
MACRO_END
-/* Invoked at splsched by a thread on itself */
-#define csw_needed(thread, processor) ( \
- ((thread)->state & TH_SUSP) || \
- (first_timeslice(processor)? \
- ((processor)->runq.highq > (thread)->sched_pri || \
- rt_runq.highq > (thread)->sched_pri) : \
- ((processor)->runq.highq >= (thread)->sched_pri || \
- rt_runq.highq >= (thread)->sched_pri)) )
-
extern struct run_queue rt_runq;
/*
timer_call_param_t processor,
timer_call_param_t thread);
-/* Called at splsched by a thread on itself */
-extern ast_t csw_check(
- thread_t thread,
- processor_t processor);
+/* Context switch check for current processor */
+extern ast_t csw_check(processor_t processor);
extern uint32_t std_quantum, min_std_quantum;
extern uint32_t std_quantum_us;
extern uint64_t max_poll_computation;
#define sched_run_incr() \
- (void)hw_atomic_add(&sched_run_count, 1)
+MACRO_BEGIN \
+ machine_run_count(hw_atomic_add(&sched_run_count, 1)); \
+MACRO_END
#define sched_run_decr() \
- (void)hw_atomic_sub(&sched_run_count, 1)
+MACRO_BEGIN \
+ machine_run_count(hw_atomic_sub(&sched_run_count, 1)); \
+MACRO_END
#define sched_share_incr() \
- (void)hw_atomic_add(&sched_share_count, 1)
+MACRO_BEGIN \
+ (void)hw_atomic_add(&sched_share_count, 1); \
+MACRO_END
#define sched_share_decr() \
- (void)hw_atomic_sub(&sched_share_count, 1)
+MACRO_BEGIN \
+ (void)hw_atomic_sub(&sched_share_count, 1); \
+MACRO_END
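
sched_run_incr/decr now pass the value returned by hw_atomic_add/sub straight to machine_run_count(), so machine-dependent code sees the updated count of runnable threads. A C11 analogue of the pattern, assuming the hook receives the post-update value (machine_run_count_stub is a placeholder, since the real hook is machine-specific):

#include <stdatomic.h>

static atomic_uint run_count;

/* Placeholder for machine_run_count(); the real hook is machine-dependent. */
static void machine_run_count_stub(unsigned int count)
{
	(void)count;
}

static void sched_run_incr_sketch(void)
{
	/* atomic_fetch_add returns the old value, so add 1 to hand the hook
	 * the updated count, mirroring how the macro above uses the return
	 * value of hw_atomic_add. */
	machine_run_count_stub(atomic_fetch_add(&run_count, 1) + 1);
}

static void sched_run_decr_sketch(void)
{
	machine_run_count_stub(atomic_fetch_sub(&run_count, 1) - 1);
}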
/*
* thread_timer_delta macro takes care of both thread timers.
/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
static void load_shift_init(void) __attribute__((section("__TEXT, initcode")));
static void preempt_pri_init(void) __attribute__((section("__TEXT, initcode")));
+static thread_t run_queue_dequeue(
+ run_queue_t runq,
+ integer_t options);
+
static thread_t thread_select_idle(
thread_t thread,
processor_t processor);
thread_t thread,
processor_t processor);
-static thread_t choose_thread(
- processor_t processor);
-
static thread_t steal_thread(
processor_set_t pset);
{
processor_set_t pset = processor->processor_set;
thread_t new_thread = THREAD_NULL;
- boolean_t other_runnable;
+ boolean_t other_runnable, inactive_state;
do {
/*
pset_lock(pset);
+ inactive_state = processor->state != PROCESSOR_SHUTDOWN && machine_cpu_is_inactive(processor->cpu_num);
+
simple_lock(&rt_lock);
/*
return (thread);
}
- if ( (!other_runnable ||
+ if (!inactive_state &&
+ (!other_runnable ||
(processor->runq.highq < thread->sched_pri &&
rt_runq.highq < thread->sched_pri)) ) {
pset_pri_hint(pset, processor, processor->current_pri);
+ pset_count_hint(pset, processor, processor->runq.count);
+
processor->deadline = UINT64_MAX;
pset_unlock(pset);
}
}
- if (other_runnable)
- return choose_thread(processor);
+ if (other_runnable) {
+ if (processor->runq.count > 0 && processor->runq.highq >= rt_runq.highq) {
+ simple_unlock(&rt_lock);
+
+ thread = run_queue_dequeue(&processor->runq, SCHED_HEADQ);
+
+ if (!inactive_state) {
+ pset_pri_hint(pset, processor, thread->sched_pri);
+
+ pset_count_hint(pset, processor, processor->runq.count);
+ }
+
+ processor->deadline = UINT64_MAX;
+ pset_unlock(pset);
+
+ return (thread);
+ }
+
+ thread = run_queue_dequeue(&rt_runq, SCHED_HEADQ);
+ simple_unlock(&rt_lock);
+
+ processor->deadline = thread->realtime.deadline;
+ pset_unlock(pset);
+
+ return (thread);
+ }
simple_unlock(&rt_lock);
+ processor->deadline = UINT64_MAX;
+
+ if (inactive_state) {
+ if (processor->state == PROCESSOR_RUNNING)
+ remqueue(&pset->active_queue, (queue_entry_t)processor);
+ else
+ if (processor->state == PROCESSOR_IDLE)
+ remqueue(&pset->idle_queue, (queue_entry_t)processor);
+
+ processor->state = PROCESSOR_INACTIVE;
+
+ pset_unlock(pset);
+
+ return (processor->idle_thread);
+ }
+
/*
* No runnable threads, attempt to steal
* from other processors.
processor->state = PROCESSOR_IDLE;
enqueue_head(&pset->idle_queue, (queue_entry_t)processor);
- pset->low_pri = processor;
- pset->idle_count++;
+ pset->low_pri = pset->low_count = processor;
}
- processor->deadline = UINT64_MAX;
-
pset_unlock(pset);
/*
*/
if (processor->state == PROCESSOR_IDLE) {
remqueue(&pset->idle_queue, (queue_entry_t)processor);
- pset->idle_count--;
enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
processor->next_thread = thread;
*/
if (processor->state == PROCESSOR_IDLE) {
remqueue(&pset->idle_queue, (queue_entry_t)processor);
- pset->idle_count--;
enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
processor->next_thread = thread;
if (testbit(thread->sched_pri, sched_preempt_pri))
preempt = (AST_PREEMPT | AST_URGENT);
else
- if (thread->sched_mode & TH_MODE_TIMESHARE && thread->priority < BASEPRI_BACKGROUND)
+ if (thread->sched_mode & TH_MODE_TIMESHARE && thread->sched_pri < thread->priority)
preempt = AST_NONE;
else
preempt = (options & SCHED_PREEMPT)? AST_PREEMPT: AST_NONE;
if (preempt != AST_NONE) {
if (processor == current_processor()) {
- thread_t self = processor->active_thread;
-
- if (csw_needed(self, processor))
+ if (csw_check(processor) != AST_NONE)
ast_on(preempt);
}
else
* Prefer the last processor, when appropriate.
*/
if (processor != PROCESSOR_NULL) {
- if (processor->processor_set != pset ||
+ if (processor->processor_set != pset || processor->state == PROCESSOR_INACTIVE ||
processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE)
processor = PROCESSOR_NULL;
else
- if (processor->state == PROCESSOR_IDLE || processor->current_pri < thread->sched_pri)
+ if (processor->state == PROCESSOR_IDLE || ( thread->sched_pri > BASEPRI_DEFAULT && processor->current_pri < thread->sched_pri))
return (processor);
}
}
else {
/*
- * Check the low hint processor in the processor set if available.
+ * Check any hinted processors in the processor set if available.
*/
- if (cset->low_pri != PROCESSOR_NULL &&
- cset->low_pri->state != PROCESSOR_SHUTDOWN && cset->low_pri->state != PROCESSOR_OFF_LINE) {
- if (processor == PROCESSOR_NULL || cset->low_pri->current_pri < thread->sched_pri)
- processor = cset->low_pri;
+ if (cset->low_pri != PROCESSOR_NULL && cset->low_pri->state != PROCESSOR_INACTIVE &&
+ cset->low_pri->state != PROCESSOR_SHUTDOWN && cset->low_pri->state != PROCESSOR_OFF_LINE &&
+ (processor == PROCESSOR_NULL ||
+ (thread->sched_pri > BASEPRI_DEFAULT && cset->low_pri->current_pri < thread->sched_pri))) {
+ processor = cset->low_pri;
+ }
+ else
+ if (cset->low_count != PROCESSOR_NULL && cset->low_count->state != PROCESSOR_INACTIVE &&
+ cset->low_count->state != PROCESSOR_SHUTDOWN && cset->low_count->state != PROCESSOR_OFF_LINE &&
+ (processor == PROCESSOR_NULL ||
+ ( thread->sched_pri <= BASEPRI_DEFAULT && cset->low_count->runq.count < processor->runq.count))) {
+ processor = cset->low_count;
}
/*
do {
/*
* If we haven't been able to choose a processor,
- * pick the current one and return it.
+ * pick the boot processor and return it.
*/
if (processor == PROCESSOR_NULL) {
- processor = current_processor();
+ processor = master_processor;
/*
* Check that the correct processor set is
/*
* We must verify that the chosen processor is still available.
*/
- if (processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE)
+ if (processor->state == PROCESSOR_INACTIVE ||
+ processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE)
processor = PROCESSOR_NULL;
} while (processor == PROCESSOR_NULL);
/*
* processor_queue_shutdown:
*
- * Shutdown a processor run queue by moving
- * non-bound threads to the current processor.
+ * Shutdown a processor run queue by
+ * re-dispatching non-bound threads.
*
* Associated pset must be locked, and is
* returned unlocked.
pset_unlock(pset);
- processor = current_processor();
- pset = processor->processor_set;
-
while ((thread = (thread_t)dequeue_head(&tqueue)) != THREAD_NULL) {
thread_lock(thread);
- thread->last_processor = PROCESSOR_NULL;
- pset_lock(pset);
-
- processor_enqueue(processor, thread, SCHED_TAILQ);
-
- pset_unlock(pset);
+ thread_setrun(thread, SCHED_TAILQ);
thread_unlock(thread);
}
}
/*
- * Check for a possible preemption point in
- * the (current) thread.
+ * Check for a preemption point in
+ * the current context.
*
* Called at splsched.
*/
ast_t
csw_check(
- thread_t thread,
processor_t processor)
{
- int current_pri = thread->sched_pri;
ast_t result = AST_NONE;
run_queue_t runq;
if (runq->highq >= BASEPRI_RTQUEUES)
return (AST_PREEMPT | AST_URGENT);
- if (runq->highq > current_pri) {
+ if (runq->highq > processor->current_pri) {
if (runq->urgency > 0)
return (AST_PREEMPT | AST_URGENT);
}
runq = &processor->runq;
- if (runq->highq > current_pri) {
+ if (runq->highq > processor->current_pri) {
if (runq->urgency > 0)
return (AST_PREEMPT | AST_URGENT);
}
else {
runq = &rt_runq;
- if (runq->highq >= current_pri) {
+ if (runq->highq >= processor->current_pri) {
if (runq->urgency > 0)
return (AST_PREEMPT | AST_URGENT);
}
runq = &processor->runq;
- if (runq->highq >= current_pri) {
+ if (runq->highq >= processor->current_pri) {
if (runq->urgency > 0)
return (AST_PREEMPT | AST_URGENT);
if (result != AST_NONE)
return (result);
- if (thread->state & TH_SUSP)
- result |= AST_PREEMPT;
+ if (machine_cpu_is_inactive(processor->cpu_num))
+ return (AST_PREEMPT);
- return (result);
+ if (processor->active_thread->state & TH_SUSP)
+ return (AST_PREEMPT);
+
+ return (AST_NONE);
}
/*
processor_t processor = thread->last_processor;
if (thread == current_thread()) {
- ast_t preempt = csw_check(thread, processor);
+ ast_t preempt;
- if (preempt != AST_NONE)
- ast_on(preempt);
processor->current_pri = priority;
+ if ((preempt = csw_check(processor)) != AST_NONE)
+ ast_on(preempt);
}
else
if ( processor != PROCESSOR_NULL &&
return (processor != PROCESSOR_NULL);
}
-/*
- * choose_thread:
- *
- * Choose a thread to execute from the run queues
- * and return it.
- *
- * Called with pset scheduling lock and rt lock held,
- * released on return.
- */
-static thread_t
-choose_thread(
- processor_t processor)
-{
- processor_set_t pset = processor->processor_set;
- thread_t thread;
-
- if (processor->runq.count > 0 && processor->runq.highq >= rt_runq.highq) {
- simple_unlock(&rt_lock);
-
- thread = run_queue_dequeue(&processor->runq, SCHED_HEADQ);
-
- pset_pri_hint(pset, processor, thread->sched_pri);
-
- processor->deadline = UINT64_MAX;
- pset_unlock(pset);
-
- return (thread);
- }
-
- thread = run_queue_dequeue(&rt_runq, SCHED_HEADQ);
- simple_unlock(&rt_lock);
-
- processor->deadline = thread->realtime.deadline;
- pset_unlock(pset);
-
- return (thread);
-}
-
/*
* steal_processor_thread:
*
remqueue(&cset->active_queue, (queue_entry_t)processor);
enqueue_tail(&cset->active_queue, (queue_entry_t)processor);
- processor->deadline = UINT64_MAX;
pset_unlock(cset);
return (thread);
machine_idle();
(void)splsched();
+
+ if (processor->state == PROCESSOR_INACTIVE && !machine_cpu_is_inactive(processor->cpu_num))
+ break;
}
timer_switch(&PROCESSOR_DATA(processor, idle_state),
else
if (state == PROCESSOR_IDLE) {
remqueue(&pset->idle_queue, (queue_entry_t)processor);
- pset->idle_count--;
processor->state = PROCESSOR_RUNNING;
enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
}
else
+ if (state == PROCESSOR_INACTIVE) {
+ processor->state = PROCESSOR_RUNNING;
+ enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
+ }
+ else
if (state == PROCESSOR_SHUTDOWN) {
/*
* Going off-line. Force a
static vm_offset_t stack_free_list;
static unsigned int stack_free_count, stack_free_hiwat; /* free list count */
-static unsigned int stack_total, stack_hiwat; /* current total count */
+static unsigned int stack_hiwat;
+unsigned int stack_total; /* current total count */
static unsigned int stack_free_target;
static int stack_free_delta;
load_context_kprintf("calling processor_up\n");
processor_up(processor);
- PMAP_ACTIVATE_KERNEL(PROCESSOR_DATA(processor, slot_num));
+ PMAP_ACTIVATE_KERNEL(processor->cpu_num);
/*
* Acquire a stack if none attached. The panic
timer_start(&PROCESSOR_DATA(processor, system_state), processor->last_dispatch);
PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, system_state);
- PMAP_ACTIVATE_USER(thread, PROCESSOR_DATA(processor, slot_num));
+ PMAP_ACTIVATE_USER(thread, processor->cpu_num);
load_context_kprintf("calling machine_load_context\n");
machine_load_context(thread);
/*
- * Copyright (c) 2000-2007 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
self->depress_timer_active++;
thread_unlock(self);
- if ((preempt = csw_check(self, myprocessor)) != AST_NONE)
+ if ((preempt = csw_check(myprocessor)) != AST_NONE)
ast_on(preempt);
}
}
/*
- * Copyright (c) 1993-1995, 1999-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 1993-1995, 1999-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <mach/thread_act.h>
#include <kern/kern_types.h>
-#include <kern/kalloc.h>
+#include <kern/zalloc.h>
#include <kern/sched_prim.h>
#include <kern/clock.h>
#include <kern/task.h>
#include <sys/kdebug.h>
-#define internal_call_num 768
+decl_simple_lock_data(static,thread_call_lock)
-#define thread_call_thread_min 4
+static zone_t thread_call_zone;
-static
-thread_call_data_t
- internal_call_storage[internal_call_num];
+struct thread_call_group {
+ queue_head_t pending_queue;
+ uint32_t pending_count;
-decl_simple_lock_data(static,thread_call_lock)
+ queue_head_t delayed_queue;
-static
-timer_call_data_t
- thread_call_delaytimer;
+ timer_call_data_t delayed_timer;
-static
-queue_head_t
- thread_call_xxx_queue,
- thread_call_pending_queue, thread_call_delayed_queue;
+ struct wait_queue idle_wqueue;
+ uint32_t idle_count, active_count;
+};
-static
-struct wait_queue
- call_thread_waitqueue;
+typedef struct thread_call_group *thread_call_group_t;
-static
-boolean_t
- activate_thread_awake;
-
-static struct {
- int pending_num,
- pending_hiwat;
- int active_num,
- active_hiwat,
- active_lowat;
- int delayed_num,
- delayed_hiwat;
- int idle_thread_num;
- int thread_num,
- thread_hiwat,
- thread_lowat;
-} thread_call_vars;
+static struct thread_call_group thread_call_group0;
-static __inline__ thread_call_t
- _internal_call_allocate(void);
+static boolean_t thread_call_daemon_awake;
-static __inline__ void
-_internal_call_release(
- thread_call_t call
-);
+#define thread_call_thread_min 4
-static __inline__ void
-_pending_call_enqueue(
- thread_call_t call
-),
-_pending_call_dequeue(
- thread_call_t call
-),
-_delayed_call_enqueue(
- thread_call_t call
-),
-_delayed_call_dequeue(
- thread_call_t call
-);
+#define internal_call_count 768
-static __inline__ void
-_set_delayed_call_timer(
- thread_call_t call
-);
-
-static boolean_t
-_remove_from_pending_queue(
- thread_call_func_t func,
- thread_call_param_t param0,
- boolean_t remove_all
-),
-_remove_from_delayed_queue(
- thread_call_func_t func,
- thread_call_param_t param0,
- boolean_t remove_all
-);
+static thread_call_data_t internal_call_storage[internal_call_count];
+static queue_head_t thread_call_internal_queue;
-static inline void
- _call_thread_wake(void);
+static __inline__ thread_call_t _internal_call_allocate(void);
-static void
- _call_thread(void),
- _activate_thread(void);
+static __inline__ void _internal_call_release(
+ thread_call_t call);
-static void
-_delayed_call_timer(
- timer_call_param_t p0,
- timer_call_param_t p1
-);
+static __inline__ boolean_t _pending_call_enqueue(
+ thread_call_t call,
+ thread_call_group_t group),
+ _delayed_call_enqueue(
+ thread_call_t call,
+ thread_call_group_t group,
+ uint64_t deadline),
+ _call_dequeue(
+ thread_call_t call,
+ thread_call_group_t group);
+
+static __inline__ void thread_call_wake(
+ thread_call_group_t group);
+
+static __inline__ void _set_delayed_call_timer(
+ thread_call_t call,
+ thread_call_group_t group);
+
+static boolean_t _remove_from_pending_queue(
+ thread_call_func_t func,
+ thread_call_param_t param0,
+ boolean_t remove_all),
+ _remove_from_delayed_queue(
+ thread_call_func_t func,
+ thread_call_param_t param0,
+ boolean_t remove_all);
+
+static void thread_call_daemon(
+ thread_call_group_t group),
+ thread_call_thread(
+ thread_call_group_t group);
+
+static void thread_call_delayed_timer(
+ timer_call_param_t p0,
+ timer_call_param_t p1);
#define qe(x) ((queue_entry_t)(x))
#define TC(x) ((thread_call_t)(x))
/*
- * Routine: thread_call_initialize [public]
- *
- * Description: Initialize this module, called
- * early during system initialization.
+ * thread_call_initialize:
*
- * Preconditions: None.
- *
- * Postconditions: None.
+ * Initialize this module, called
+ * early during system initialization.
*/
-
void
thread_call_initialize(void)
{
- kern_return_t result;
- thread_t thread;
- thread_call_t call;
- spl_t s;
+ thread_call_t call;
+ thread_call_group_t group = &thread_call_group0;
+ kern_return_t result;
+ thread_t thread;
+ int i;
+ spl_t s;
+
+ i = sizeof (thread_call_data_t);
+ thread_call_zone = zinit(i, 4096 * i, 16 * i, "thread_call");
simple_lock_init(&thread_call_lock, 0);
s = splsched();
simple_lock(&thread_call_lock);
- queue_init(&thread_call_pending_queue);
- queue_init(&thread_call_delayed_queue);
+ queue_init(&group->pending_queue);
+ queue_init(&group->delayed_queue);
+
+ timer_call_setup(&group->delayed_timer, thread_call_delayed_timer, group);
- queue_init(&thread_call_xxx_queue);
+ wait_queue_init(&group->idle_wqueue, SYNC_POLICY_FIFO);
+
+ queue_init(&thread_call_internal_queue);
for (
call = internal_call_storage;
- call < &internal_call_storage[internal_call_num];
+ call < &internal_call_storage[internal_call_count];
call++) {
- enqueue_tail(&thread_call_xxx_queue, qe(call));
+ enqueue_tail(&thread_call_internal_queue, qe(call));
}
- timer_call_setup(&thread_call_delaytimer, _delayed_call_timer, NULL);
-
- wait_queue_init(&call_thread_waitqueue, SYNC_POLICY_FIFO);
- thread_call_vars.thread_lowat = thread_call_thread_min;
-
- activate_thread_awake = TRUE;
+ thread_call_daemon_awake = TRUE;
simple_unlock(&thread_call_lock);
splx(s);
- result = kernel_thread_start_priority((thread_continue_t)_activate_thread, NULL, MAXPRI_KERNEL - 2, &thread);
+ result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon, group, BASEPRI_PREEMPT + 1, &thread);
if (result != KERN_SUCCESS)
panic("thread_call_initialize");
thread_call_setup(
thread_call_t call,
thread_call_func_t func,
- thread_call_param_t param0
-)
+ thread_call_param_t param0)
{
call_entry_setup(call, func, param0);
}
/*
- * Routine: _internal_call_allocate [private, inline]
- *
- * Purpose: Allocate an internal callout entry.
+ * _internal_call_allocate:
*
- * Preconditions: thread_call_lock held.
+ * Allocate an internal callout entry.
*
- * Postconditions: None.
+ * Called with thread_call_lock held.
*/
-
static __inline__ thread_call_t
_internal_call_allocate(void)
{
thread_call_t call;
- if (queue_empty(&thread_call_xxx_queue))
+ if (queue_empty(&thread_call_internal_queue))
panic("_internal_call_allocate");
- call = TC(dequeue_head(&thread_call_xxx_queue));
+ call = TC(dequeue_head(&thread_call_internal_queue));
return (call);
}
/*
- * Routine: _internal_call_release [private, inline]
+ * _internal_call_release:
*
- * Purpose: Release an internal callout entry which
- * is no longer pending (or delayed).
+ * Release an internal callout entry which
+ * is no longer pending (or delayed).
*
- * Preconditions: thread_call_lock held.
- *
- * Postconditions: None.
+ * Called with thread_call_lock held.
*/
-
-static __inline__
-void
+static __inline__ void
_internal_call_release(
- thread_call_t call
-)
+ thread_call_t call)
{
if ( call >= internal_call_storage &&
- call < &internal_call_storage[internal_call_num] )
- enqueue_head(&thread_call_xxx_queue, qe(call));
+ call < &internal_call_storage[internal_call_count] )
+ enqueue_head(&thread_call_internal_queue, qe(call));
}
/*
- * Routine: _pending_call_enqueue [private, inline]
+ * _pending_call_enqueue:
*
- * Purpose: Place an entry at the end of the
- * pending queue, to be executed soon.
+ * Place an entry at the end of the
+ * pending queue, to be executed soon.
*
- * Preconditions: thread_call_lock held.
+ * Returns TRUE if the entry was already
+ * on a queue.
*
- * Postconditions: None.
+ * Called with thread_call_lock held.
*/
-
-static __inline__
-void
+static __inline__ boolean_t
_pending_call_enqueue(
- thread_call_t call
-)
+ thread_call_t call,
+ thread_call_group_t group)
{
- enqueue_tail(&thread_call_pending_queue, qe(call));
- if (++thread_call_vars.pending_num > thread_call_vars.pending_hiwat)
- thread_call_vars.pending_hiwat = thread_call_vars.pending_num;
+ queue_t old_queue;
- call->state = PENDING;
-}
+ old_queue = call_entry_enqueue_tail(call, &group->pending_queue);
-/*
- * Routine: _pending_call_dequeue [private, inline]
- *
- * Purpose: Remove an entry from the pending queue,
- * effectively unscheduling it.
- *
- * Preconditions: thread_call_lock held.
- *
- * Postconditions: None.
- */
+ group->pending_count++;
-static __inline__
-void
-_pending_call_dequeue(
- thread_call_t call
-)
-{
- (void)remque(qe(call));
- thread_call_vars.pending_num--;
-
- call->state = IDLE;
+ return (old_queue != NULL);
}
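
The rewritten helpers replace the old per-entry state field (IDLE/PENDING/DELAYED) with the call_entry queue back-pointer: enqueueing returns the queue the entry used to be on, and a non-NULL result means it was already scheduled. A stand-alone sketch of that convention (stub types only; the list linkage itself is omitted):

#include <stdbool.h>
#include <stddef.h>

struct queue_stub;
struct entry_stub { struct queue_stub *queue; /* NULL when idle */ };

static struct queue_stub *
enqueue_stub(struct entry_stub *e, struct queue_stub *q)
{
	struct queue_stub *old = e->queue;	/* queue it was on, if any */
	e->queue = q;				/* (real code also links the nodes) */
	return old;
}

static bool
was_already_scheduled(struct entry_stub *e, struct queue_stub *pending)
{
	return enqueue_stub(e, pending) != NULL;
}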
/*
- * Routine: _delayed_call_enqueue [private, inline]
+ * _delayed_call_enqueue:
*
- * Purpose: Place an entry on the delayed queue,
- * after existing entries with an earlier
- * (or identical) deadline.
+ * Place an entry on the delayed queue,
+ * after existing entries with an earlier
+ * (or identical) deadline.
*
- * Preconditions: thread_call_lock held.
+ * Returns TRUE if the entry was already
+ * on a queue.
*
- * Postconditions: None.
+ * Called with thread_call_lock held.
*/
-
-static __inline__
-void
+static __inline__ boolean_t
_delayed_call_enqueue(
- thread_call_t call
-)
+ thread_call_t call,
+ thread_call_group_t group,
+ uint64_t deadline)
{
- thread_call_t current;
-
- current = TC(queue_first(&thread_call_delayed_queue));
-
- while (TRUE) {
- if ( queue_end(&thread_call_delayed_queue, qe(current)) ||
- call->deadline < current->deadline ) {
- current = TC(queue_prev(qe(current)));
- break;
- }
-
- current = TC(queue_next(qe(current)));
- }
+ queue_t old_queue;
- insque(qe(call), qe(current));
- if (++thread_call_vars.delayed_num > thread_call_vars.delayed_hiwat)
- thread_call_vars.delayed_hiwat = thread_call_vars.delayed_num;
-
- call->state = DELAYED;
+ old_queue = call_entry_enqueue_deadline(call, &group->delayed_queue, deadline);
+
+ if (old_queue == &group->pending_queue)
+ group->pending_count--;
+
+ return (old_queue != NULL);
}
/*
- * Routine: _delayed_call_dequeue [private, inline]
+ * _call_dequeue:
*
- * Purpose: Remove an entry from the delayed queue,
- * effectively unscheduling it.
+ * Remove an entry from a queue.
*
- * Preconditions: thread_call_lock held.
+ * Returns TRUE if the entry was on a queue.
*
- * Postconditions: None.
+ * Called with thread_call_lock held.
*/
-
-static __inline__
-void
-_delayed_call_dequeue(
- thread_call_t call
-)
+static __inline__ boolean_t
+_call_dequeue(
+ thread_call_t call,
+ thread_call_group_t group)
{
- (void)remque(qe(call));
- thread_call_vars.delayed_num--;
-
- call->state = IDLE;
+ queue_t old_queue;
+
+ old_queue = call_entry_dequeue(call);
+
+ if (old_queue == &group->pending_queue)
+ group->pending_count--;
+
+ return (old_queue != NULL);
}
/*
- * Routine: _set_delayed_call_timer [private]
+ * _set_delayed_call_timer:
*
- * Purpose: Reset the timer so that it
- * next expires when the entry is due.
+ * Reset the timer so that it
+ * next expires when the entry is due.
*
- * Preconditions: thread_call_lock held.
- *
- * Postconditions: None.
+ * Called with thread_call_lock held.
*/
-
static __inline__ void
_set_delayed_call_timer(
- thread_call_t call
-)
+ thread_call_t call,
+ thread_call_group_t group)
{
- timer_call_enter(&thread_call_delaytimer, call->deadline);
+ timer_call_enter(&group->delayed_timer, call->deadline);
}
/*
- * Routine: _remove_from_pending_queue [private]
+ * _remove_from_pending_queue:
*
- * Purpose: Remove the first (or all) matching
- * entries from the pending queue,
- * effectively unscheduling them.
- * Returns whether any matching entries
- * were found.
+ * Remove the first (or all) matching
+ * entries from the pending queue.
*
- * Preconditions: thread_call_lock held.
+ * Returns TRUE if any matching entries
+ * were found.
*
- * Postconditions: None.
+ * Called with thread_call_lock held.
*/
-
-static
-boolean_t
+static boolean_t
_remove_from_pending_queue(
thread_call_func_t func,
thread_call_param_t param0,
- boolean_t remove_all
-)
+ boolean_t remove_all)
{
- boolean_t call_removed = FALSE;
- thread_call_t call;
+ boolean_t call_removed = FALSE;
+ thread_call_t call;
+ thread_call_group_t group = &thread_call_group0;
- call = TC(queue_first(&thread_call_pending_queue));
+ call = TC(queue_first(&group->pending_queue));
- while (!queue_end(&thread_call_pending_queue, qe(call))) {
+ while (!queue_end(&group->pending_queue, qe(call))) {
if ( call->func == func &&
call->param0 == param0 ) {
thread_call_t next = TC(queue_next(qe(call)));
- _pending_call_dequeue(call);
+ _call_dequeue(call, group);
_internal_call_release(call);
}
/*
- * Routine: _remove_from_delayed_queue [private]
+ * _remove_from_delayed_queue:
*
- * Purpose: Remove the first (or all) matching
- * entries from the delayed queue,
- * effectively unscheduling them.
- * Returns whether any matching entries
- * were found.
+ * Remove the first (or all) matching
+ * entries from the delayed queue.
*
- * Preconditions: thread_call_lock held.
+ * Returns TRUE if any matching entries
+ * were found.
*
- * Postconditions: None.
+ * Called with thread_call_lock held.
*/
-
-static
-boolean_t
+static boolean_t
_remove_from_delayed_queue(
thread_call_func_t func,
thread_call_param_t param0,
- boolean_t remove_all
-)
+ boolean_t remove_all)
{
- boolean_t call_removed = FALSE;
- thread_call_t call;
+ boolean_t call_removed = FALSE;
+ thread_call_t call;
+ thread_call_group_t group = &thread_call_group0;
- call = TC(queue_first(&thread_call_delayed_queue));
+ call = TC(queue_first(&group->delayed_queue));
- while (!queue_end(&thread_call_delayed_queue, qe(call))) {
+ while (!queue_end(&group->delayed_queue, qe(call))) {
if ( call->func == func &&
call->param0 == param0 ) {
thread_call_t next = TC(queue_next(qe(call)));
- _delayed_call_dequeue(call);
+ _call_dequeue(call, group);
_internal_call_release(call);
}
/*
- * Routine: thread_call_func [public]
- *
- * Purpose: Schedule a function callout.
- * Guarantees { function, argument }
- * uniqueness if unique_call is TRUE.
+ * thread_call_func:
*
- * Preconditions: Callable from an interrupt context
- * below splsched.
+ * Enqueue a function callout.
*
- * Postconditions: None.
+ * Guarantees { function, argument }
+ * uniqueness if unique_call is TRUE.
*/
-
void
thread_call_func(
thread_call_func_t func,
thread_call_param_t param,
- boolean_t unique_call
-)
+ boolean_t unique_call)
{
- thread_call_t call;
- spl_t s;
+ thread_call_t call;
+ thread_call_group_t group = &thread_call_group0;
+ spl_t s;
s = splsched();
simple_lock(&thread_call_lock);
- call = TC(queue_first(&thread_call_pending_queue));
+ call = TC(queue_first(&group->pending_queue));
- while (unique_call && !queue_end(&thread_call_pending_queue, qe(call))) {
+ while (unique_call && !queue_end(&group->pending_queue, qe(call))) {
if ( call->func == func &&
call->param0 == param ) {
break;
call = TC(queue_next(qe(call)));
}
- if (!unique_call || queue_end(&thread_call_pending_queue, qe(call))) {
+ if (!unique_call || queue_end(&group->pending_queue, qe(call))) {
call = _internal_call_allocate();
call->func = func;
call->param0 = param;
call->param1 = NULL;
- _pending_call_enqueue(call);
+ _pending_call_enqueue(call, group);
- if (thread_call_vars.active_num <= 0)
- _call_thread_wake();
+ if (group->active_count == 0)
+ thread_call_wake(group);
}
simple_unlock(&thread_call_lock);
}
/*
- * Routine: thread_call_func_delayed [public]
- *
- * Purpose: Schedule a function callout to
- * occur at the stated time.
- *
- * Preconditions: Callable from an interrupt context
- * below splsched.
+ * thread_call_func_delayed:
*
- * Postconditions: None.
+ * Enqueue a function callout to
+ * occur at the stated time.
*/
-
void
thread_call_func_delayed(
thread_call_func_t func,
thread_call_param_t param,
- uint64_t deadline
-)
+ uint64_t deadline)
{
- thread_call_t call;
- spl_t s;
+ thread_call_t call;
+ thread_call_group_t group = &thread_call_group0;
+ spl_t s;
s = splsched();
simple_lock(&thread_call_lock);
call->func = func;
call->param0 = param;
call->param1 = 0;
- call->deadline = deadline;
- _delayed_call_enqueue(call);
+ _delayed_call_enqueue(call, group, deadline);
- if (queue_first(&thread_call_delayed_queue) == qe(call))
- _set_delayed_call_timer(call);
+ if (queue_first(&group->delayed_queue) == qe(call))
+ _set_delayed_call_timer(call, group);
simple_unlock(&thread_call_lock);
splx(s);
}
/*
- * Routine: thread_call_func_cancel [public]
+ * thread_call_func_cancel:
*
- * Purpose: Unschedule a function callout.
- * Removes one (or all)
- * { function, argument }
- * instance(s) from either (or both)
- * the pending and the delayed queue,
- * in that order. Returns a boolean
- * indicating whether any calls were
- * cancelled.
+ * Dequeue a function callout.
*
- * Preconditions: Callable from an interrupt context
- * below splsched.
+ * Removes one (or all) { function, argument }
+ * instance(s) from either (or both)
+ * the pending and the delayed queue,
+ * in that order.
*
- * Postconditions: None.
+ * Returns TRUE if any calls were cancelled.
*/
-
boolean_t
thread_call_func_cancel(
thread_call_func_t func,
thread_call_param_t param,
- boolean_t cancel_all
-)
+ boolean_t cancel_all)
{
boolean_t result;
spl_t s;
}
/*
- * Routine: thread_call_allocate [public]
+ * thread_call_allocate:
*
- * Purpose: Allocate an external callout
- * entry.
- *
- * Preconditions: None.
- *
- * Postconditions: None.
+ * Allocate a callout entry.
*/
-
thread_call_t
thread_call_allocate(
thread_call_func_t func,
- thread_call_param_t param0
-)
+ thread_call_param_t param0)
{
- thread_call_t call = (void *)kalloc(sizeof (thread_call_data_t));
-
- call->func = func;
- call->param0 = param0;
- call->state = IDLE;
-
+ thread_call_t call = zalloc(thread_call_zone);
+
+ call_entry_setup(call, func, param0);
+
return (call);
}
/*
- * Routine: thread_call_free [public]
- *
- * Purpose: Free an external callout
- * entry.
- *
- * Preconditions: None.
+ * thread_call_free:
*
- * Postconditions: None.
+ * Free a callout entry.
*/
-
boolean_t
thread_call_free(
- thread_call_t call
-)
+ thread_call_t call)
{
spl_t s;
s = splsched();
simple_lock(&thread_call_lock);
- if (call->state != IDLE) {
+ if (call->queue != NULL) {
simple_unlock(&thread_call_lock);
splx(s);
simple_unlock(&thread_call_lock);
splx(s);
- kfree(call, sizeof (thread_call_data_t));
+ zfree(thread_call_zone, call);
return (TRUE);
}
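
For context, a hypothetical caller of the public interface, relying on the return-value semantics described in the comments above; the callback, the 100 ms interval, and the use of clock_interval_to_deadline() are illustrative and not part of this patch:

static void my_timeout(__unused thread_call_param_t p0,
			__unused thread_call_param_t p1)
{
	/* ... handle the timeout ... */
}

static void arm_timeout(void)
{
	uint64_t	deadline;
	thread_call_t	call = thread_call_allocate(my_timeout, NULL);

	clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);

	/* TRUE here would mean the call was already queued and was rescheduled. */
	(void) thread_call_enter_delayed(call, deadline);

	/* Later: thread_call_cancel(call) returns TRUE only if it was still
	 * queued, and thread_call_free(call) returns FALSE while the entry
	 * remains on a queue. */
}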
/*
- * Routine: thread_call_enter [public]
+ * thread_call_enter:
*
- * Purpose: Schedule an external callout
- * entry to occur "soon". Returns a
- * boolean indicating whether the call
- * had been already scheduled.
+ * Enqueue a callout entry to occur "soon".
*
- * Preconditions: Callable from an interrupt context
- * below splsched.
- *
- * Postconditions: None.
+ * Returns TRUE if the call was
+ * already on a queue.
*/
-
boolean_t
thread_call_enter(
- thread_call_t call
-)
+ thread_call_t call)
{
- boolean_t result = TRUE;
- spl_t s;
+ boolean_t result = TRUE;
+ thread_call_group_t group = &thread_call_group0;
+ spl_t s;
s = splsched();
simple_lock(&thread_call_lock);
- if (call->state != PENDING) {
- if (call->state == DELAYED)
- _delayed_call_dequeue(call);
- else if (call->state == IDLE)
- result = FALSE;
-
- _pending_call_enqueue(call);
+ if (call->queue != &group->pending_queue) {
+ result = _pending_call_enqueue(call, group);
- if (thread_call_vars.active_num <= 0)
- _call_thread_wake();
+ if (group->active_count == 0)
+ thread_call_wake(group);
}
call->param1 = 0;
boolean_t
thread_call_enter1(
thread_call_t call,
- thread_call_param_t param1
-)
+ thread_call_param_t param1)
{
- boolean_t result = TRUE;
- spl_t s;
+ boolean_t result = TRUE;
+ thread_call_group_t group = &thread_call_group0;
+ spl_t s;
s = splsched();
simple_lock(&thread_call_lock);
- if (call->state != PENDING) {
- if (call->state == DELAYED)
- _delayed_call_dequeue(call);
- else if (call->state == IDLE)
- result = FALSE;
-
- _pending_call_enqueue(call);
-
- if (thread_call_vars.active_num <= 0)
- _call_thread_wake();
- }
+ if (call->queue != &group->pending_queue) {
+ result = _pending_call_enqueue(call, group);
+
+ if (group->active_count == 0)
+ thread_call_wake(group);
+ }
call->param1 = param1;
}
/*
- * Routine: thread_call_enter_delayed [public]
- *
- * Purpose: Schedule an external callout
- * entry to occur at the stated time.
- * Returns a boolean indicating whether
- * the call had been already scheduled.
+ * thread_call_enter_delayed:
*
- * Preconditions: Callable from an interrupt context
- * below splsched.
+ * Enqueue a callout entry to occur
+ * at the stated time.
*
- * Postconditions: None.
+ * Returns TRUE if the call was
+ * already on a queue.
*/
-
boolean_t
thread_call_enter_delayed(
thread_call_t call,
- uint64_t deadline
-)
+ uint64_t deadline)
{
- boolean_t result = TRUE;
- spl_t s;
+ boolean_t result = TRUE;
+ thread_call_group_t group = &thread_call_group0;
+ spl_t s;
s = splsched();
simple_lock(&thread_call_lock);
- if (call->state == PENDING)
- _pending_call_dequeue(call);
- else if (call->state == DELAYED)
- _delayed_call_dequeue(call);
- else if (call->state == IDLE)
- result = FALSE;
-
- call->param1 = 0;
- call->deadline = deadline;
+ result = _delayed_call_enqueue(call, group, deadline);
- _delayed_call_enqueue(call);
+ if (queue_first(&group->delayed_queue) == qe(call))
+ _set_delayed_call_timer(call, group);
- if (queue_first(&thread_call_delayed_queue) == qe(call))
- _set_delayed_call_timer(call);
+ call->param1 = 0;
simple_unlock(&thread_call_lock);
splx(s);
thread_call_enter1_delayed(
thread_call_t call,
thread_call_param_t param1,
- uint64_t deadline
-)
+ uint64_t deadline)
{
- boolean_t result = TRUE;
- spl_t s;
+ boolean_t result = TRUE;
+ thread_call_group_t group = &thread_call_group0;
+ spl_t s;
s = splsched();
simple_lock(&thread_call_lock);
- if (call->state == PENDING)
- _pending_call_dequeue(call);
- else if (call->state == DELAYED)
- _delayed_call_dequeue(call);
- else if (call->state == IDLE)
- result = FALSE;
+ result = _delayed_call_enqueue(call, group, deadline);
- call->param1 = param1;
- call->deadline = deadline;
+ if (queue_first(&group->delayed_queue) == qe(call))
+ _set_delayed_call_timer(call, group);
- _delayed_call_enqueue(call);
-
- if (queue_first(&thread_call_delayed_queue) == qe(call))
- _set_delayed_call_timer(call);
+ call->param1 = param1;
simple_unlock(&thread_call_lock);
splx(s);
}
/*
- * Routine: thread_call_cancel [public]
- *
- * Purpose: Unschedule a callout entry.
- * Returns a boolean indicating
- * whether the call had actually
- * been scheduled.
+ * thread_call_cancel:
*
- * Preconditions: Callable from an interrupt context
- * below splsched.
+ * Dequeue a callout entry.
*
- * Postconditions: None.
+ * Returns TRUE if the call was
+ * on a queue.
*/
-
boolean_t
thread_call_cancel(
- thread_call_t call
-)
+ thread_call_t call)
{
- boolean_t result = TRUE;
- spl_t s;
+ boolean_t result;
+ thread_call_group_t group = &thread_call_group0;
+ spl_t s;
s = splsched();
simple_lock(&thread_call_lock);
-
- if (call->state == PENDING)
- _pending_call_dequeue(call);
- else if (call->state == DELAYED)
- _delayed_call_dequeue(call);
- else
- result = FALSE;
+
+ result = _call_dequeue(call, group);
simple_unlock(&thread_call_lock);
splx(s);
}
/*
- * Routine: thread_call_is_delayed [public]
- *
- * Purpose: Returns a boolean indicating
- * whether a call is currently scheduled
- * to occur at a later time. Optionally
- * returns the expiration time.
+ * thread_call_is_delayed:
*
- * Preconditions: Callable from an interrupt context
- * below splsched.
+ * Returns TRUE if the call is
+ * currently on a delayed queue.
*
- * Postconditions: None.
+ * Optionally returns the expiration time.
*/
-
boolean_t
thread_call_is_delayed(
thread_call_t call,
uint64_t *deadline)
{
- boolean_t result = FALSE;
- spl_t s;
+ boolean_t result = FALSE;
+ thread_call_group_t group = &thread_call_group0;
+ spl_t s;
s = splsched();
simple_lock(&thread_call_lock);
- if (call->state == DELAYED) {
+ if (call->queue == &group->delayed_queue) {
if (deadline != NULL)
*deadline = call->deadline;
result = TRUE;
}
/*
- * Routine: _call_thread_wake [private, inline]
- *
- * Purpose: Wake a callout thread to service
- * pending callout entries. May wake
- * the activate thread in order to
- * create additional callout threads.
+ * thread_call_wake:
*
- * Preconditions: thread_call_lock held.
+ * Wake a call thread to service
+ * pending call entries. May wake
+ * the daemon thread in order to
+ * create additional call threads.
*
- * Postconditions: None.
+ * Called with thread_call_lock held.
*/
-
-static inline void
-_call_thread_wake(void)
+static __inline__ void
+thread_call_wake(
+ thread_call_group_t group)
{
- if (wait_queue_wakeup_one(&call_thread_waitqueue, NULL, THREAD_AWAKENED) == KERN_SUCCESS) {
- thread_call_vars.idle_thread_num--;
-
- if (++thread_call_vars.active_num > thread_call_vars.active_hiwat)
- thread_call_vars.active_hiwat = thread_call_vars.active_num;
+ if (group->idle_count > 0 && wait_queue_wakeup_one(&group->idle_wqueue, NULL, THREAD_AWAKENED) == KERN_SUCCESS) {
+ group->idle_count--; group->active_count++;
}
else
- if (!activate_thread_awake) {
- thread_wakeup_one(&activate_thread_awake);
- activate_thread_awake = TRUE;
+ if (!thread_call_daemon_awake) {
+ thread_call_daemon_awake = TRUE;
+ thread_wakeup_one(&thread_call_daemon_awake);
}
}
*
* Call out invoked by the scheduler.
*/
-
static void
sched_call_thread(
- int type,
-__unused thread_t thread)
+ int type,
+__unused thread_t thread)
{
+ thread_call_group_t group = &thread_call_group0;
+
simple_lock(&thread_call_lock);
switch (type) {
case SCHED_CALL_BLOCK:
- if (--thread_call_vars.active_num < thread_call_vars.active_lowat)
- thread_call_vars.active_lowat = thread_call_vars.active_num;
-
- if ( thread_call_vars.active_num <= 0 &&
- thread_call_vars.pending_num > 0 )
- _call_thread_wake();
+ if (--group->active_count == 0 && group->pending_count > 0)
+ thread_call_wake(group);
break;
case SCHED_CALL_UNBLOCK:
- if (++thread_call_vars.active_num > thread_call_vars.active_hiwat)
- thread_call_vars.active_hiwat = thread_call_vars.active_num;
+ group->active_count++;
break;
}
}
/*
- * Routine: _call_thread [private]
- *
- * Purpose: Executed by a callout thread.
- *
- * Preconditions: None.
- *
- * Postconditions: None.
+ * thread_call_thread:
*/
-
-static
-void
-_call_thread_continue(void)
+static void
+thread_call_thread(
+ thread_call_group_t group)
{
thread_t self = current_thread();
thread_sched_call(self, sched_call_thread);
- while (thread_call_vars.pending_num > 0) {
+ while (group->pending_count > 0) {
thread_call_t call;
thread_call_func_t func;
thread_call_param_t param0, param1;
- call = TC(dequeue_head(&thread_call_pending_queue));
- thread_call_vars.pending_num--;
+ call = TC(dequeue_head(&group->pending_queue));
+ group->pending_count--;
func = call->func;
param0 = call->param0;
param1 = call->param1;
- call->state = IDLE;
+ call->queue = NULL;
_internal_call_release(call);
(*func)(param0, param1);
- (void)thread_funnel_set(self->funnel_lock, FALSE);
+ (void)thread_funnel_set(self->funnel_lock, FALSE); /* XXX */
(void) splsched();
simple_lock(&thread_call_lock);
}
thread_sched_call(self, NULL);
+ group->active_count--;
- if (--thread_call_vars.active_num < thread_call_vars.active_lowat)
- thread_call_vars.active_lowat = thread_call_vars.active_num;
-
- if (thread_call_vars.idle_thread_num < thread_call_vars.thread_lowat) {
- thread_call_vars.idle_thread_num++;
+ if (group->idle_count < thread_call_thread_min) {
+ group->idle_count++;
- wait_queue_assert_wait(&call_thread_waitqueue, NULL, THREAD_UNINT, 0);
+ wait_queue_assert_wait(&group->idle_wqueue, NULL, THREAD_UNINT, 0);
simple_unlock(&thread_call_lock);
(void) spllo();
- thread_block((thread_continue_t)_call_thread_continue);
+ thread_block_parameter((thread_continue_t)thread_call_thread, group);
/* NOTREACHED */
}
-
- thread_call_vars.thread_num--;
-
+
simple_unlock(&thread_call_lock);
(void) spllo();
/* NOTREACHED */
}
-static
-void
-_call_thread(void)
-{
- _call_thread_continue();
- /* NOTREACHED */
-}
-
/*
- * Routine: _activate_thread [private]
- *
- * Purpose: Executed by the activate thread.
- *
- * Preconditions: None.
- *
- * Postconditions: Never terminates.
+ * thread_call_daemon:
*/
-
-static
-void
-_activate_thread_continue(void)
+static void
+thread_call_daemon_continue(
+ thread_call_group_t group)
{
kern_return_t result;
thread_t thread;
(void) splsched();
simple_lock(&thread_call_lock);
- while ( thread_call_vars.active_num <= 0 &&
- thread_call_vars.pending_num > 0 ) {
-
- if (++thread_call_vars.active_num > thread_call_vars.active_hiwat)
- thread_call_vars.active_hiwat = thread_call_vars.active_num;
-
- if (++thread_call_vars.thread_num > thread_call_vars.thread_hiwat)
- thread_call_vars.thread_hiwat = thread_call_vars.thread_num;
+ while (group->active_count == 0 && group->pending_count > 0) {
+ group->active_count++;
simple_unlock(&thread_call_lock);
(void) spllo();
- result = kernel_thread_start_priority((thread_continue_t)_call_thread, NULL, MAXPRI_KERNEL - 1, &thread);
+ result = kernel_thread_start_priority((thread_continue_t)thread_call_thread, group, BASEPRI_PREEMPT, &thread);
if (result != KERN_SUCCESS)
- panic("activate_thread");
+ panic("thread_call_daemon");
thread_deallocate(thread);
(void) splsched();
simple_lock(&thread_call_lock);
}
-
- assert_wait(&activate_thread_awake, THREAD_INTERRUPTIBLE);
- activate_thread_awake = FALSE;
+
+ thread_call_daemon_awake = FALSE;
+ assert_wait(&thread_call_daemon_awake, THREAD_UNINT);
simple_unlock(&thread_call_lock);
(void) spllo();
- thread_block((thread_continue_t)_activate_thread_continue);
+ thread_block_parameter((thread_continue_t)thread_call_daemon_continue, group);
/* NOTREACHED */
}
-static
-void
-_activate_thread(void)
+static void
+thread_call_daemon(
+ thread_call_group_t group)
{
thread_t self = current_thread();
self->options |= TH_OPT_VMPRIV;
vm_page_free_reserve(2); /* XXX */
- _activate_thread_continue();
+ thread_call_daemon_continue(group);
/* NOTREACHED */
}
-static
-void
-_delayed_call_timer(
- __unused timer_call_param_t p0,
+static void
+thread_call_delayed_timer(
+ timer_call_param_t p0,
__unused timer_call_param_t p1
)
{
- uint64_t timestamp;
- thread_call_t call;
- boolean_t new_pending = FALSE;
- spl_t s;
+ thread_call_t call;
+ thread_call_group_t group = p0;
+ boolean_t new_pending = FALSE;
+ uint64_t timestamp;
- s = splsched();
simple_lock(&thread_call_lock);
- clock_get_uptime(×tamp);
+ timestamp = mach_absolute_time();
- call = TC(queue_first(&thread_call_delayed_queue));
+ call = TC(queue_first(&group->delayed_queue));
- while (!queue_end(&thread_call_delayed_queue, qe(call))) {
+ while (!queue_end(&group->delayed_queue, qe(call))) {
if (call->deadline <= timestamp) {
- _delayed_call_dequeue(call);
-
- _pending_call_enqueue(call);
+ _pending_call_enqueue(call, group);
new_pending = TRUE;
}
else
break;
- call = TC(queue_first(&thread_call_delayed_queue));
+ call = TC(queue_first(&group->delayed_queue));
}
- if (!queue_end(&thread_call_delayed_queue, qe(call)))
- _set_delayed_call_timer(call);
+ if (!queue_end(&group->delayed_queue, qe(call)))
+ _set_delayed_call_timer(call, group);
- if (new_pending && thread_call_vars.active_num <= 0)
- _call_thread_wake();
+ if (new_pending && group->active_count == 0)
+ thread_call_wake(group);
simple_unlock(&thread_call_lock);
- splx(s);
}
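
The delayed timer pop therefore does three things under the lock: promote every entry whose deadline has passed onto the pending queue, re-arm the group timer for the earliest remaining deadline, and wake a call thread if new pending work appeared while none were active. A loose stand-alone sketch of the first two steps over an array (the kernel walks a deadline-sorted queue instead):

#include <stddef.h>
#include <stdint.h>

struct dcall { uint64_t deadline; int pending; };

static void
drain_expired(struct dcall *calls, size_t n, uint64_t now, uint64_t *rearm)
{
	uint64_t earliest = UINT64_MAX;

	for (size_t i = 0; i < n; i++) {
		if (!calls[i].pending && calls[i].deadline <= now)
			calls[i].pending = 1;		/* promote to pending */
		else if (!calls[i].pending && calls[i].deadline < earliest)
			earliest = calls[i].deadline;	/* next timer deadline */
	}
	*rearm = earliest;	/* UINT64_MAX means nothing left to arm */
}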
/*
- * Copyright (c) 1993-1995, 1999-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 1993-1995, 1999-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
thread_call_param_t param1);
__BEGIN_DECLS
-boolean_t
-thread_call_enter(
- thread_call_t call
-);
-
-boolean_t
-thread_call_enter1(
- thread_call_t call,
- thread_call_param_t param1
-);
-
-boolean_t
-thread_call_enter_delayed(
- thread_call_t call,
- uint64_t deadline
-);
-
-boolean_t
-thread_call_enter1_delayed(
- thread_call_t call,
- thread_call_param_t param1,
- uint64_t deadline
-);
-
-boolean_t
-thread_call_cancel(
- thread_call_t call
-);
-
-thread_call_t
-thread_call_allocate(
- thread_call_func_t func,
- thread_call_param_t param0
-);
-
-boolean_t
-thread_call_free(
- thread_call_t call
-);
+extern boolean_t thread_call_enter(
+ thread_call_t call);
+
+extern boolean_t thread_call_enter1(
+ thread_call_t call,
+ thread_call_param_t param1);
+
+extern boolean_t thread_call_enter_delayed(
+ thread_call_t call,
+ uint64_t deadline);
+
+extern boolean_t thread_call_enter1_delayed(
+ thread_call_t call,
+ thread_call_param_t param1,
+ uint64_t deadline);
+
+extern boolean_t thread_call_cancel(
+ thread_call_t call);
+
+extern thread_call_t thread_call_allocate(
+ thread_call_func_t func,
+ thread_call_param_t param0);
+
+extern boolean_t thread_call_free(
+ thread_call_t call);
__END_DECLS
typedef struct call_entry thread_call_data_t;
-void
-thread_call_initialize(void);
+extern void thread_call_initialize(void);
-void
-thread_call_setup(
- thread_call_t call,
- thread_call_func_t func,
- thread_call_param_t param0
-);
+extern void thread_call_setup(
+ thread_call_t call,
+ thread_call_func_t func,
+ thread_call_param_t param0);
#endif /* MACH_KERNEL_PRIVATE */
* Obsolete interfaces.
*/
-boolean_t
-thread_call_is_delayed(
- thread_call_t call,
- uint64_t *deadline
-);
-
-void
-thread_call_func(
- thread_call_func_t func,
- thread_call_param_t param,
- boolean_t unique_call
-);
-
-void
-thread_call_func_delayed(
- thread_call_func_t func,
- thread_call_param_t param,
- uint64_t deadline
-);
-
-boolean_t
-thread_call_func_cancel(
- thread_call_func_t func,
- thread_call_param_t param,
- boolean_t cancel_all
-);
+extern boolean_t thread_call_is_delayed(
+ thread_call_t call,
+ uint64_t *deadline);
+
+extern void thread_call_func(
+ thread_call_func_t func,
+ thread_call_param_t param,
+ boolean_t unique_call);
+
+extern void thread_call_func_delayed(
+ thread_call_func_t func,
+ thread_call_param_t param,
+ uint64_t deadline);
+
+extern boolean_t thread_call_func_cancel(
+ thread_call_func_t func,
+ thread_call_param_t param,
+ boolean_t cancel_all);
#ifndef MACH_KERNEL_PRIVATE
/*
- * Copyright (c) 1993-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 1993-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <kern/processor.h>
#include <kern/etimer.h>
#include <kern/timer_call.h>
+#include <kern/timer_queue.h>
#include <kern/call_entry.h>
#include <sys/kdebug.h>
decl_simple_lock_data(static,timer_call_lock)
-static void
-timer_call_interrupt(
- uint64_t timestamp);
-
#define qe(x) ((queue_entry_t)(x))
#define TC(x) ((timer_call_t)(x))
void
timer_call_initialize(void)
{
- spl_t s;
-
simple_lock_init(&timer_call_lock, 0);
-
- s = splclock();
- simple_lock(&timer_call_lock);
-
- clock_set_timer_func((clock_timer_func_t)timer_call_interrupt);
-
- simple_unlock(&timer_call_lock);
- splx(s);
}
void
call_entry_setup(call, func, param0);
}
-static __inline__
-void
-_delayed_call_enqueue(
- queue_t queue,
- timer_call_t call)
+__inline__ queue_t
+call_entry_enqueue_deadline(
+ call_entry_t entry,
+ queue_t queue,
+ uint64_t deadline)
{
+ queue_t old_queue = entry->queue;
timer_call_t current;
- current = TC(queue_first(queue));
+ if (old_queue != queue || entry->deadline < deadline) {
+ if (old_queue != queue)
+ current = TC(queue_first(queue));
+ else
+ current = TC(queue_next(qe(entry)));
- while (TRUE) {
- if ( queue_end(queue, qe(current)) ||
- call->deadline < current->deadline ) {
- current = TC(queue_prev(qe(current)));
- break;
+ if (old_queue != NULL)
+ (void)remque(qe(entry));
+
+ while (TRUE) {
+ if ( queue_end(queue, qe(current)) ||
+ deadline < current->deadline ) {
+ current = TC(queue_prev(qe(current)));
+ break;
+ }
+
+ current = TC(queue_next(qe(current)));
}
- current = TC(queue_next(qe(current)));
+ insque(qe(entry), qe(current));
}
+ else
+ if (deadline < entry->deadline) {
+ current = TC(queue_prev(qe(entry)));
- insque(qe(call), qe(current));
+ (void)remque(qe(entry));
- call->state = DELAYED;
-}
+ while (TRUE) {
+ if ( queue_end(queue, qe(current)) ||
+ current->deadline <= deadline ) {
+ break;
+ }
-static __inline__
-void
-_delayed_call_dequeue(
- timer_call_t call)
-{
- (void)remque(qe(call));
+ current = TC(queue_prev(qe(current)));
+ }
- call->state = IDLE;
-}
+ insque(qe(entry), qe(current));
+ }
-static __inline__
-void
-_set_delayed_call_timer(
- timer_call_t call)
-{
- etimer_set_deadline(call->deadline);
+ entry->queue = queue;
+ entry->deadline = deadline;
+
+ return (old_queue);
}
-boolean_t
-timer_call_enter(
- timer_call_t call,
- uint64_t deadline)
+__inline__ queue_t
+call_entry_enqueue_tail(
+ call_entry_t entry,
+ queue_t queue)
{
- boolean_t result = TRUE;
- queue_t queue;
- spl_t s;
+ queue_t old_queue = entry->queue;
- s = splclock();
- simple_lock(&timer_call_lock);
+ if (old_queue != NULL)
+ (void)remque(qe(entry));
- if (call->state == DELAYED)
- _delayed_call_dequeue(call);
- else
- result = FALSE;
+ enqueue_tail(queue, qe(entry));
- call->param1 = NULL;
- call->deadline = deadline;
+ entry->queue = queue;
- queue = &PROCESSOR_DATA(current_processor(), timer_call_queue);
+ return (old_queue);
+}
- _delayed_call_enqueue(queue, call);
+__inline__ queue_t
+call_entry_dequeue(
+ call_entry_t entry)
+{
+ queue_t old_queue = entry->queue;
- if (queue_first(queue) == qe(call))
- _set_delayed_call_timer(call);
+ if (old_queue != NULL)
+ (void)remque(qe(entry));
- simple_unlock(&timer_call_lock);
- splx(s);
+ entry->queue = NULL;
- return (result);
+ return (old_queue);
}
boolean_t
-timer_call_enter1(
- timer_call_t call,
- timer_call_param_t param1,
- uint64_t deadline)
+timer_call_enter(
+ timer_call_t call,
+ uint64_t deadline)
{
- boolean_t result = TRUE;
- queue_t queue;
+ queue_t queue, old_queue;
spl_t s;
s = splclock();
simple_lock(&timer_call_lock);
- if (call->state == DELAYED)
- _delayed_call_dequeue(call);
- else
- result = FALSE;
-
- call->param1 = param1;
- call->deadline = deadline;
-
- queue = &PROCESSOR_DATA(current_processor(), timer_call_queue);
+ queue = timer_queue_assign(deadline);
- _delayed_call_enqueue(queue, call);
+ old_queue = call_entry_enqueue_deadline(call, queue, deadline);
- if (queue_first(queue) == qe(call))
- _set_delayed_call_timer(call);
+ call->param1 = NULL;
simple_unlock(&timer_call_lock);
splx(s);
- return (result);
+ return (old_queue != NULL);
}
boolean_t
-timer_call_cancel(
- timer_call_t call)
+timer_call_enter1(
+ timer_call_t call,
+ timer_call_param_t param1,
+ uint64_t deadline)
{
- boolean_t result = TRUE;
+ queue_t queue, old_queue;
spl_t s;
s = splclock();
simple_lock(&timer_call_lock);
- if (call->state == DELAYED) {
- queue_t queue = &PROCESSOR_DATA(current_processor(), timer_call_queue);
+ queue = timer_queue_assign(deadline);
- if (queue_first(queue) == qe(call)) {
- _delayed_call_dequeue(call);
+ old_queue = call_entry_enqueue_deadline(call, queue, deadline);
- if (!queue_empty(queue))
- _set_delayed_call_timer((timer_call_t)queue_first(queue));
- }
- else
- _delayed_call_dequeue(call);
- }
- else
- result = FALSE;
+ call->param1 = param1;
simple_unlock(&timer_call_lock);
splx(s);
- return (result);
+ return (old_queue != NULL);
}
boolean_t
-timer_call_is_delayed(
- timer_call_t call,
- uint64_t *deadline)
+timer_call_cancel(
+ timer_call_t call)
{
- boolean_t result = FALSE;
+ queue_t old_queue;
spl_t s;
s = splclock();
simple_lock(&timer_call_lock);
- if (call->state == DELAYED) {
- if (deadline != NULL)
- *deadline = call->deadline;
- result = TRUE;
+ old_queue = call_entry_dequeue(call);
+
+ if (old_queue != NULL) {
+ if (!queue_empty(old_queue))
+ timer_queue_cancel(old_queue, call->deadline, TC(queue_first(old_queue))->deadline);
+ else
+ timer_queue_cancel(old_queue, call->deadline, UINT64_MAX);
}
simple_unlock(&timer_call_lock);
splx(s);
- return (result);
+ return (old_queue != NULL);
}
-/*
- * Called at splclock.
- */
-
void
-timer_call_shutdown(
- processor_t processor)
+timer_queue_shutdown(
+ queue_t queue)
{
- timer_call_t call;
- queue_t queue, myqueue;
-
- assert(processor != current_processor());
-
- queue = &PROCESSOR_DATA(processor, timer_call_queue);
- myqueue = &PROCESSOR_DATA(current_processor(), timer_call_queue);
+ timer_call_t call;
+ queue_t new_queue;
+ spl_t s;
+ s = splclock();
simple_lock(&timer_call_lock);
call = TC(queue_first(queue));
while (!queue_end(queue, qe(call))) {
- _delayed_call_dequeue(call);
+ new_queue = timer_queue_assign(call->deadline);
- _delayed_call_enqueue(myqueue, call);
+ call_entry_enqueue_deadline(call, new_queue, call->deadline);
call = TC(queue_first(queue));
}
- call = TC(queue_first(myqueue));
-
- if (!queue_end(myqueue, qe(call)))
- _set_delayed_call_timer(call);
-
simple_unlock(&timer_call_lock);
+ splx(s);
}
-static void
-timer_call_interrupt(uint64_t timestamp)
+uint64_t
+timer_queue_expire(
+ queue_t queue,
+ uint64_t deadline)
{
- timer_call_t call;
- queue_t queue;
+ timer_call_t call;
simple_lock(&timer_call_lock);
- queue = &PROCESSOR_DATA(current_processor(), timer_call_queue);
-
call = TC(queue_first(queue));
while (!queue_end(queue, qe(call))) {
- if (call->deadline <= timestamp) {
+ if (call->deadline <= deadline) {
timer_call_func_t func;
timer_call_param_t param0, param1;
- _delayed_call_dequeue(call);
+ call_entry_dequeue(call);
func = call->func;
param0 = call->param0;
(unsigned int)param1, 0, 0);
simple_lock(&timer_call_lock);
- } else
+ }
+ else
break;
call = TC(queue_first(queue));
}
if (!queue_end(queue, qe(call)))
- _set_delayed_call_timer(call);
+ deadline = call->deadline;
+ else
+ deadline = UINT64_MAX;
simple_unlock(&timer_call_lock);
+
+ return (deadline);
}
/*
- * Copyright (c) 1993-1995, 1999-2000 Apple Computer, Inc.
- * All rights reserved.
+ * Copyright (c) 1993-1995, 1999-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
*/
/*
* Declarations for timer interrupt callouts.
- *
- * HISTORY
- *
- * 20 December 2000 (debo)
- * Created.
*/
#ifndef _KERN_TIMER_CALL_H_
timer_call_param_t param0,
timer_call_param_t param1);
-boolean_t
-timer_call_enter(
- timer_call_t call,
- uint64_t deadline);
+extern boolean_t timer_call_enter(
+ timer_call_t call,
+ uint64_t deadline);
-boolean_t
-timer_call_enter1(
- timer_call_t call,
- timer_call_param_t param1,
- uint64_t deadline);
+extern boolean_t timer_call_enter1(
+ timer_call_t call,
+ timer_call_param_t param1,
+ uint64_t deadline);
-boolean_t
-timer_call_cancel(
- timer_call_t call);
-
-boolean_t
-timer_call_is_delayed(
- timer_call_t call,
- uint64_t *deadline);
+extern boolean_t timer_call_cancel(
+ timer_call_t call);
#include <kern/call_entry.h>
typedef struct call_entry timer_call_data_t;
-void
-timer_call_initialize(void);
-
-void
-timer_call_setup(
- timer_call_t call,
- timer_call_func_t func,
- timer_call_param_t param0);
+extern void timer_call_initialize(void);
-void
-timer_call_shutdown(
- processor_t processor);
+extern void timer_call_setup(
+ timer_call_t call,
+ timer_call_func_t func,
+ timer_call_param_t param0);
#endif /* MACH_KERNEL_PRIVATE */
--- /dev/null
+/*
+ * Copyright (c) 2008 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * Timer queue support routines.
+ */
+
+#ifndef _KERN_TIMER_QUEUE_H_
+#define _KERN_TIMER_QUEUE_H_
+
+#include <mach/mach_types.h>
+
+#ifdef MACH_KERNEL_PRIVATE
+
+#include <kern/queue.h>
+
+/*
+ * Invoked by kernel, implemented by platform.
+ */
+
+/* Request an expiration deadline, returns queue association */
+extern queue_t timer_queue_assign(
+ uint64_t deadline);
+
+/* Cancel an associated expiration deadline and specify new deadline */
+extern void timer_queue_cancel(
+ queue_t queue,
+ uint64_t deadline,
+ uint64_t new_deadline);
+
+/*
+ * Invoked by platform, implemented by kernel.
+ */
+
+/* Process deadline expiration for queue, returns new deadline */
+extern uint64_t timer_queue_expire(
+ queue_t queue,
+ uint64_t deadline);
+
+/* Shutdown a timer queue and reassign existing activities */
+extern void timer_queue_shutdown(
+ queue_t queue);
+
+#endif /* MACH_KERNEL_PRIVATE */
+
+#endif /* _KERN_TIMER_QUEUE_H_ */
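As a quick illustration of the contract above (not part of the diff): the platform's timer interrupt is expected to pop expired entries with timer_queue_expire() and re-arm its hardware with whatever deadline comes back. The sketch below assumes the etimer_set_deadline() routine used later in this change; the function name example_timer_pop is made up.

/* Illustrative sketch only -- not part of this change. */
static void
example_timer_pop(queue_t queue, uint64_t now)
{
	uint64_t next = timer_queue_expire(queue, now);	/* run whatever has expired */

	if (next != UINT64_MAX)
		etimer_set_deadline(next);		/* re-arm for the next pop */
}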
#include <ppc/mappings.h>
#endif
-int check_freed_element = 0;
-#if MACH_ASSERT
-/* Detect use of zone elt after freeing it by two methods:
+/*
+ * Zone Corruption Debugging
+ *
+ * We provide three methods to detect use of a zone element after it's been freed. These
+ * checks are enabled by specifying "-zc" and/or "-zp" in the boot-args:
+ *
* (1) Range-check the free-list "next" ptr for sanity.
* (2) Store the ptr in two different words, and compare them against
- * each other when re-using the zone elt, to detect modifications;
+ * each other when re-using the zone element, to detect modifications.
+ * (3) Poison the freed memory by overwriting it with 0xdeadbeef.
+ *
+ * The first two checks are fairly lightweight and are enabled by specifying "-zc"
+ * in the boot-args. If you want more aggressive checking for use-after-free bugs
+ * and you don't mind the additional overhead, then turn on poisoning by adding
+ * "-zp" to the boot-args in addition to "-zc". If you specify -zp without -zc,
+ * it still poisons the memory when it's freed, but doesn't check if the memory
+ * has been altered later when it's reallocated.
*/
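For example, both styles of checking could be turned on together; the nvram invocation below is only an illustration of setting the boot-args described above:

	nvram boot-args="-zc -zp"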
-#if defined(__alpha)
-
-#define is_kernel_data_addr(a) \
- (!(a) || (IS_SYS_VA(a) && !((a) & (sizeof(long)-1))))
-
-#else /* !defined(__alpha) */
+boolean_t check_freed_element = FALSE; /* enabled by -zc in boot-args */
+boolean_t zfree_clear = FALSE; /* enabled by -zp in boot-args */
-#define is_kernel_data_addr(a) \
- (!(a) || ((a) >= vm_min_kernel_address && !((a) & 0x3)))
-
-#endif /* defined(__alpha) */
-
-/* Should we set all words of the zone element to an illegal address
- * when it is freed, to help catch usage after freeing? The down-side
- * is that this obscures the identity of the freed element.
- */
-boolean_t zfree_clear = FALSE;
+#define is_kernel_data_addr(a) (!(a) || ((a) >= vm_min_kernel_address && !((a) & 0x3)))
#define ADD_TO_ZONE(zone, element) \
MACRO_BEGIN \
- if (zfree_clear) \
- { unsigned int i; \
- for (i=1; \
- i < zone->elem_size/sizeof(vm_offset_t) - 1; \
- i++) \
- ((vm_offset_t *)(element))[i] = 0xdeadbeef; \
- } \
- ((vm_offset_t *)(element))[0] = (zone)->free_elements; \
- (zone)->free_elements = (vm_offset_t) (element); \
- (zone)->count--; \
-MACRO_END
-
-#define REMOVE_FROM_ZONE(zone, ret, type) \
-MACRO_BEGIN \
- (ret) = (type) (zone)->free_elements; \
- if ((ret) != (type) 0) { \
- if (!is_kernel_data_addr(((vm_offset_t *)(ret))[0])) { \
- panic("A freed zone element has been modified.\n"); \
- } \
- (zone)->count++; \
- (zone)->free_elements = *((vm_offset_t *)(ret)); \
+ if (zfree_clear) \
+ { unsigned int i; \
+ for (i=0; \
+ i < zone->elem_size/sizeof(uint32_t); \
+ i++) \
+ ((uint32_t *)(element))[i] = 0xdeadbeef; \
} \
-MACRO_END
-#else /* MACH_ASSERT */
-
-#define ADD_TO_ZONE(zone, element) \
-MACRO_BEGIN \
- *((vm_offset_t *)(element)) = (zone)->free_elements; \
- if (check_freed_element) { \
- if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \
- ((vm_offset_t *)(element))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
- (zone)->free_elements; \
- } \
- (zone)->free_elements = (vm_offset_t) (element); \
- (zone)->count--; \
-MACRO_END
-
-#define REMOVE_FROM_ZONE(zone, ret, type) \
-MACRO_BEGIN \
- (ret) = (type) (zone)->free_elements; \
- if ((ret) != (type) 0) { \
- if (check_freed_element) { \
- if ((zone)->elem_size >= (2 * sizeof(vm_offset_t)) && \
- ((vm_offset_t *)(ret))[((zone)->elem_size/sizeof(vm_offset_t))-1] != \
- ((vm_offset_t *)(ret))[0]) \
- panic("a freed zone element has been modified");\
- } \
- (zone)->count++; \
- (zone)->free_elements = *((vm_offset_t *)(ret)); \
+ *((vm_offset_t *)(element)) = (zone)->free_elements; \
+ if (check_freed_element) { \
+ if ((zone)->elem_size >= (2 * sizeof(vm_offset_t))) \
+ ((vm_offset_t *)(element))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \
+ (zone)->free_elements; \
} \
+ (zone)->free_elements = (vm_offset_t) (element); \
+ (zone)->count--; \
MACRO_END
-#endif /* MACH_ASSERT */
+#define REMOVE_FROM_ZONE(zone, ret, type) \
+MACRO_BEGIN \
+ (ret) = (type) (zone)->free_elements; \
+ if ((ret) != (type) 0) { \
+ if (check_freed_element) { \
+ if (!is_kernel_data_addr(((vm_offset_t *)(ret))[0]) || \
+ ((zone)->elem_size >= (2 * sizeof(vm_offset_t)) && \
+ ((vm_offset_t *)(ret))[((zone)->elem_size/sizeof(vm_offset_t))-1] != \
+ ((vm_offset_t *)(ret))[0])) \
+ panic("a freed zone element has been modified");\
+ if (zfree_clear) { \
+ unsigned int ii; \
+ for (ii = sizeof(vm_offset_t) / sizeof(uint32_t); \
+ ii < zone->elem_size/sizeof(uint32_t) - sizeof(vm_offset_t) / sizeof(uint32_t); \
+ ii++) \
+ if (((uint32_t *)(ret))[ii] != (uint32_t)0xdeadbeef) \
+ panic("a freed zone element has been modified");\
+ } \
+ } \
+ (zone)->count++; \
+ (zone)->free_elements = *((vm_offset_t *)(ret)); \
+ } \
+MACRO_END
#if ZONE_DEBUG
#define zone_debug_enabled(z) z->active_zones.next
boolean_t zone_gc_allowed = TRUE;
boolean_t zone_gc_forced = FALSE;
+boolean_t panic_include_zprint = FALSE;
unsigned zone_gc_last_tick = 0;
unsigned zone_gc_max_rate = 0; /* in ticks */
+/*
+ * Zone leak debugging code
+ *
+ * When enabled, this code keeps a log to track allocations to a particular zone that have not
+ * yet been freed. Examining this log will reveal the source of a zone leak. The log is allocated
+ * only when logging is enabled, so there is no effect on the system when it's turned off. Logging is
+ * off by default.
+ *
+ * Enable the logging via the boot-args. Add the parameter "zlog=<zone>" to boot-args where <zone>
+ * is the name of the zone you wish to log.
+ *
+ * This code only tracks one zone, so you need to identify which one is leaking first.
+ * Generally, you'll know you have a leak when you get a "zalloc retry failed 3" panic from the zone
+ * garbage collector. Note that the zone name printed in the panic message is not necessarily the one
+ * containing the leak. So do a zprint from gdb and locate the zone with the bloated size. This
+ * is most likely the problem zone, so set zlog in boot-args to this zone name, reboot and re-run the test. The
+ * next time it panics with this message, examine the log using the kgmacros zstack, findoldest and countpcs.
+ * See the help in the kgmacros for usage info.
+ *
+ *
+ * Zone corruption logging
+ *
+ * Logging can also be used to help identify the source of a zone corruption. First, identify the zone
+ * that is being corrupted, then add "-zc zlog=<zone name>" to the boot-args. When -zc is used in conjunction
+ * with zlog, it changes the logging style to track both allocations and frees to the zone. So when the
+ * corruption is detected, examining the log will show you the stack traces of the callers who last allocated
+ * and freed any particular element in the zone. Use the findelem kgmacro with the address of the element that's been
+ * corrupted to examine its history. This should lead to the source of the corruption.
+ */
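As a concrete illustration of the boot-args described above (the zone name kalloc.512 is only an example, not taken from this change):

	leak logging:        zlog=kalloc.512 zrecs=4000
	corruption logging:  -zc zlog=kalloc.512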
+
+static int log_records; /* size of the log, expressed in number of records */
+
+#define MAX_ZONE_NAME 32 /* max length of a zone name we can take from the boot-args */
+
+static char zone_name_to_log[MAX_ZONE_NAME] = ""; /* the zone name we're logging, if any */
+
+/*
+ * The number of records in the log is configurable via the zrecs parameter in boot-args. Set this to
+ * the number of records you want in the log. For example, "zrecs=1000" sets it to 1000 records. Note
+ * that the larger the size of the log, the slower the system will run due to linear searching in the log,
+ * but one doesn't generally care about performance when tracking down a leak. The log is capped at 8000
+ * records since going much larger than this tends to make the system unresponsive and unbootable on small
+ * memory configurations. The default value is 4000 records.
+ *
+ * MAX_DEPTH configures how deep a stack trace is taken on each zalloc in the zone of interest. 15
+ * levels is usually enough to get past all the layers of code in kalloc and IOKit and see who the actual
+ * caller is up above these lower levels.
+ */
+
+#define ZRECORDS_MAX 8000 /* Max records allowed in the log */
+#define ZRECORDS_DEFAULT 4000 /* default records in log if zrecs is not specified in boot-args */
+#define MAX_DEPTH 15 /* number of levels of the stack trace to record */
+/*
+ * Each record in the log contains a pointer to the zone element it refers to, a "time" number that allows
+ * the records to be ordered chronologically, and a small array to hold the pc's from the stack trace. A
+ * record is added to the log each time a zalloc() is done in the zone_of_interest. For leak debugging,
+ * the record is cleared when a zfree() is done. For corruption debugging, the log tracks both allocs and frees.
+ * If the log fills, old records are replaced as if it were a circular buffer.
+ */
+
+struct zrecord {
+ void *z_element; /* the element that was zalloc'ed or zfree'ed */
+ uint32_t z_opcode:1, /* whether it was a zalloc or zfree */
+ z_time:31; /* time index when operation was done */
+ void *z_pc[MAX_DEPTH]; /* stack trace of caller */
+};
+
+/*
+ * Opcodes for the z_opcode field:
+ */
+
+#define ZOP_ALLOC 1
+#define ZOP_FREE 0
+
+/*
+ * The allocation log and all the related variables are protected by the zone lock for the zone_of_interest
+ */
+
+static struct zrecord *zrecords; /* the log itself, dynamically allocated when logging is enabled */
+static int zcurrent = 0; /* index of the next slot in the log to use */
+static int zrecorded = 0; /* number of allocations recorded in the log */
+static unsigned int ztime = 0; /* a timestamp of sorts */
+static zone_t zone_of_interest = NULL; /* the zone being watched; corresponds to zone_name_to_log */
+
+/*
+ * Decide if we want to log this zone by doing a string compare between a zone name and the name
+ * of the zone to log. Return true if the strings are equal, false otherwise. Because it's not
+ * possible to include spaces in strings passed in via the boot-args, a period in the logname will
+ * match a space in the zone name.
+ */
+
+static int
+log_this_zone(const char *zonename, const char *logname)
+{
+ int len;
+ const char *zc = zonename;
+ const char *lc = logname;
+
+ /*
+ * Compare the strings. We bound the compare by MAX_ZONE_NAME.
+ */
+
+ for (len = 1; len <= MAX_ZONE_NAME; zc++, lc++, len++) {
+
+ /*
+ * If the current characters don't match, check for a space
+ * in the zone name and a corresponding period in the log name.
+ * If that's not there, then the strings don't match.
+ */
+
+ if (*zc != *lc && !(*zc == ' ' && *lc == '.'))
+ break;
+
+ /*
+ * The strings are equal so far. If we're at the end, then it's a match.
+ */
+
+ if (*zc == '\0')
+ return TRUE;
+ }
+
+ return FALSE;
+}
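A few illustrative calls (not part of the diff) showing the period-for-space matching described above; the zone names are hypothetical:

/*
 *	log_this_zone("kalloc.512", "kalloc.512")  -> TRUE   (exact match)
 *	log_this_zone("zone map",   "zone.map")    -> TRUE   (period in logname matches the space)
 *	log_this_zone("kalloc.512", "kalloc.256")  -> FALSE
 */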
+
+
+/*
+ * Test if we want to log this zalloc/zfree event. We log if this is the zone we're interested in and
+ * the buffer for the records has been allocated.
+ */
+
+#define DO_LOGGING(z) (zrecords && (z) == zone_of_interest)
+
+extern boolean_t zlog_ready;
+
+
/*
* zinit initializes a new zone. The zone data structures themselves
* are stored in a zone, which is initially a static structure that
num_zones++;
simple_unlock(&all_zones_lock);
+ /*
+ * Check if we should be logging this zone. If so, remember the zone pointer.
+ */
+
+ if (log_this_zone(z->zone_name, zone_name_to_log)) {
+ zone_of_interest = z;
+ }
+
+ /*
+ * If we want to log a zone, see if we need to allocate buffer space for the log. Some vm related zones are
+ * zinit'ed before we can do a kmem_alloc, so we have to defer allocation in that case. zlog_ready is set to
+ * TRUE once enough of the VM system is up and running to allow a kmem_alloc to work. If we want to log one
+ * of the VM related zones that's set up early on, we will skip allocation of the log until zinit is called again
+ * later on some other zone. So note we may be allocating a buffer to log a zone other than the one being initialized
+ * right now.
+ */
+
+ if (zone_of_interest != NULL && zrecords == NULL && zlog_ready) {
+ if (kmem_alloc(kernel_map, (vm_offset_t *)&zrecords, log_records * sizeof(struct zrecord)) == KERN_SUCCESS) {
+
+ /*
+ * We got the memory for the log. Zero it out since the code needs this to identify unused records.
+ * At this point, everything is set up and we're ready to start logging this zone.
+ */
+
+ bzero((void *)zrecords, log_records * sizeof(struct zrecord));
+ printf("zone: logging started for zone %s (%p)\n", zone_of_interest->zone_name, zone_of_interest);
+
+ } else {
+ printf("zone: couldn't allocate memory for zrecords, turning off zleak logging\n");
+ zone_of_interest = NULL;
+ }
+ }
+
return(z);
}
vm_offset_t zone_zone_space;
char temp_buf[16];
- /* see if we want freed zone element checking */
+ /* see if we want freed zone element checking and/or poisoning */
if (PE_parse_boot_argn("-zc", temp_buf, sizeof (temp_buf))) {
- check_freed_element = 1;
+ check_freed_element = TRUE;
+ }
+
+ if (PE_parse_boot_argn("-zp", temp_buf, sizeof (temp_buf))) {
+ zfree_clear = TRUE;
+ }
+
+ /*
+ * Check for and set up zone leak detection if requested via boot-args. We recognize two
+ * boot-args:
+ *
+ * zlog=<zone_to_log>
+ * zrecs=<num_records_in_log>
+ *
+ * The zlog arg is used to specify the zone name that should be logged, and zrecs is used to
+ * control the size of the log. If zrecs is not specified, a default value is used.
+ */
+
+ if (PE_parse_boot_argn("zlog", zone_name_to_log, sizeof(zone_name_to_log)) == TRUE) {
+ if (PE_parse_boot_argn("zrecs", &log_records, sizeof(log_records)) == TRUE) {
+
+ /*
+ * Don't allow more than ZRECORDS_MAX records even if the user asked for more.
+ * This prevents accidentally hogging too much kernel memory and making the system
+ * unusable.
+ */
+
+ log_records = MIN(ZRECORDS_MAX, log_records);
+
+ } else {
+ log_records = ZRECORDS_DEFAULT;
+ }
}
simple_lock_init(&all_zones_lock, 0);
{
vm_offset_t addr;
kern_return_t retval;
+ void *bt[MAX_DEPTH]; /* only used if zone logging is enabled */
+ int numsaved = 0;
+ int i;
assert(zone != ZONE_NULL);
+ /*
+ * If zone logging is turned on and this is the zone we're tracking, grab a backtrace.
+ */
+
+ if (DO_LOGGING(zone))
+ numsaved = OSBacktrace(&bt[0], MAX_DEPTH);
+
lock_zone(zone);
REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
zone_gc();
printf("zalloc did gc\n");
}
- if (retry == 3)
+ if (retry == 3) {
+ panic_include_zprint = TRUE;
panic("zalloc: \"%s\" (%d elements) retry fail %d", zone->zone_name, zone->count, retval);
+ }
} else {
break;
}
REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
}
+ /*
+ * See if we should be logging allocations in this zone. Logging is rarely done except when a leak is
+ * suspected, so this code rarely executes. We need to do this code while still holding the zone lock
+ * since it protects the various log related data structures.
+ */
+
+ if (DO_LOGGING(zone) && addr) {
+
+ /*
+ * Look for a place to record this new allocation. We implement two different logging strategies
+ * depending on whether we're looking for the source of a zone leak or a zone corruption. When looking
+ * for a leak, we want to log as many allocations as possible in order to clearly identify the leaker
+ * among all the records. So we look for an unused slot in the log and fill that in before overwriting
+ * an old entry. When looking for a corruption, however, it's better to have a chronological log of all
+ * the allocations and frees done in the zone so that the history of operations for a specific zone
+ * element can be inspected. So in this case, we treat the log as a circular buffer and overwrite the
+ * oldest entry whenever a new one needs to be added.
+ *
+ * The check_freed_element flag tells us what style of logging to do. It's set if we're supposed to be
+ * doing corruption style logging (indicated via -zc in the boot-args).
+ */
+
+ if (!check_freed_element && zrecords[zcurrent].z_element && zrecorded < log_records) {
+
+ /*
+ * If we get here, we're doing leak style logging and there are still some unused entries in
+ * the log (since zrecorded is smaller than the size of the log). Look for an unused slot
+ * starting at zcurrent and wrap around if we reach the end of the buffer. If the buffer
+ * is already full, we just fall through and overwrite the element indexed by zcurrent.
+ */
+
+ for (i = zcurrent; i < log_records; i++) {
+ if (zrecords[i].z_element == NULL) {
+ zcurrent = i;
+ goto empty_slot;
+ }
+ }
+
+ for (i = 0; i < zcurrent; i++) {
+ if (zrecords[i].z_element == NULL) {
+ zcurrent = i;
+ goto empty_slot;
+ }
+ }
+ }
+
+ /*
+ * Save a record of this allocation
+ */
+
+empty_slot:
+ if (zrecords[zcurrent].z_element == NULL)
+ zrecorded++;
+
+ zrecords[zcurrent].z_element = (void *)addr;
+ zrecords[zcurrent].z_time = ztime++;
+ zrecords[zcurrent].z_opcode = ZOP_ALLOC;
+
+ for (i = 0; i < numsaved; i++)
+ zrecords[zcurrent].z_pc[i] = bt[i];
+
+ for (; i < MAX_DEPTH; i++)
+ zrecords[zcurrent].z_pc[i] = 0;
+
+ zcurrent++;
+
+ if (zcurrent >= log_records)
+ zcurrent = 0;
+ }
+
if ((addr == 0) && !canblock && (zone->async_pending == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) {
zone->async_pending = TRUE;
unlock_zone(zone);
void *addr)
{
vm_offset_t elem = (vm_offset_t) addr;
+ void *bt[MAX_DEPTH]; /* only used if zone logging is enabled via boot-args */
+ int numsaved = 0;
+
+ assert(zone != ZONE_NULL);
+
+ /*
+ * If zone logging is turned on and this is the zone we're tracking, grab a backtrace.
+ */
+
+ if (DO_LOGGING(zone))
+ numsaved = OSBacktrace(&bt[0], MAX_DEPTH);
#if MACH_ASSERT
/* Basic sanity checks */
}
lock_zone(zone);
+
+ /*
+ * See if we're doing logging on this zone. There are two styles of logging used depending on
+ * whether we're trying to catch a leak or corruption. See comments above in zalloc for details.
+ */
+
+ if (DO_LOGGING(zone)) {
+ int i;
+
+ if (check_freed_element) {
+
+ /*
+ * We're logging to catch a corruption. Add a record of this zfree operation
+ * to log.
+ */
+
+ if (zrecords[zcurrent].z_element == NULL)
+ zrecorded++;
+
+ zrecords[zcurrent].z_element = (void *)addr;
+ zrecords[zcurrent].z_time = ztime++;
+ zrecords[zcurrent].z_opcode = ZOP_FREE;
+
+ for (i = 0; i < numsaved; i++)
+ zrecords[zcurrent].z_pc[i] = bt[i];
+
+ for (; i < MAX_DEPTH; i++)
+ zrecords[zcurrent].z_pc[i] = 0;
+
+ zcurrent++;
+
+ if (zcurrent >= log_records)
+ zcurrent = 0;
+
+ } else {
+
+ /*
+ * We're logging to catch a leak. Remove any record we might have for this
+ * element since it's being freed. Note that we may not find it if the buffer
+ * overflowed and that's OK. Since the log is of a limited size, old records
+ * get overwritten if there are more zallocs than zfrees.
+ */
+
+ for (i = 0; i < log_records; i++) {
+ if (zrecords[i].z_element == addr) {
+ zrecords[i].z_element = NULL;
+ zcurrent = i;
+ zrecorded--;
+ break;
+ }
+ }
+ }
+ }
+
+
#if ZONE_DEBUG
if (zone_debug_enabled(zone)) {
queue_t tmp_elem;
mach_interface.h \
$(filter-out mach_traps.h mach_syscalls.h thread_switch.h, ${DATAFILES})
+INSTALL_MI_LCL_LIST = kext_panic_report.h \
+ bootstrap.h \
+ ${DATAFILES}
+
INSTALL_MI_GEN_LIST =
INSTALL_MI_DIR = mach
--- /dev/null
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifndef _KEXT_PANIC_REPORT_H_
+#define _KEXT_PANIC_REPORT_H_
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+
+/*******************************************************************************
+* String-compaction tables for panic reports' kext listing.
+*******************************************************************************/
+
+typedef struct subs_entry_t {
+ const char * substring;
+ char substitute;
+} subs_entry_t;
+
+/* Prefix substitution list. Common prefixes are replaced with a single
+ * nonalphanumeric character at the beginning of the identifier.
+ *
+ * List should be in descending order of # components, and should then
+ * be in descending frequency order.
+ */
+subs_entry_t kext_identifier_prefix_subs[] = {
+ { "com.apple.driver.", '>' },
+ { "com.apple.iokit.", '|' },
+ { "com.apple.security.", '$' },
+ { "com.apple.", '@' },
+
+ { (char *)0, '\0' }
+};
+
+/* Substring substitution list. Substrings are replaced with a '!' followed
+ * by a single letter mapping to the original string.
+ *
+ * List should be in descending frequency order, and within
+ * groups containing same prefix, in descending length order.
+ */
+subs_entry_t kext_identifier_substring_subs[] = {
+ { "AppleUSB", 'U' },
+ { "Apple", 'A' },
+ { "Family", 'F' },
+ { "Storage", 'S' },
+ { "Controller", 'C' },
+ { "Bluetooth", 'B' },
+ { "Intel", 'I' },
+
+ // CHUD kexts, typically not on user installs
+ { "Profile", 'P' },
+ { "Action", 'a' }, // maybe K if we want to stick to all-caps
+
+ { (char *)0, '\0' }
+};
+
+__END_DECLS
+#endif /* _KEXT_PANIC_REPORT_H_ */
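To make the intended use of these tables concrete, here is a minimal sketch (not part of the header) of how a panic writer might apply them. The function name, buffer handling, and use of the C string routines are assumptions for illustration; the real consumer lives in the panic-report code.

#include <string.h>	/* strlen, strncmp -- sketch assumes a standard string header */

/* Illustrative sketch only. */
static void
compact_kext_identifier(const char *ident, char *out, size_t outlen)
{
	size_t o = 0;
	const subs_entry_t *s;

	if (outlen == 0)
		return;

	/* Replace a known bundle-identifier prefix with its one-character substitute. */
	for (s = kext_identifier_prefix_subs; s->substring != NULL; s++) {
		size_t len = strlen(s->substring);
		if (strncmp(ident, s->substring, len) == 0) {
			if (o + 1 < outlen)
				out[o++] = s->substitute;
			ident += len;
			break;
		}
	}

	/* Replace known substrings with '!' plus their letter; copy everything else. */
	while (*ident != '\0' && o + 1 < outlen) {
		for (s = kext_identifier_substring_subs; s->substring != NULL; s++) {
			size_t len = strlen(s->substring);
			if (strncmp(ident, s->substring, len) == 0) {
				if (o + 2 < outlen) {
					out[o++] = '!';
					out[o++] = s->substitute;
				}
				ident += len;
				break;
			}
		}
		if (s->substring == NULL)	/* no substitution matched: copy one character */
			out[o++] = *ident++;
	}
	out[o] = '\0';
}

/* e.g. "com.apple.driver.AppleUSBHub" would compact to ">!UHub" under these tables. */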
extern kern_return_t kmod_initialize_cpp(kmod_info_t *info);
extern kern_return_t kmod_finalize_cpp(kmod_info_t *info);
+void record_kext_unload(kmod_t kmod_id);
+void dump_kext_info(int (*printf_func)(const char *fmt, ...));
+
extern void kmod_dump(vm_offset_t *addr, unsigned int dump_cnt);
__END_DECLS
#define CPU_SUBTYPE_ARM_V6 ((cpu_subtype_t) 6)
#define CPU_SUBTYPE_ARM_V5TEJ ((cpu_subtype_t) 7)
#define CPU_SUBTYPE_ARM_XSCALE ((cpu_subtype_t) 8)
+#define CPU_SUBTYPE_ARM_V7 ((cpu_subtype_t) 9)
/*
* CPU families (sysctl hw.cpufamily)
#define CPUFAMILY_ARM_9 0xe73283ae
#define CPUFAMILY_ARM_11 0x8ff620d8
#define CPUFAMILY_ARM_XSCALE 0x53b005f5
+#define CPUFAMILY_ARM_13 0x0cc90e64
#define CPUFAMILY_INTEL_YONAH CPUFAMILY_INTEL_6_14
#define CPUFAMILY_INTEL_MEROM CPUFAMILY_INTEL_6_15
#define MACH_PORT_QLIMIT_BASIC ((mach_port_msgcount_t) 5)
#define MACH_PORT_QLIMIT_SMALL ((mach_port_msgcount_t) 16)
#define MACH_PORT_QLIMIT_LARGE ((mach_port_msgcount_t) 1024)
+#define MACH_PORT_QLIMIT_KERNEL ((mach_port_msgcount_t) 65536)
#define MACH_PORT_QLIMIT_MIN MACH_PORT_QLIMIT_ZERO
#define MACH_PORT_QLIMIT_DEFAULT MACH_PORT_QLIMIT_BASIC
#define MACH_PORT_QLIMIT_MAX MACH_PORT_QLIMIT_LARGE
/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
prssr = (processor_t)port->ip_kobject; /* Extract the processor */
is_write_unlock(current_space()); /* All done with the space now, unlock it */
- save->save_r3 = (uint64_t)(uint32_t)PerProcTable[prssr->processor_data.slot_num].ppe_vaddr; /* Pass back ther per proc */
+ save->save_r3 = (uint64_t)(uint32_t)PerProcTable[prssr->cpu_num].ppe_vaddr; /* Pass back the per proc */
return -1; /* Return and check asts */
/*
/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
+#include <kern/timer_queue.h>
#include <kern/processor.h>
#include <kern/pms.h>
proc_info->debstackptr = (vm_offset_t)debugger_stack + KERNEL_STACK_SIZE - FM_SIZE;
proc_info->debstack_top_ss = proc_info->debstackptr;
+ queue_init(&proc_info->rtclock_timer.queue);
+ proc_info->rtclock_timer.deadline = EndOfAllTime;
+
return proc_info;
}
proc_info->running = FALSE;
+ if (proc_info->cpu_number != master_cpu) {
+ timer_queue_shutdown(&proc_info->rtclock_timer.queue);
+ proc_info->rtclock_timer.deadline = EndOfAllTime;
+ }
+
fowner = proc_info->FPU_owner; /* Cache this */
if(fowner) /* If anyone owns FPU, save it */
fpu_save(fowner);
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <kern/clock.h>
#include <kern/thread.h>
+#include <kern/timer_queue.h>
#include <kern/processor.h>
#include <kern/macro_help.h>
#include <kern/spl.h>
#include <sys/kdebug.h>
#include <ppc/exception.h>
-/* XXX from <arch>/rtclock.c */
-clock_timer_func_t rtclock_timer_expire;
-
/*
* Event timer interrupt.
*
/* has a pending clock timer expired? */
if (mytimer->deadline <= abstime) { /* Have we expired the deadline? */
mytimer->has_expired = TRUE; /* Remember that we popped */
- mytimer->deadline = EndOfAllTime; /* Set timer request to the end of all time in case we have no more events */
- (*rtclock_timer_expire)(abstime); /* Process pop */
+ mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
mytimer->has_expired = FALSE;
}
}
/*
- * Set the clock deadline; called by the thread scheduler.
+ * Set the clock deadline.
*/
void etimer_set_deadline(uint64_t deadline)
{
}
splx(s);
}
+
+queue_t
+timer_queue_assign(
+ uint64_t deadline)
+{
+ struct per_proc_info *pp = getPerProc();
+ rtclock_timer_t *timer;
+
+ if (pp->running) {
+ timer = &pp->rtclock_timer;
+
+ if (deadline < timer->deadline)
+ etimer_set_deadline(deadline);
+ }
+ else
+ timer = &PerProcTable[master_cpu].ppe_vaddr->rtclock_timer;
+
+ return (&timer->queue);
+}
+
+void
+timer_queue_cancel(
+ queue_t queue,
+ uint64_t deadline,
+ uint64_t new_deadline)
+{
+ if (queue == &getPerProc()->rtclock_timer.queue) {
+ if (deadline < new_deadline)
+ etimer_set_deadline(new_deadline);
+ }
+}
/*
- * Copyright (c) 2000-2007 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
/* PPC cache line boundary here - 140 */
void * pp_cbfr;
void * pp_chud;
- uint64_t rtclock_intr_deadline;
rtclock_timer_t rtclock_timer;
unsigned int ppbbTaskEnv; /* BlueBox Task Environment */
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
return;
}
+
+
+/*
+ * Stubs for CPU Stepper
+ */
+void
+machine_run_count(__unused uint32_t count)
+{
+}
+
+boolean_t
+machine_cpu_is_inactive(__unused int num)
+{
+ return(FALSE);
+}
while(pbtcnt); /* Wait for completion */
pbt_exit:
panic_display_system_configuration();
-
+ panic_display_zprint();
+ dump_kext_info(&kdb_log);
return;
}
BootProcInfo.VMX_owner = NULL;
BootProcInfo.pp_cbfr = console_per_proc_alloc(TRUE);
BootProcInfo.rtcPop = EndOfAllTime;
+ queue_init(&BootProcInfo.rtclock_timer.queue);
+ BootProcInfo.rtclock_timer.deadline = EndOfAllTime;
BootProcInfo.pp2ndPage = (addr64_t)(uintptr_t)&BootProcInfo; /* Initial physical address of the second page */
BootProcInfo.pms.pmsStamp = 0; /* Dummy transition time */
/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
static boolean_t rtclock_timebase_initialized;
-/* XXX this should really be in a header somewhere */
-extern clock_timer_func_t rtclock_timer_expire;
-
decl_simple_lock_data(static,rtclock_lock)
/*
UNLOCK_RTC(s);
}
-void
-clock_set_timer_func(
- clock_timer_func_t func)
-{
- spl_t s;
-
- LOCK_RTC(s);
- if (rtclock_timer_expire == NULL)
- rtclock_timer_expire = func;
- UNLOCK_RTC(s);
-}
-
void
clock_interval_to_absolutetime_interval(
uint32_t interval,
/*
- * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2004-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#pragma pack(push,4)
struct rtclock_timer_t {
+ queue_head_t queue;
uint64_t deadline;
uint32_t
/*boolean_t*/ is_set:1,
m->list_req_pending = TRUE;
m->cleaning = TRUE;
- if (should_flush) {
+ if (should_flush &&
+ /* let's not flush a wired page... */
+ !m->wire_count) {
/*
* and add additional state
* for the flush
pl_count = length / PAGE_SIZE;
for (cur_offset = 0; cur_offset < length; cur_offset += PAGE_SIZE) {
ppnum_t dst_pnum;
+ int type_of_fault;
if (!upl_page_present(upl_pl, cur_offset / PAGE_SIZE)) {
/* this page is not in the UPL: skip it */
&prot,
&src_page,
&top_page,
- NULL,
+ &type_of_fault,
&error_code,
FALSE,
FALSE,
boolean_t need_collapse = FALSE;
int object_lock_type = 0;
int cur_object_lock_type;
+ vm_object_t top_object = VM_OBJECT_NULL;
KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_START,
prot &= ~VM_PROT_WRITE;
- /*
- * Set up to map the page...
- * mark the page busy, drop
- * unneeded object lock
- */
if (object != cur_object) {
- /*
- * don't need the original object anymore
+ /*
+ * We still need to hold the top object
+ * lock here to prevent a race between
+ * a read fault (taking only "shared"
+ * locks) and a write fault (taking
+ * an "exclusive" lock on the top
+ * object.
+ * Otherwise, as soon as we release the
+ * top lock, the write fault could
+ * proceed and actually complete before
+ * the read fault, and the copied page's
+ * translation could then be overwritten
+ * by the read fault's translation for
+ * the original page.
+ *
+ * Let's just record what the top object
+ * is and we'll release it later.
*/
- vm_object_unlock(object);
+ top_object = object;
/*
* switch to the object that has the new page
&type_of_fault);
}
+ if (top_object != VM_OBJECT_NULL) {
+ /*
+ * It's safe to drop the top object
+ * now that we've done our
+ * vm_fault_enter(). Any other fault
+ * in progress for that virtual
+ * address will either find our page
+ * and translation or put in a new page
+ * and translation.
+ */
+ vm_object_unlock(top_object);
+ top_object = VM_OBJECT_NULL;
+ }
+
if (need_collapse == TRUE)
vm_object_collapse(object, offset, TRUE);
const vm_offset_t vm_max_kernel_address = VM_MAX_KERNEL_ADDRESS;
boolean_t vm_kernel_ready = FALSE;
+boolean_t zlog_ready = FALSE;
/*
* vm_mem_bootstrap initializes the virtual memory system.
vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling pmap_init\n"));
pmap_init();
+ zlog_ready = TRUE;
+
if (PE_parse_boot_argn("zsize", &zsizearg, sizeof (zsizearg)))
zsize = zsizearg * 1024ULL * 1024ULL;
else {
movl S_ARG0, %ecx
+ lfence
rdtsc
lfence
+
void PE_init_kprintf(
boolean_t vm_initialized);
void mac_cred_label_free(struct label *label);
void mac_cred_label_init(kauth_cred_t cred);
void mac_cred_label_update(kauth_cred_t cred, struct label *newlabel);
-void mac_cred_label_update_execve(vfs_context_t ctx, kauth_cred_t newcred,
+int mac_cred_label_update_execve(vfs_context_t ctx, kauth_cred_t newcred,
struct vnode *vp, struct label *scriptvnodelabel,
struct label *execlabel);
void mac_devfs_label_associate_device(dev_t dev, struct devnode *de,
The final label, execlabel, corresponds to a label supplied by a
user space application through the use of the mac_execve system call.
+ If non-NULL, the value pointed to by disjointp will be set to 0 to
+ indicate that the old and new credentials are not disjoint, or 1 to
+ indicate that they are.
+
The vnode lock is held during this operation. No changes should be
made to the old credential structure.
*/
struct vnode *vp,
struct label *vnodelabel,
struct label *scriptvnodelabel,
- struct label *execlabel
+ struct label *execlabel,
+ int *disjointp
);
/**
@brief Update a credential label
return (error);
}
-void
+int
mac_cred_label_update_execve(vfs_context_t ctx, kauth_cred_t new, struct vnode *vp,
struct label *scriptvnodelabel, struct label *execl)
{
kauth_cred_t cred;
+ int disjoint = 0;
if (!mac_proc_enforce && !mac_vnode_enforce)
- return;
+ return disjoint;
/* mark the new cred to indicate "matching" includes the label */
new->cr_flags |= CRF_MAC_ENFORCE;
cred = vfs_context_ucred(ctx);
MAC_PERFORM(cred_label_update_execve, cred, new, vp, vp->v_label,
- scriptvnodelabel, execl);
+ scriptvnodelabel, execl, &disjoint);
+
+ return (disjoint);
}
int