From ecc0ceb4089d506a0b8d16686a95817b331af9cb Mon Sep 17 00:00:00 2001 From: Apple Date: Sat, 16 Jul 2016 00:29:29 +0000 Subject: [PATCH] xnu-3248.50.21.tar.gz --- bsd/dev/dtrace/dtrace.c | 84 ++++++++-- bsd/hfs/hfs_catalog.c | 23 ++- bsd/hfs/hfs_vfsutils.c | 2 +- bsd/kern/kern_exit.c | 2 + bsd/kern/kern_fork.c | 31 ++++ bsd/kern/sys_generic.c | 5 + bsd/net/route.h | 4 + bsd/netinet/dhcp_options.c | 249 +++++----------------------- bsd/netinet/dhcp_options.h | 79 +-------- bsd/netinet/flow_divert.c | 17 +- bsd/netinet/mptcp_subr.c | 4 +- bsd/netinet/tcp_cc.c | 11 ++ bsd/netinet/tcp_input.c | 86 +++++++--- bsd/netinet/tcp_ledbat.c | 33 +--- bsd/netinet/tcp_output.c | 7 +- bsd/netinet/tcp_var.h | 9 +- bsd/netkey/key.c | 21 ++- bsd/nfs/nfs_vfsops.c | 6 +- bsd/sys/dtrace_impl.h | 21 ++- config/MasterVersion | 2 +- iokit/IOKit/IOPolledInterface.h | 2 + iokit/Kernel/IOHibernateIO.cpp | 2 +- iokit/Kernel/IOMemoryDescriptor.cpp | 13 +- iokit/Kernel/IOPolledInterface.cpp | 4 + libkern/c++/OSDictionary.cpp | 11 +- libkern/c++/OSSerializeBinary.cpp | 2 +- libkern/libkern/c++/OSDictionary.h | 3 + osfmk/bank/bank.c | 7 +- osfmk/kern/ledger.h | 2 + osfmk/kern/thread.c | 16 ++ osfmk/kern/thread.h | 3 + osfmk/vm/vm_compressor.c | 15 ++ osfmk/vm/vm_map.c | 15 +- 33 files changed, 384 insertions(+), 407 deletions(-) diff --git a/bsd/dev/dtrace/dtrace.c b/bsd/dev/dtrace/dtrace.c index ae38f260a..4a2e5e23a 100644 --- a/bsd/dev/dtrace/dtrace.c +++ b/bsd/dev/dtrace/dtrace.c @@ -147,7 +147,8 @@ int dtrace_destructive_disallow = 0; dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024); size_t dtrace_difo_maxsize = (256 * 1024); dtrace_optval_t dtrace_dof_maxsize = (384 * 1024); -size_t dtrace_global_maxsize = (16 * 1024); +dtrace_optval_t dtrace_statvar_maxsize = (16 * 1024); +dtrace_optval_t dtrace_statvar_maxsize_max = (16 * 10 * 1024); size_t dtrace_actions_max = (16 * 1024); size_t dtrace_retain_max = 1024; dtrace_optval_t dtrace_helper_actions_max = 32; @@ -718,7 +719,7 @@ 
SYSCTL_PROC(_kern_dtrace, OID_AUTO, dof_maxsize, sysctl_dtrace_dof_maxsize, "Q", "dtrace dof maxsize"); static int -sysctl_dtrace_global_maxsize SYSCTL_HANDLER_ARGS +sysctl_dtrace_statvar_maxsize SYSCTL_HANDLER_ARGS { #pragma unused(oidp, arg2, req) int changed, error; @@ -730,9 +731,11 @@ sysctl_dtrace_global_maxsize SYSCTL_HANDLER_ARGS if (value <= 0) return (ERANGE); + if (value > dtrace_statvar_maxsize_max) + return (ERANGE); lck_mtx_lock(&dtrace_lock); - dtrace_global_maxsize = value; + dtrace_statvar_maxsize = value; lck_mtx_unlock(&dtrace_lock); return (0); @@ -741,14 +744,14 @@ sysctl_dtrace_global_maxsize SYSCTL_HANDLER_ARGS /* * kern.dtrace.global_maxsize * - * Set the global variable max size in bytes, check the definition of - * dtrace_global_maxsize to get the default value. Attempting to set a null or - * negative size will result in a failure. + * Set the variable max size in bytes, check the definition of + * dtrace_statvar_maxsize to get the default value. Attempting to set a null, + * too high or negative size will result in a failure. 
*/ SYSCTL_PROC(_kern_dtrace, OID_AUTO, global_maxsize, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, - &dtrace_global_maxsize, 0, - sysctl_dtrace_global_maxsize, "Q", "dtrace global maxsize"); + &dtrace_statvar_maxsize, 0, + sysctl_dtrace_statvar_maxsize, "Q", "dtrace statvar maxsize"); static int sysctl_dtrace_provide_private_probes SYSCTL_HANDLER_ARGS @@ -882,12 +885,33 @@ dtrace_canstore_statvar(uint64_t addr, size_t sz, { int i; + size_t maxglobalsize, maxlocalsize; + + maxglobalsize = dtrace_statvar_maxsize; + maxlocalsize = (maxglobalsize + sizeof (uint64_t)) * NCPU; + + if (nsvars == 0) + return (0); + for (i = 0; i < nsvars; i++) { dtrace_statvar_t *svar = svars[i]; + uint8_t scope; + size_t size; - if (svar == NULL || svar->dtsv_size == 0) + if (svar == NULL || (size = svar->dtsv_size) == 0) continue; + scope = svar->dtsv_var.dtdv_scope; + + /** + * We verify that our size is valid in the spirit of providing + * defense in depth: we want to prevent attackers from using + * DTrace to escalate an orthogonal kernel heap corruption bug + * into the ability to store to arbitrary locations in memory. 
+ */ + VERIFY((scope == DIFV_SCOPE_GLOBAL && size < maxglobalsize) || + (scope == DIFV_SCOPE_LOCAL && size < maxlocalsize)); + if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size)) return (1); } @@ -3698,7 +3722,8 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, if (!dtrace_destructive_disallow && dtrace_priv_proc_control(state) && - !dtrace_istoxic(kaddr, size)) { + !dtrace_istoxic(kaddr, size) && + dtrace_canload(kaddr, size, mstate, vstate)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyout(kaddr, uaddr, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); @@ -3713,7 +3738,8 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, if (!dtrace_destructive_disallow && dtrace_priv_proc_control(state) && - !dtrace_istoxic(kaddr, size)) { + !dtrace_istoxic(kaddr, size) && + dtrace_strcanload(kaddr, size, mstate, vstate)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyoutstr(kaddr, uaddr, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); @@ -5340,6 +5366,10 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, regs[r2] ? 
regs[r2] : dtrace_strsize_default) + 1; } else { + if (regs[r2] > LONG_MAX) { + *flags |= CPU_DTRACE_ILLOP; + break; + } tupregs[ttop].dttk_size = regs[r2]; } @@ -8799,9 +8829,10 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, break; } - if (v->dtdv_scope == DIFV_SCOPE_GLOBAL && - vt->dtdt_size > dtrace_global_maxsize) { - err += efunc(i, "oversized by-ref global\n"); + if ((v->dtdv_scope == DIFV_SCOPE_GLOBAL || + v->dtdv_scope == DIFV_SCOPE_LOCAL) && + vt->dtdt_size > dtrace_statvar_maxsize) { + err += efunc(i, "oversized by-ref static\n"); break; } } @@ -9137,6 +9168,9 @@ dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) if (srd == 0) return; + if (sval > LONG_MAX) + return; + tupregs[ttop++].dttk_size = sval; } @@ -9198,6 +9232,19 @@ dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) */ size = P2ROUNDUP(size, sizeof (uint64_t)); + /* + * Before setting the chunk size, check that we're not going + * to set it to a negative value... + */ + if (size > LONG_MAX) + return; + + /* + * ...and make certain that we didn't badly overflow. 
+ */ + if (size < ksize || size < sizeof (dtrace_dynvar_t)) + return; + if (size > vstate->dtvs_dynvars.dtds_chunksize) vstate->dtvs_dynvars.dtds_chunksize = size; } @@ -12542,6 +12589,8 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) if ((dstate->dtds_chunksize = chunksize) == 0) dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE; + VERIFY(dstate->dtds_chunksize < (LONG_MAX - sizeof (dtrace_dynhash_t))); + if (size < (min_size = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t))) size = min_size; @@ -12582,6 +12631,9 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t)); limit = (uintptr_t)base + size; + VERIFY((uintptr_t)start < limit); + VERIFY((uintptr_t)start >= (uintptr_t)base); + maxper = (limit - (uintptr_t)start) / (int)NCPU; maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize; @@ -12603,7 +12655,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) start = (dtrace_dynvar_t *)limit; } - ASSERT(limit <= (uintptr_t)base + size); + VERIFY(limit <= (uintptr_t)base + size); for (;;) { next = (dtrace_dynvar_t *)((uintptr_t)dvar + @@ -12612,6 +12664,8 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) if ((uintptr_t)next + dstate->dtds_chunksize >= limit) break; + VERIFY((uintptr_t)dvar >= (uintptr_t)base && + (uintptr_t)dvar <= (uintptr_t)base + size); dvar->dtdv_next = next; dvar = next; } diff --git a/bsd/hfs/hfs_catalog.c b/bsd/hfs/hfs_catalog.c index 8e0a65c4f..0069ac2d4 100644 --- a/bsd/hfs/hfs_catalog.c +++ b/bsd/hfs/hfs_catalog.c @@ -3057,6 +3057,7 @@ exit: } #define SMALL_DIRENTRY_SIZE (int)(sizeof(struct dirent) - (MAXNAMLEN + 1) + 8) +#define MAX_LINKINFO_ENTRIES 3000 /* * Callback to pack directory entries. 
@@ -3523,12 +3524,13 @@ cat_getdirentries(struct hfsmount *hfsmp, u_int32_t entrycnt, directoryhint_t *d struct packdirentry_state state; void * buffer; int bufsize; + int maxlinks; int result; int index; int have_key; int extended; - + extended = flags & VNODE_READDIR_EXTENDED; if (extended && (hfsmp->hfs_flags & HFS_STANDARD)) { @@ -3537,10 +3539,23 @@ cat_getdirentries(struct hfsmount *hfsmp, u_int32_t entrycnt, directoryhint_t *d fcb = hfsmp->hfs_catalog_cp->c_datafork; /* - * Get a buffer for link info array, btree iterator and a direntry: + * Get a buffer for link info array, btree iterator and a direntry. + * + * We impose an cap of 3000 link entries when trying to compute + * the total number of hardlink entries that we'll allow in the + * linkinfo array. + * + * Note that in the case where there are very few hardlinks, + * this does not restrict or prevent us from vending out as many entries + * as we can to the uio_resid, because the getdirentries callback + * uiomoves the directory entries to the uio itself and does not use + * this MALLOC'd array. It also limits itself to maxlinks of hardlinks. 
*/ - maxlinks = MIN(entrycnt, (u_int32_t)(uio_resid(uio) / SMALL_DIRENTRY_SIZE)); - bufsize = MAXPATHLEN + (maxlinks * sizeof(linkinfo_t)) + sizeof(*iterator); + + /* Now compute the maximum link array size */ + maxlinks = MIN (entrycnt, MAX_LINKINFO_ENTRIES); + + bufsize = MAXPATHLEN + (maxlinks * sizeof(linkinfo_t)) + sizeof(*iterator); if (extended) { bufsize += 2*sizeof(struct direntry); } diff --git a/bsd/hfs/hfs_vfsutils.c b/bsd/hfs/hfs_vfsutils.c index 1015fbd91..ade6d0ca0 100644 --- a/bsd/hfs/hfs_vfsutils.c +++ b/bsd/hfs/hfs_vfsutils.c @@ -3844,7 +3844,7 @@ hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid) struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((void *)((char *)&cp->c_attr.ca_finderinfo + 16)); int lockflags; - if (hfs_start_transaction(hfsmp) != 0) { + if ((error = hfs_start_transaction(hfsmp)) != 0) { return error; } lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); diff --git a/bsd/kern/kern_exit.c b/bsd/kern/kern_exit.c index d99430bd0..f9739accd 100644 --- a/bsd/kern/kern_exit.c +++ b/bsd/kern/kern_exit.c @@ -1372,6 +1372,7 @@ reap_child_locked(proc_t parent, proc_t child, int deadparent, int reparentedtoi #if CONFIG_FINE_LOCK_GROUPS lck_mtx_destroy(&child->p_mlock, proc_mlock_grp); lck_mtx_destroy(&child->p_fdmlock, proc_fdmlock_grp); + lck_mtx_destroy(&child->p_ucred_mlock, proc_ucred_mlock_grp); #if CONFIG_DTRACE lck_mtx_destroy(&child->p_dtrace_sprlock, proc_lck_grp); #endif @@ -1379,6 +1380,7 @@ reap_child_locked(proc_t parent, proc_t child, int deadparent, int reparentedtoi #else /* CONFIG_FINE_LOCK_GROUPS */ lck_mtx_destroy(&child->p_mlock, proc_lck_grp); lck_mtx_destroy(&child->p_fdmlock, proc_lck_grp); + lck_mtx_destroy(&child->p_ucred_mlock, proc_lck_grp); #if CONFIG_DTRACE lck_mtx_destroy(&child->p_dtrace_sprlock, proc_lck_grp); #endif diff --git a/bsd/kern/kern_fork.c b/bsd/kern/kern_fork.c index e23d52ead..284752296 100644 --- a/bsd/kern/kern_fork.c +++ 
b/bsd/kern/kern_fork.c @@ -1047,6 +1047,15 @@ bad: void forkproc_free(proc_t p) { +#if CONFIG_PERSONAS + persona_proc_drop(p); +#endif /* CONFIG_PERSONAS */ + +#if PSYNCH + pth_proc_hashdelete(p); +#endif /* PSYNCH */ + + workqueue_destroy_lock(p); /* We held signal and a transition locks; drop them */ proc_signalend(p, 0); @@ -1091,12 +1100,34 @@ forkproc_free(proc_t p) /* Update the audit session proc count */ AUDIT_SESSION_PROCEXIT(p); +#if CONFIG_FINE_LOCK_GROUPS + lck_mtx_destroy(&p->p_mlock, proc_mlock_grp); + lck_mtx_destroy(&p->p_fdmlock, proc_fdmlock_grp); + lck_mtx_destroy(&p->p_ucred_mlock, proc_ucred_mlock_grp); +#if CONFIG_DTRACE + lck_mtx_destroy(&p->p_dtrace_sprlock, proc_lck_grp); +#endif + lck_spin_destroy(&p->p_slock, proc_slock_grp); +#else /* CONFIG_FINE_LOCK_GROUPS */ + lck_mtx_destroy(&p->p_mlock, proc_lck_grp); + lck_mtx_destroy(&p->p_fdmlock, proc_lck_grp); + lck_mtx_destroy(&p->p_ucred_mlock, proc_lck_grp); +#if CONFIG_DTRACE + lck_mtx_destroy(&p->p_dtrace_sprlock, proc_lck_grp); +#endif + lck_spin_destroy(&p->p_slock, proc_lck_grp); +#endif /* CONFIG_FINE_LOCK_GROUPS */ + /* Release the credential reference */ kauth_cred_unref(&p->p_ucred); proc_list_lock(); /* Decrement the count of processes in the system */ nprocs--; + + /* Take it out of process hash */ + LIST_REMOVE(p, p_hash); + proc_list_unlock(); thread_call_free(p->p_rcall); diff --git a/bsd/kern/sys_generic.c b/bsd/kern/sys_generic.c index 1fb49eb91..41e3d8b69 100644 --- a/bsd/kern/sys_generic.c +++ b/bsd/kern/sys_generic.c @@ -3163,6 +3163,9 @@ ledger(struct proc *p, struct ledger_args *args, __unused int32_t *retval) else if (args->cmd == LEDGER_LIMIT) error = copyin(args->arg2, (char *)&lla, sizeof (lla)); #endif + else if ((args->cmd < 0) || (args->cmd > LEDGER_MAX_CMD)) + return (EINVAL); + if (error) return (error); if (len < 0) @@ -3239,6 +3242,8 @@ ledger(struct proc *p, struct ledger_args *args, __unused int32_t *retval) } default: + panic("ledger syscall logic error -- 
command type %d", args->cmd); + proc_rele(proc); rval = EINVAL; } diff --git a/bsd/net/route.h b/bsd/net/route.h index 8ddef833d..3ee46dfe0 100644 --- a/bsd/net/route.h +++ b/bsd/net/route.h @@ -168,6 +168,7 @@ struct route { * gateway rather than the ultimate destination. */ +#define NRTT_HIST 10 /* * Kernel routing entry structure. */ @@ -205,6 +206,9 @@ struct rtentry { uint64_t rt_expire; /* expiration time in uptime seconds */ uint64_t base_calendartime; /* calendar time upon entry creation */ uint64_t base_uptime; /* uptime upon entry creation */ + u_int32_t rtt_hist[NRTT_HIST]; /* RTT history sample by TCP connections */ + u_int32_t rtt_expire_ts; /* RTT history expire timestamp */ + u_int8_t rtt_index; /* Index into RTT history */ }; /* diff --git a/bsd/netinet/dhcp_options.c b/bsd/netinet/dhcp_options.c index 29835c7b9..db54e59d5 100644 --- a/bsd/netinet/dhcp_options.c +++ b/bsd/netinet/dhcp_options.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2007 Apple Inc. All rights reserved. + * Copyright (c) 2002-2016 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -45,11 +45,12 @@ #include #include #include -#include - #include #include +#ifndef TEST_DHCP_OPTIONS +#include + #ifdef DHCP_DEBUG #define dprintf(x) printf x; #else /* !DHCP_DEBUG */ @@ -80,6 +81,19 @@ my_realloc(void * oldptr, int oldsize, int newsize) my_free(oldptr); return (data); } +#else +/* + * To build: + * xcrun -sdk macosx.internal cc -DTEST_DHCP_OPTIONS -o /tmp/dhcp_options dhcp_options.c -I .. 
+ */ +#include +#include +#include +#define my_free free +#define my_malloc malloc +#define my_realloc(ptr, old_size, new_size) realloc(ptr, new_size) +#define dprintf(x) printf x; +#endif /* * Functions: ptrlist_* @@ -244,7 +258,8 @@ dhcpol_parse_buffer(dhcpol_t * list, const void * buffer, int length) len = length; tag = dhcptag_pad_e; - for (scan = (const uint8_t *)buffer; tag != dhcptag_end_e && len > 0; ) { + for (scan = (const uint8_t *)buffer; + tag != dhcptag_end_e && len > DHCP_TAG_OFFSET; ) { tag = scan[DHCP_TAG_OFFSET]; @@ -259,19 +274,24 @@ dhcpol_parse_buffer(dhcpol_t * list, const void * buffer, int length) scan++; len--; break; - default: { - uint8_t option_len = scan[DHCP_LEN_OFFSET]; - - dhcpol_add(list, scan); - len -= (option_len + 2); - scan += (option_len + 2); + default: + if (len > DHCP_LEN_OFFSET) { + uint8_t option_len; + + option_len = scan[DHCP_LEN_OFFSET]; + dhcpol_add(list, scan); + len -= (option_len + DHCP_OPTION_OFFSET); + scan += (option_len + DHCP_OPTION_OFFSET); + } + else { + len = -1; + } break; - } } } if (len < 0) { /* ran off the end */ - dprintf(("dhcp_options: parse failed near tag %d", tag)); + dprintf(("dhcp_options: parse failed near tag %d\n", tag)); dhcpol_free(list); return (FALSE); } @@ -315,50 +335,6 @@ dhcpol_find(dhcpol_t * list, int tag, int * len_p, int * start) return (NULL); } -#if 0 -/* - * Function: dhcpol_get - * - * Purpose: - * Accumulate all occurences of the given option into a - * malloc'd buffer, and return its length. Used to get - * all occurrences of a particular option in a single - * data area. - * Note: - * Use _FREE(val, M_TEMP) to free the returned data area. 
- */ -void * -dhcpol_get(dhcpol_t * list, int tag, int * len_p) -{ - int i; - char * data = NULL; - int data_len = 0; - - if (tag == dhcptag_end_e || tag == dhcptag_pad_e) - return (NULL); - - for (i = 0; i < dhcpol_count(list); i++) { - const uint8_t * option = dhcpol_element(list, i); - - if (option[DHCP_TAG_OFFSET] == tag) { - int len = option[DHCP_LEN_OFFSET]; - - if (data_len == 0) { - data = my_malloc(len); - } - else { - data = my_realloc(data, data_len, data_len + len); - } - FIX ME: test data NULL - bcopy(option + DHCP_OPTION_OFFSET, data + data_len, len); - data_len += len; - } - } - *len_p = data_len; - return (data); -} -#endif - /* * Function: dhcpol_parse_packet * @@ -420,163 +396,17 @@ dhcpol_parse_packet(dhcpol_t * options, const struct dhcp * pkt, int len) return (TRUE); } -/* - * Module: dhcpoa - * - * Purpose: - * Types and functions to create new dhcp option areas. - */ - -/* - * Function: dhcpoa_{init_common, init_no_end, init} - * - * Purpose: - * Initialize an option area structure so that it can be used - * in calling the dhcpoa_* routines. - */ -static void -dhcpoa_init_common(dhcpoa_t * oa_p, void * buffer, int size, int reserve) -{ - bzero(oa_p, sizeof(*oa_p)); - oa_p->oa_buffer = buffer; - oa_p->oa_size = size; - oa_p->oa_reserve = reserve; -} - -void -dhcpoa_init_no_end(dhcpoa_t * oa_p, void * buffer, int size) -{ - dhcpoa_init_common(oa_p, buffer, size, 0); - return; -} - -int -dhcpoa_size(dhcpoa_t * oa_p) -{ - return (oa_p->oa_size); -} - -void -dhcpoa_init(dhcpoa_t * oa_p, void * buffer, int size) -{ - /* initialize the area, reserve space for the end tag */ - dhcpoa_init_common(oa_p, buffer, size, 1); - return; -} -/* - * Function: dhcpoa_add - * - * Purpose: - * Add an option to the option area. 
- */ -dhcpoa_ret_t -dhcpoa_add(dhcpoa_t * oa_p, dhcptag_t tag, int len, const void * option) -{ - if (len > DHCP_OPTION_SIZE_MAX) { - dprintf(("tag %d option %d > %d\n", tag, len, DHCP_OPTION_SIZE_MAX)); - return (dhcpoa_failed_e); - } - - if (oa_p->oa_end_tag) { - dprintf(("attempt to add data after end tag\n")); - return (dhcpoa_failed_e); - } - - switch (tag) { - case dhcptag_end_e: - if ((oa_p->oa_offset + 1) > oa_p->oa_size) { - /* this can't happen since we're careful to leave space */ - dprintf(("can't add end tag %d > %d\n", - oa_p->oa_offset + oa_p->oa_reserve, oa_p->oa_size)); - return (dhcpoa_failed_e); - } - ((uint8_t *)oa_p->oa_buffer)[oa_p->oa_offset + DHCP_TAG_OFFSET] = tag; - oa_p->oa_offset++; - oa_p->oa_end_tag = 1; - break; - - case dhcptag_pad_e: - /* 1 for pad tag */ - if ((oa_p->oa_offset + oa_p->oa_reserve + 1) > oa_p->oa_size) { - dprintf(("can't add pad tag %d > %d\n", - oa_p->oa_offset + oa_p->oa_reserve + 1, oa_p->oa_size)); - return (dhcpoa_full_e); - } - ((uint8_t *)oa_p->oa_buffer)[oa_p->oa_offset + DHCP_TAG_OFFSET] = tag; - oa_p->oa_offset++; - break; - - default: - /* 2 for tag/len */ - if ((oa_p->oa_offset + len + 2 + oa_p->oa_reserve) > oa_p->oa_size) { - dprintf(("can't add tag %d (%d > %d)\n", tag, - oa_p->oa_offset + len + 2 + oa_p->oa_reserve, - oa_p->oa_size)); - return (dhcpoa_full_e); - } - ((uint8_t *)oa_p->oa_buffer)[oa_p->oa_offset + DHCP_TAG_OFFSET] = tag; - ((uint8_t *)oa_p->oa_buffer)[oa_p->oa_offset + DHCP_LEN_OFFSET] = (uint8_t)len; - if (len) { - memcpy(oa_p->oa_buffer + (DHCP_OPTION_OFFSET + oa_p->oa_offset), - option, len); - } - oa_p->oa_offset += len + DHCP_OPTION_OFFSET; - break; - } - oa_p->oa_option_count++; - return (dhcpoa_success_e); -} - -/* - * Function: dhcpoa_add_dhcpmsg - * - * Purpose: - * Add a dhcp message option to the option area. 
- */ -dhcpoa_ret_t -dhcpoa_add_dhcpmsg(dhcpoa_t * oa_p, dhcp_msgtype_t msgtype) -{ - return (dhcpoa_add(oa_p, dhcptag_dhcp_message_type_e, - sizeof(msgtype), &msgtype)); -} - -int -dhcpoa_used(dhcpoa_t * oa_p) -{ - return (oa_p->oa_offset); -} - -int -dhcpoa_freespace(dhcpoa_t * oa_p) -{ - int freespace; - - freespace = oa_p->oa_size - oa_p->oa_offset - oa_p->oa_reserve; - if (freespace < 0) { - freespace = 0; - } - return (freespace); -} - -int -dhcpoa_count(dhcpoa_t * oa_p) -{ - return (oa_p->oa_option_count); -} - -void * -dhcpoa_buffer(dhcpoa_t * oa_p) -{ - return (oa_p->oa_buffer); -} - - #ifdef TEST_DHCP_OPTIONS char test_empty[] = { 99, 130, 83, 99, 255, }; +char test_short[] = { + 99, 130, 83, 99, + 1, +}; + char test_simple[] = { 99, 130, 83, 99, 1, 4, 255, 255, 252, 0, @@ -604,7 +434,7 @@ char test_no_end[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; -char test_too_short[] = { +char test_no_magic[] = { 0x1 }; struct test { @@ -619,7 +449,8 @@ struct test tests[] = { { "simple", test_simple, sizeof(test_simple), TRUE }, { "vendor", test_vendor, sizeof(test_vendor), TRUE }, { "no_end", test_no_end, sizeof(test_no_end), TRUE }, - { "too_short", test_too_short, sizeof(test_too_short), FALSE }, + { "no magic", test_no_magic, sizeof(test_no_magic), FALSE }, + { "short", test_short, sizeof(test_short), FALSE }, { NULL, NULL, 0, FALSE }, }; diff --git a/bsd/netinet/dhcp_options.h b/bsd/netinet/dhcp_options.h index 14e7eda8a..6986a9ce6 100644 --- a/bsd/netinet/dhcp_options.h +++ b/bsd/netinet/dhcp_options.h @@ -3,7 +3,7 @@ #define _NETINET_DHCP_OPTIONS_H #include /* - * Copyright (c) 1999-2007 Apple Inc. All rights reserved. + * Copyright (c) 1999-2016 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -194,85 +194,8 @@ boolean_t dhcpol_parse_buffer(dhcpol_t * list, int length); const void * dhcpol_find(dhcpol_t * list, int tag, int * len_p, int * start); -#if 0 -void * dhcpol_get(dhcpol_t * list, int tag, int * len_p); -#endif boolean_t dhcpol_parse_packet(dhcpol_t * options, const struct dhcp * pkt, int len); -void dhcpol_print(dhcpol_t * list); -/* - * Module: dhcpoa (dhcp options area) - * - * Purpose: - * Types and functions to create new dhcp option areas. - */ - -/* - * Struct: dhcpoa_s - * Purpose: - * To record information about a dhcp option data area. - */ -struct dhcpoa_s { - uint8_t * oa_buffer; /* data area to hold options */ - int oa_size; /* size of buffer */ - int oa_offset; /* offset of next option to write */ - int oa_end_tag; /* to mark when options are terminated */ - int oa_option_count;/* number of options present */ - int oa_reserve; /* space to reserve, either 0 or 1 */ -}; - -/* - * Type: dhcpoa_t - * - * Purpose: - * To record information about a dhcp option data area. 
- */ -typedef struct dhcpoa_s dhcpoa_t; - -/* - * Type:dhcpoa_ret_t - * - * Purpose: - * outine return codes - */ -typedef enum { - dhcpoa_success_e = 0, - dhcpoa_failed_e, - dhcpoa_full_e, -} dhcpoa_ret_t; - -void -dhcpoa_init(dhcpoa_t * opt, void * buffer, int size); - -void -dhcpoa_init_no_end(dhcpoa_t * opt, void * buffer, int size); - -dhcpoa_ret_t -dhcpoa_add(dhcpoa_t * oa_p, dhcptag_t tag, int len, const void * data); - -dhcpoa_ret_t -dhcpoa_add_dhcpmsg(dhcpoa_t * oa_p, dhcp_msgtype_t msgtype); - -#if 0 -dhcpoa_ret_t -dhcpoa_vendor_add(dhcpoa_t * oa_p, dhcpoa_t * vendor_oa_p, - dhcptag_t tag, int len, void * option); -#endif - -int -dhcpoa_used(dhcpoa_t * oa_p); - -int -dhcpoa_count(dhcpoa_t * oa_p); - -void * -dhcpoa_buffer(dhcpoa_t * oa_p); - -int -dhcpoa_freespace(dhcpoa_t * oa_p); - -int -dhcpoa_size(dhcpoa_t * oa_p); #endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET_DHCP_OPTIONS_H */ diff --git a/bsd/netinet/flow_divert.c b/bsd/netinet/flow_divert.c index cc2e2c8fe..d9f02927f 100644 --- a/bsd/netinet/flow_divert.c +++ b/bsd/netinet/flow_divert.c @@ -103,6 +103,7 @@ #define GROUP_COUNT_MAX 32 #define FLOW_DIVERT_MAX_NAME_SIZE 4096 #define FLOW_DIVERT_MAX_KEY_SIZE 1024 +#define FLOW_DIVERT_MAX_TRIE_MEMORY (1024 * 1024) #define DNS_SERVICE_GROUP_UNIT (GROUP_COUNT_MAX + 1) @@ -2178,6 +2179,7 @@ flow_divert_handle_app_map_create(mbuf_t packet, int offset) size_t nodes_mem_size; int prefix_count = 0; int signing_id_count = 0; + size_t trie_memory_size = 0; lck_rw_lock_exclusive(&g_flow_divert_group_lck); @@ -2193,6 +2195,11 @@ flow_divert_handle_app_map_create(mbuf_t packet, int offset) /* Get the number of shared prefixes in the new set of signing ID strings */ flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_PREFIX_COUNT, sizeof(prefix_count), &prefix_count, NULL); + if (prefix_count < 0) { + lck_rw_done(&g_flow_divert_group_lck); + return; + } + /* Compute the number of signing IDs and the total amount of bytes needed to store them */ for 
(cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0); cursor >= 0; @@ -2219,10 +2226,18 @@ flow_divert_handle_app_map_create(mbuf_t packet, int offset) child_maps_mem_size = (sizeof(*new_trie.child_maps) * CHILD_MAP_SIZE * new_trie.child_maps_count); bytes_mem_size = (sizeof(*new_trie.bytes) * new_trie.bytes_count); - MALLOC(new_trie.memory, void *, nodes_mem_size + child_maps_mem_size + bytes_mem_size, M_TEMP, M_WAITOK); + trie_memory_size = nodes_mem_size + child_maps_mem_size + bytes_mem_size; + if (trie_memory_size > FLOW_DIVERT_MAX_TRIE_MEMORY) { + FDLOG(LOG_ERR, &nil_pcb, "Trie memory size (%u) is too big (maximum is %u)", trie_memory_size, FLOW_DIVERT_MAX_TRIE_MEMORY); + lck_rw_done(&g_flow_divert_group_lck); + return; + } + + MALLOC(new_trie.memory, void *, trie_memory_size, M_TEMP, M_WAITOK); if (new_trie.memory == NULL) { FDLOG(LOG_ERR, &nil_pcb, "Failed to allocate %lu bytes of memory for the signing ID trie", nodes_mem_size + child_maps_mem_size + bytes_mem_size); + lck_rw_done(&g_flow_divert_group_lck); return; } diff --git a/bsd/netinet/mptcp_subr.c b/bsd/netinet/mptcp_subr.c index d9a35da30..ac55c8a2d 100644 --- a/bsd/netinet/mptcp_subr.c +++ b/bsd/netinet/mptcp_subr.c @@ -1963,13 +1963,11 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) MPT_UNLOCK(mp_tp); mpt_mbuf = sb_mb; - mpt_dsn = mpt_mbuf->m_pkthdr.mp_dsn; while (mpt_mbuf && ((mpt_mbuf->m_pkthdr.mp_rlen == 0) || (mpt_mbuf->m_pkthdr.mp_rlen <= (u_int32_t)off))) { off -= mpt_mbuf->m_pkthdr.mp_rlen; mpt_mbuf = mpt_mbuf->m_next; - mpt_dsn = mpt_mbuf->m_pkthdr.mp_dsn; } if (mpts->mpts_flags & MPTSF_MP_DEGRADED) mptcplog((LOG_DEBUG, "MPTCP Sender: %s cid = %d " @@ -1979,7 +1977,7 @@ mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) mpts->mpts_probecnt), MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG); - VERIFY(mpt_mbuf && (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP)); + VERIFY((mpt_mbuf == NULL) || (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP)); 
head = tail = NULL; diff --git a/bsd/netinet/tcp_cc.c b/bsd/netinet/tcp_cc.c index fdb4f8fbf..ade6b7d03 100644 --- a/bsd/netinet/tcp_cc.c +++ b/bsd/netinet/tcp_cc.c @@ -59,6 +59,9 @@ struct tcp_cc_debug_state { uint32_t ccd_sndcc; uint32_t ccd_sndhiwat; uint32_t ccd_bytes_acked; + u_int8_t ccd_cc_index; + u_int8_t ccd_unused_1__; + u_int16_t ccd_unused_2__; union { struct { uint32_t ccd_last_max; @@ -67,6 +70,9 @@ struct tcp_cc_debug_state { uint32_t ccd_avg_lastmax; uint32_t ccd_mean_deviation; } cubic_state; + struct { + u_int32_t led_base_rtt; + } ledbat_state; } u; }; @@ -240,6 +246,7 @@ tcp_ccdbg_trace(struct tcpcb *tp, struct tcphdr *th, int32_t event) dbg_state.ccd_sndcc = inp->inp_socket->so_snd.sb_cc; dbg_state.ccd_sndhiwat = inp->inp_socket->so_snd.sb_hiwat; dbg_state.ccd_bytes_acked = tp->t_bytes_acked; + dbg_state.ccd_cc_index = tp->tcp_cc_index; switch (tp->tcp_cc_index) { case TCP_CC_ALGO_CUBIC_INDEX: dbg_state.u.cubic_state.ccd_last_max = @@ -253,6 +260,10 @@ tcp_ccdbg_trace(struct tcpcb *tp, struct tcphdr *th, int32_t event) dbg_state.u.cubic_state.ccd_mean_deviation = tp->t_ccstate->cub_mean_dev; break; + case TCP_CC_ALGO_BACKGROUND_INDEX: + dbg_state.u.ledbat_state.led_base_rtt = + get_base_rtt(tp); + break; default: break; } diff --git a/bsd/netinet/tcp_input.c b/bsd/netinet/tcp_input.c index 38e988614..24450b276 100644 --- a/bsd/netinet/tcp_input.c +++ b/bsd/netinet/tcp_input.c @@ -152,6 +152,9 @@ struct tcphdr tcp_savetcp; #define DBG_FNC_TCP_INPUT NETDBG_CODE(DBG_NETTCP, (3 << 8)) #define DBG_FNC_TCP_NEWCONN NETDBG_CODE(DBG_NETTCP, (7 << 8)) +#define TCP_RTT_HISTORY_EXPIRE_TIME (60 * TCP_RETRANSHZ) +#define TCP_RECV_THROTTLE_WIN (5 * TCP_RETRANSHZ) + tcp_cc tcp_ccgen; struct tcpstat tcpstat; @@ -5161,12 +5164,17 @@ uint32_t get_base_rtt(struct tcpcb *tp) { uint32_t base_rtt = 0, i; - for (i = 0; i < N_RTT_BASE; ++i) { - if (tp->rtt_hist[i] != 0 && - (base_rtt == 0 || tp->rtt_hist[i] < base_rtt)) - base_rtt = tp->rtt_hist[i]; + struct rtentry 
*rt = tp->t_inpcb->inp_route.ro_rt; + + if (rt != NULL) { + for (i = 0; i < NRTT_HIST; ++i) { + if (rt->rtt_hist[i] != 0 && + (base_rtt == 0 || rt->rtt_hist[i] < base_rtt)) + base_rtt = rt->rtt_hist[i]; + } } - return base_rtt; + + return (base_rtt); } /* Each value of RTT base represents the minimum RTT seen in a minute. @@ -5175,31 +5183,59 @@ get_base_rtt(struct tcpcb *tp) void update_base_rtt(struct tcpcb *tp, uint32_t rtt) { - int32_t i, qdelay; u_int32_t base_rtt; + struct rtentry *rt; - if (++tp->rtt_count >= rtt_samples_per_slot) { + if ((rt = tp->t_inpcb->inp_route.ro_rt) == NULL) + return; + if (rt->rtt_expire_ts == 0) { + RT_LOCK_SPIN(rt); + /* check again to avoid any race */ + if (rt->rtt_expire_ts != 0) { + RT_UNLOCK(rt); + goto update; + } + rt->rtt_expire_ts = tcp_now; + rt->rtt_index = 0; + rt->rtt_hist[0] = rtt; + RT_UNLOCK(rt); + return; + } +update: #if TRAFFIC_MGT - /* - * If the recv side is being throttled, check if the - * current RTT is closer to the base RTT seen in - * first (recent) two slots. If so, unthrottle the stream. - */ - if (tp->t_flagsext & TF_RECV_THROTTLE) { - base_rtt = min(tp->rtt_hist[0], tp->rtt_hist[1]); - qdelay = tp->t_rttcur - base_rtt; - if (qdelay < target_qdelay) - tp->t_flagsext &= ~(TF_RECV_THROTTLE); + /* + * If the recv side is being throttled, check if the + * current RTT is closer to the base RTT seen in + * first (recent) two slots. If so, unthrottle the stream. 
+ */ + if ((tp->t_flagsext & TF_RECV_THROTTLE) && + (int)(tcp_now - tp->t_recv_throttle_ts) >= TCP_RECV_THROTTLE_WIN) { + base_rtt = get_base_rtt(tp); + if (tp->t_rttcur <= (base_rtt + target_qdelay)) { + tp->t_flagsext &= ~TF_RECV_THROTTLE; + tp->t_recv_throttle_ts = 0; } + } #endif /* TRAFFIC_MGT */ - - for (i = (N_RTT_BASE-1); i > 0; --i) { - tp->rtt_hist[i] = tp->rtt_hist[i-1]; + if ((int)(tcp_now - rt->rtt_expire_ts) >= + TCP_RTT_HISTORY_EXPIRE_TIME) { + RT_LOCK_SPIN(rt); + /* check the condition again to avoid race */ + if ((int)(tcp_now - rt->rtt_expire_ts) >= + TCP_RTT_HISTORY_EXPIRE_TIME) { + rt->rtt_index++; + if (rt->rtt_index >= NRTT_HIST) + rt->rtt_index = 0; + rt->rtt_hist[rt->rtt_index] = rtt; + rt->rtt_expire_ts = tcp_now; + } else { + rt->rtt_hist[rt->rtt_index] = + min(rt->rtt_hist[rt->rtt_index], rtt); } - tp->rtt_hist[0] = rtt; - tp->rtt_count = 0; + RT_UNLOCK(rt); } else { - tp->rtt_hist[0] = min(tp->rtt_hist[0], rtt); + rt->rtt_hist[rt->rtt_index] = + min(rt->rtt_hist[rt->rtt_index], rtt); } } @@ -5444,9 +5480,9 @@ tcp_mss(tp, offer, input_ifscope) int offer; unsigned int input_ifscope; { - register struct rtentry *rt; + struct rtentry *rt; struct ifnet *ifp; - register int rtt, mss; + int rtt, mss; u_int32_t bufsize; struct inpcb *inp; struct socket *so; diff --git a/bsd/netinet/tcp_ledbat.c b/bsd/netinet/tcp_ledbat.c index aa2d32dbc..9e18bf9eb 100644 --- a/bsd/netinet/tcp_ledbat.c +++ b/bsd/netinet/tcp_ledbat.c @@ -134,8 +134,7 @@ extern int rtt_samples_per_slot; static void update_cwnd(struct tcpcb *tp, uint32_t incr) { uint32_t max_allowed_cwnd = 0, flight_size = 0; - uint32_t qdelay, base_rtt; - int32_t off_target; + uint32_t base_rtt; base_rtt = get_base_rtt(tp); @@ -146,12 +145,10 @@ static void update_cwnd(struct tcpcb *tp, uint32_t incr) { tp->snd_cwnd += incr; goto check_max; } - - qdelay = tp->t_rttcur - base_rtt; - off_target = (int32_t)(target_qdelay - qdelay); - if (off_target >= 0) { - /* Delay decreased or remained the same, we 
can increase + if (tp->t_rttcur <= (base_rtt + target_qdelay)) { + /* + * Delay decreased or remained the same, we can increase * the congestion window according to RFC 3465. * * Move background slow-start threshold to current @@ -333,28 +330,6 @@ tcp_ledbat_post_fr(struct tcpcb *tp, struct tcphdr *th) { */ void tcp_ledbat_after_idle(struct tcpcb *tp) { - int32_t n = N_RTT_BASE, i = (N_RTT_BASE - 1); - - /* Decide how many base history entries have to be cleared - * based on how long the connection has been idle. - */ - - if (tp->t_rttcur > 0) { - int32_t nrtt, idle_time; - - idle_time = tcp_now - tp->t_rcvtime; - nrtt = idle_time / tp->t_rttcur; - n = nrtt / rtt_samples_per_slot; - if (n > N_RTT_BASE) - n = N_RTT_BASE; - } - for (i = (N_RTT_BASE - 1); n > 0; --i, --n) { - tp->rtt_hist[i] = 0; - } - for (n = (N_RTT_BASE - 1); i >= 0; --i, --n) { - tp->rtt_hist[n] = tp->rtt_hist[i]; - tp->rtt_hist[i] = 0; - } /* Reset the congestion window */ tp->snd_cwnd = tp->t_maxseg * bg_ss_fltsz; diff --git a/bsd/netinet/tcp_output.c b/bsd/netinet/tcp_output.c index 4dfd0bc8f..2dda8cb0f 100644 --- a/bsd/netinet/tcp_output.c +++ b/bsd/netinet/tcp_output.c @@ -3043,7 +3043,6 @@ static int tcp_recv_throttle (struct tcpcb *tp) { uint32_t base_rtt, newsize; - int32_t qdelay; struct sockbuf *sbrcv = &tp->t_inpcb->inp_socket->so_rcv; if (tcp_use_rtt_recvbg == 1 && @@ -3063,14 +3062,14 @@ tcp_recv_throttle (struct tcpcb *tp) base_rtt = get_base_rtt(tp); if (base_rtt != 0 && tp->t_rttcur != 0) { - qdelay = tp->t_rttcur - base_rtt; /* * if latency increased on a background flow, * return 1 to start throttling. */ - if (qdelay > target_qdelay) { + if (tp->t_rttcur > (base_rtt + target_qdelay)) { tp->t_flagsext |= TF_RECV_THROTTLE; - + if (tp->t_recv_throttle_ts == 0) + tp->t_recv_throttle_ts = tcp_now; /* * Reduce the recv socket buffer size to * minimize latecy. 
diff --git a/bsd/netinet/tcp_var.h b/bsd/netinet/tcp_var.h index 510d1cd84..7cce3784f 100644 --- a/bsd/netinet/tcp_var.h +++ b/bsd/netinet/tcp_var.h @@ -93,12 +93,6 @@ struct name { \ #define N_TIME_WAIT_SLOTS 128 /* must be power of 2 */ -/* Base RTT is stored for N_MIN_RTT_HISTORY slots. This is used to - * estimate expected minimum RTT for delay based congestion control - * algorithms. - */ -#define N_RTT_BASE 5 - /* Always allow at least 4 packets worth of recv window when adjusting * recv window using inter-packet arrival jitter. */ @@ -451,8 +445,6 @@ struct tcpcb { } t_stat; /* Background congestion related state */ - uint32_t rtt_hist[N_RTT_BASE]; /* history of minimum RTT */ - uint32_t rtt_count; /* Number of RTT samples in recent base history */ uint32_t bg_ssthresh; /* Slow start threshold until delay increases */ uint32_t t_flagsext; /* Another field to accommodate more flags */ #define TF_RXTFINDROP 0x1 /* Drop conn after retransmitting FIN 3 times */ @@ -605,6 +597,7 @@ struct tcpcb { u_int32_t t_reordered_pkts; /* packets reorderd */ u_int32_t t_dsack_sent; /* Sent DSACK notification */ u_int32_t t_dsack_recvd; /* Received a valid DSACK option */ + u_int32_t t_recv_throttle_ts; }; #define IN_FASTRECOVERY(tp) (tp->t_flags & TF_FASTRECOVERY) diff --git a/bsd/netkey/key.c b/bsd/netkey/key.c index 2d91aa70b..7a0c50b89 100644 --- a/bsd/netkey/key.c +++ b/bsd/netkey/key.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2014 Apple Inc. All rights reserved. + * Copyright (c) 2008-2016 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -588,6 +588,7 @@ static int key_setsaval2(struct secasvar *sav, u_int32_t pid, struct sadb_lifetime *lifetime_hard, struct sadb_lifetime *lifetime_soft); +static void bzero_keys(const struct sadb_msghdr *); extern int ipsec_bypass; extern int esp_udp_encap_port; @@ -7192,6 +7193,7 @@ key_add( /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "key_add: invalid satype is passed.\n")); + bzero_keys(mhp); return key_senderror(so, m, EINVAL); } @@ -7207,6 +7209,7 @@ key_add( (mhp->ext[SADB_EXT_LIFETIME_HARD] == NULL && mhp->ext[SADB_EXT_LIFETIME_SOFT] != NULL)) { ipseclog((LOG_DEBUG, "key_add: invalid message is passed.\n")); + bzero_keys(mhp); return key_senderror(so, m, EINVAL); } if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa) || @@ -7214,6 +7217,7 @@ key_add( mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { /* XXX need more */ ipseclog((LOG_DEBUG, "key_add: invalid message is passed.\n")); + bzero_keys(mhp); return key_senderror(so, m, EINVAL); } if (mhp->ext[SADB_X_EXT_SA2] != NULL) { @@ -7242,6 +7246,7 @@ key_add( if ((newsah = key_newsah(&saidx, ipsec_if, key_get_outgoing_ifindex_from_message(mhp, SADB_X_EXT_IPSECIF), IPSEC_DIR_OUTBOUND)) == NULL) { lck_mtx_unlock(sadb_mutex); ipseclog((LOG_DEBUG, "key_add: No more memory.\n")); + bzero_keys(mhp); return key_senderror(so, m, ENOBUFS); } } @@ -7251,6 +7256,7 @@ key_add( error = key_setident(newsah, m, mhp); if (error) { lck_mtx_unlock(sadb_mutex); + bzero_keys(mhp); return key_senderror(so, m, error); } @@ -7259,11 +7265,13 @@ key_add( if (key_getsavbyspi(newsah, sa0->sadb_sa_spi)) { lck_mtx_unlock(sadb_mutex); ipseclog((LOG_DEBUG, "key_add: SA already exists.\n")); + bzero_keys(mhp); return key_senderror(so, m, EEXIST); } newsav = key_newsav(m, mhp, newsah, &error, so); if (newsav == NULL) { lck_mtx_unlock(sadb_mutex); + bzero_keys(mhp); return key_senderror(so, m, error); } @@ 
-7280,6 +7288,7 @@ key_add( if ((error = key_mature(newsav)) != 0) { key_freesav(newsav, KEY_SADB_LOCKED); lck_mtx_unlock(sadb_mutex); + bzero_keys(mhp); return key_senderror(so, m, error); } @@ -7297,9 +7306,13 @@ key_add( n = key_getmsgbuf_x1(m, mhp); if (n == NULL) { ipseclog((LOG_DEBUG, "key_update: No more memory.\n")); + bzero_keys(mhp); return key_senderror(so, m, ENOBUFS); } + // mh.ext points to the mbuf content. + // Zero out Encryption and Integrity keys if present. + bzero_keys(mhp); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } @@ -9214,7 +9227,7 @@ bzero_mbuf(struct mbuf *m) } static void -bzero_keys(struct sadb_msghdr *mh) +bzero_keys(const struct sadb_msghdr *mh) { int extlen = 0; int offset = 0; @@ -9514,10 +9527,6 @@ key_parse( error = (*key_typesw[msg->sadb_msg_type])(so, m, &mh); - // mh.ext points to the mbuf content. - // Zero out Encryption and Integrity keys if present. - bzero_keys(&mh); - return error; senderror: diff --git a/bsd/nfs/nfs_vfsops.c b/bsd/nfs/nfs_vfsops.c index 90e20e774..bb1b425f3 100644 --- a/bsd/nfs/nfs_vfsops.c +++ b/bsd/nfs/nfs_vfsops.c @@ -2942,7 +2942,11 @@ mountnfs( error = ENOMEM; xb_get_32(error, &xb, nmp->nm_fh->fh_len); nfsmerr_if(error); - error = xb_get_bytes(&xb, (char*)&nmp->nm_fh->fh_data[0], nmp->nm_fh->fh_len, 0); + if (nmp->nm_fh->fh_len < 0 || + (size_t)nmp->nm_fh->fh_len > sizeof(nmp->nm_fh->fh_data)) + error = EINVAL; + else + error = xb_get_bytes(&xb, (char*)&nmp->nm_fh->fh_data[0], nmp->nm_fh->fh_len, 0); } nfsmerr_if(error); if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FS_LOCATIONS)) { diff --git a/bsd/sys/dtrace_impl.h b/bsd/sys/dtrace_impl.h index 02065c15a..e74def4ec 100644 --- a/bsd/sys/dtrace_impl.h +++ b/bsd/sys/dtrace_impl.h @@ -1366,16 +1366,21 @@ extern boolean_t dtrace_can_attach_to_proc(proc_t); /* * DTrace Assertions * - * DTrace calls ASSERT from probe context. 
To assure that a failed ASSERT - does not induce a markedly more catastrophic failure (e.g., one from which - a dump cannot be gleaned), DTrace must define its own ASSERT to be one that - may safely be called from probe context. This header file must thus be - included by any DTrace component that calls ASSERT from probe context, and - _only_ by those components. (The only exception to this is kernel - debugging infrastructure at user-level that doesn't depend on calling - ASSERT.) + * DTrace calls ASSERT and VERIFY from probe context. To assure that a failed + * ASSERT or VERIFY does not induce a markedly more catastrophic failure (e.g., + * one from which a dump cannot be gleaned), DTrace must define its own ASSERT + * and VERIFY macros to be ones that may safely be called from probe context. + * This header file must thus be included by any DTrace component that calls + * ASSERT and/or VERIFY from probe context, and _only_ by those components. + * (The only exception to this is kernel debugging infrastructure at user-level + * that doesn't depend on calling ASSERT.) */ #undef ASSERT +#undef VERIFY + +#define VERIFY(EX) ((void)((EX) || \ + dtrace_assfail(#EX, __FILE__, __LINE__))) + #if DEBUG #define ASSERT(EX) ((void)((EX) || \ dtrace_assfail(#EX, __FILE__, __LINE__))) diff --git a/config/MasterVersion b/config/MasterVersion index 1d3e76814..aad4b5154 100644 --- a/config/MasterVersion +++ b/config/MasterVersion @@ -1,4 +1,4 @@ -15.4.0 +15.5.0 # The first line of this file contains the master version number for the kernel. # All other instances of the kernel version in xnu are derived from this file. 
diff --git a/iokit/IOKit/IOPolledInterface.h b/iokit/IOKit/IOPolledInterface.h index bec682028..3b3a663ec 100644 --- a/iokit/IOKit/IOPolledInterface.h +++ b/iokit/IOKit/IOPolledInterface.h @@ -38,6 +38,8 @@ enum kIOPolledPreflightCoreDumpState = 5, kIOPolledPostflightCoreDumpState = 6, + + kIOPolledBeforeSleepStateAborted = 7, }; #if defined(__cplusplus) diff --git a/iokit/Kernel/IOHibernateIO.cpp b/iokit/Kernel/IOHibernateIO.cpp index 867251b27..168a6a039 100644 --- a/iokit/Kernel/IOHibernateIO.cpp +++ b/iokit/Kernel/IOHibernateIO.cpp @@ -1987,7 +1987,7 @@ hibernate_write_image(void) svPageCount, zvPageCount, wiredPagesEncrypted, wiredPagesClear, dirtyPagesEncrypted); if (pollerOpen) - IOPolledFilePollersClose(vars->fileVars, kIOPolledBeforeSleepState); + IOPolledFilePollersClose(vars->fileVars, (kIOReturnSuccess == err) ? kIOPolledBeforeSleepState : kIOPolledBeforeSleepStateAborted ); if (vars->consoleMapping) ProgressUpdate(gIOHibernateGraphicsInfo, diff --git a/iokit/Kernel/IOMemoryDescriptor.cpp b/iokit/Kernel/IOMemoryDescriptor.cpp index 0cff9ef1d..947461eaf 100644 --- a/iokit/Kernel/IOMemoryDescriptor.cpp +++ b/iokit/Kernel/IOMemoryDescriptor.cpp @@ -672,8 +672,7 @@ IOMemoryDescriptorMapAlloc(vm_map_t map, void * _ref) (((ref->options & kIOMapAnywhere) ? VM_FLAGS_ANYWHERE : VM_FLAGS_FIXED) - | VM_MAKE_TAG(ref->tag) - | VM_FLAGS_IOKIT_ACCT), /* iokit accounting */ + | VM_MAKE_TAG(ref->tag)), IPC_PORT_NULL, (memory_object_offset_t) 0, false, /* copy */ @@ -2875,8 +2874,9 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) pageInfo = getPageList(dataP); upl_page_list_ptr_t baseInfo = &pageInfo[pageIndex]; - upl_size_t ioplSize = round_page(numBytes); - unsigned int numPageInfo = atop_32(ioplSize); + mach_vm_size_t _ioplSize = round_page(numBytes); + upl_size_t ioplSize = (_ioplSize <= MAX_UPL_SIZE_BYTES) ? 
_ioplSize : MAX_UPL_SIZE_BYTES; + unsigned int numPageInfo = atop_32(ioplSize); if ((theMap == kernel_map) && (kernelStart >= io_kernel_static_start) @@ -2890,7 +2890,7 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) &highPage); } else if (_memRef) { - memory_object_offset_t entryOffset; + memory_object_offset_t entryOffset; entryOffset = mdOffset; entryOffset = (entryOffset - iopl.fPageOffset - memRefEntry->offset); @@ -2919,10 +2919,11 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) &ioplFlags); } - assert(ioplSize); if (error != KERN_SUCCESS) goto abortExit; + assert(ioplSize); + if (iopl.fIOPL) highPage = upl_get_highest_page(iopl.fIOPL); if (highPage > highestPage) diff --git a/iokit/Kernel/IOPolledInterface.cpp b/iokit/Kernel/IOPolledInterface.cpp index 09a1dd2fb..f0bb31618 100644 --- a/iokit/Kernel/IOPolledInterface.cpp +++ b/iokit/Kernel/IOPolledInterface.cpp @@ -263,6 +263,10 @@ IOPolledFilePollersClose(IOPolledFileIOVars * filevars, uint32_t state) idx++) { err = poller->close(state); + if ((kIOReturnSuccess != err) && (kIOPolledBeforeSleepStateAborted == state)) + { + err = poller->close(kIOPolledBeforeSleepState); + } if (err) HIBLOG("IOPolledInterface::close[%d] 0x%x\n", idx, err); } diff --git a/libkern/c++/OSDictionary.cpp b/libkern/c++/OSDictionary.cpp index c511e9d14..a53f23ad7 100644 --- a/libkern/c++/OSDictionary.cpp +++ b/libkern/c++/OSDictionary.cpp @@ -313,7 +313,7 @@ void OSDictionary::flushCollection() } bool OSDictionary:: -setObject(const OSSymbol *aKey, const OSMetaClassBase *anObject) +setObject(const OSSymbol *aKey, const OSMetaClassBase *anObject, bool onlyAdd) { unsigned int i; bool exists; @@ -331,6 +331,9 @@ setObject(const OSSymbol *aKey, const OSMetaClassBase *anObject) } if (exists) { + + if (onlyAdd) return false; + const OSMetaClassBase *oldObject = dictionary[i].value; haveUpdated(); @@ -359,6 +362,12 @@ setObject(const OSSymbol *aKey, const OSMetaClassBase *anObject) 
return true; } +bool OSDictionary:: +setObject(const OSSymbol *aKey, const OSMetaClassBase *anObject) +{ + return (setObject(aKey, anObject, false)); +} + void OSDictionary::removeObject(const OSSymbol *aKey) { unsigned int i; diff --git a/libkern/c++/OSSerializeBinary.cpp b/libkern/c++/OSSerializeBinary.cpp index accbbf224..66436dfe3 100644 --- a/libkern/c++/OSSerializeBinary.cpp +++ b/libkern/c++/OSSerializeBinary.cpp @@ -396,7 +396,7 @@ OSUnserializeBinary(const char *buffer, size_t bufferSize, OSString **errorStrin if (sym) { DEBG("%s = %s\n", sym->getCStringNoCopy(), o->getMetaClass()->getClassName()); - if (o != dict) ok = dict->setObject(sym, o); + if (o != dict) ok = dict->setObject(sym, o, true); o->release(); sym->release(); sym = 0; diff --git a/libkern/libkern/c++/OSDictionary.h b/libkern/libkern/c++/OSDictionary.h index 5bc256278..7a515f416 100644 --- a/libkern/libkern/c++/OSDictionary.h +++ b/libkern/libkern/c++/OSDictionary.h @@ -903,6 +903,9 @@ public: */ OSCollection * copyCollection(OSDictionary * cycleDict = 0) APPLE_KEXT_OVERRIDE; +#if XNU_KERNEL_PRIVATE + bool setObject(const OSSymbol *aKey, const OSMetaClassBase *anObject, bool onlyAdd); +#endif /* XNU_KERNEL_PRIVATE */ OSMetaClassDeclareReservedUnused(OSDictionary, 0); OSMetaClassDeclareReservedUnused(OSDictionary, 1); diff --git a/osfmk/bank/bank.c b/osfmk/bank/bank.c index 875380934..b4293d952 100644 --- a/osfmk/bank/bank.c +++ b/osfmk/bank/bank.c @@ -358,13 +358,12 @@ bank_get_value( if (bank_merchant == BANK_TASK_NULL) return KERN_RESOURCE_SHORTAGE; - /* Check if trying to redeem for self task, return the bank task */ + /* Check if trying to redeem for self task, return the default bank task */ if (bank_holder == bank_merchant && bank_holder == bank_secureoriginator && bank_holder == bank_proximateprocess) { - bank_task_reference(bank_holder); - bank_task_made_reference(bank_holder); - *out_value = BANK_ELEMENT_TO_HANDLE(bank_holder); + *out_value = 
BANK_ELEMENT_TO_HANDLE(BANK_DEFAULT_TASK_VALUE); + *out_flags = MACH_VOUCHER_ATTR_VALUE_FLAGS_PERSIST; return kr; } diff --git a/osfmk/kern/ledger.h b/osfmk/kern/ledger.h index b7f1eb0d3..2ebb8facf 100644 --- a/osfmk/kern/ledger.h +++ b/osfmk/kern/ledger.h @@ -36,6 +36,8 @@ #define LEDGER_ENTRY_INFO 1 #define LEDGER_TEMPLATE_INFO 2 #define LEDGER_LIMIT 3 +/* LEDGER_MAX_CMD always tracks the index of the last ledger command. */ +#define LEDGER_MAX_CMD LEDGER_LIMIT #define LEDGER_NAME_MAX 32 diff --git a/osfmk/kern/thread.c b/osfmk/kern/thread.c index 64b5ea604..cc8e391b1 100644 --- a/osfmk/kern/thread.c +++ b/osfmk/kern/thread.c @@ -1728,6 +1728,22 @@ set_vm_privilege(boolean_t privileged) return (was_vmpriv); } +void +set_thread_rwlock_boost(void) +{ + current_thread()->rwlock_count++; +} + +void +clear_thread_rwlock_boost(void) +{ + thread_t thread = current_thread(); + + if ((thread->rwlock_count-- == 1) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) { + + lck_rw_clear_promotion(thread); + } +} /* * XXX assuming current thread only, for now... 
diff --git a/osfmk/kern/thread.h b/osfmk/kern/thread.h index bec1a5105..44918bbcf 100644 --- a/osfmk/kern/thread.h +++ b/osfmk/kern/thread.h @@ -1007,6 +1007,9 @@ extern void thread_update_io_stats(thread_t thread, int size, int io_flags); extern kern_return_t thread_set_voucher_name(mach_port_name_t name); extern kern_return_t thread_get_current_voucher_origin_pid(int32_t *pid); +extern void set_thread_rwlock_boost(void); +extern void clear_thread_rwlock_boost(void); + extern void thread_enable_send_importance(thread_t thread, boolean_t enable); #endif /* XNU_KERNEL_PRIVATE */ diff --git a/osfmk/vm/vm_compressor.c b/osfmk/vm/vm_compressor.c index 64b7d7bd9..b4f2df76b 100644 --- a/osfmk/vm/vm_compressor.c +++ b/osfmk/vm/vm_compressor.c @@ -2710,6 +2710,15 @@ c_seg_swapin(c_segment_t c_seg, boolean_t force_minor_compaction) C_SEG_BUSY(c_seg); c_seg->c_busy_swapping = 1; + + /* + * This thread is likely going to block for I/O. + * Make sure it is ready to run when the I/O completes because + * it needs to clear the busy bit on the c_seg so that other + * waiting threads can make progress too. To do that, boost + * the rwlock_count so that the priority is boosted. + */ + set_thread_rwlock_boost(); lck_mtx_unlock_always(&c_seg->c_lock); PAGE_REPLACEMENT_DISALLOWED(FALSE); @@ -2751,6 +2760,12 @@ c_seg_swapin(c_segment_t c_seg, boolean_t force_minor_compaction) c_seg_swapin_requeue(c_seg, TRUE); } C_SEG_WAKEUP_DONE(c_seg); + + /* + * Drop the rwlock_count so that the thread priority + * is returned back to where it is supposed to be. 
+ */ + clear_thread_rwlock_boost(); } diff --git a/osfmk/vm/vm_map.c b/osfmk/vm/vm_map.c index 773ab89ff..840c4babf 100644 --- a/osfmk/vm/vm_map.c +++ b/osfmk/vm/vm_map.c @@ -2514,12 +2514,20 @@ StartAgain: ; */ new_entry->use_pmap = FALSE; } else if (!is_submap && - iokit_acct) { + iokit_acct && + object != VM_OBJECT_NULL && + object->internal) { /* alternate accounting */ assert(!new_entry->iokit_acct); assert(new_entry->use_pmap); new_entry->iokit_acct = TRUE; new_entry->use_pmap = FALSE; + DTRACE_VM4( + vm_map_iokit_mapped_region, + vm_map_t, map, + vm_map_offset_t, new_entry->vme_start, + vm_map_offset_t, new_entry->vme_end, + int, VME_ALIAS(new_entry)); vm_map_iokit_mapped_region( map, (new_entry->vme_end - @@ -6462,6 +6470,11 @@ vm_map_delete( if (entry->iokit_acct) { /* alternate accounting */ + DTRACE_VM4(vm_map_iokit_unmapped_region, + vm_map_t, map, + vm_map_offset_t, entry->vme_start, + vm_map_offset_t, entry->vme_end, + int, VME_ALIAS(entry)); vm_map_iokit_unmapped_region(map, (entry->vme_end - entry->vme_start)); -- 2.45.2