@param seed_nbytes Length of the seed in bytes
@param seed Pointer to a high-entropy seed
@param nonce_nbytes Length of the nonce in bytes
- @param seed Pointer to a single-use nonce
+ @param nonce Pointer to a single-use nonce
@discussion @p max_ngens should be set based on an upper bound of CPUs available on the device. The entropy buffer should be managed outside the PRNG and updated continuously (e.g. by an interrupt handler). The count of samples in the entropy buffer needn't be better than a rough estimate.
*/
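
A minimal sketch of how the documented parameters fit together. The function name prng_init_example(), the callee prng_init(), and the buffer sizes are hypothetical (the declaration itself is not shown in this hunk); only the parameter semantics — seed, single-use nonce, externally managed entropy buffer with a rough sample count, and max_ngens bounded by CPU count — come from the comment above.

    static uint8_t seed[32];            /* high-entropy seed */
    static uint8_t nonce[16];           /* single-use value */
    static uint8_t entropy_buf[512];    /* refilled continuously, e.g. by an interrupt handler */
    static uint32_t entropy_nsamples;   /* rough sample count, updated outside the PRNG */

    static void
    prng_init_example(unsigned ncpus)
    {
        /* max_ngens: an upper bound on the CPUs available on the device */
        prng_init(ncpus,                                     /* hypothetical initializer */
            sizeof(entropy_buf), entropy_buf, &entropy_nsamples,
            sizeof(seed), seed,
            sizeof(nonce), nonce);
    }
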
--- /dev/null
+//
+// CoreTrust.h
+// CoreTrust
+//
+// Copyright © 2017-2020 Apple Inc. All rights reserved.
+//
+
+#ifndef _CORETRUST_EVALUATE_H_
+#define _CORETRUST_EVALUATE_H_
+
+#include <stdint.h>
+#include <stdbool.h>
+
+__BEGIN_DECLS
+
+typedef struct x509_octet_string {
+ const uint8_t *data;
+ size_t length;
+} CTAsn1Item;
+
+int CTParseCertificateSet(const uint8_t *der, const uint8_t *der_end, // Input: binary representation of concatenated DER-encoded certs
+ CTAsn1Item *certStorage, size_t certStorageLen, // Output: An array of certStorageLen CTAsn1Items that will be populated with the
+ // CTAsn1Item for each parsed cert (in the same order as input)
+ size_t *numParsedCerts); // Output: number of successfully parsed certs
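
A hedged usage sketch for the parser above; the helper name and the four-item array are editorial assumptions, while CTParseCertificateSet and CTAsn1Item are taken directly from the declarations.

    // Split a concatenated DER blob into per-cert items (items point into `blob`).
    static int
    parse_cert_blob(const uint8_t *blob, size_t blobLen)
    {
        CTAsn1Item certs[4];
        size_t numParsed = 0;

        int rv = CTParseCertificateSet(blob, blob + blobLen,
                                       certs, sizeof(certs) / sizeof(certs[0]),
                                       &numParsed);
        if (rv != 0) {
            return rv;
        }
        for (size_t i = 0; i < numParsed; i++) {
            // certs[i].data / certs[i].length reference the i-th cert inside `blob`
        }
        return 0;
    }
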
+
+int CTEvaluateSavageCerts(const uint8_t *certsData, size_t certsLen,
+ const uint8_t *rootKeyData, size_t rootKeyLen,
+ const uint8_t **leafKeyData, size_t *leafKeyLen,
+ bool *isProdCert);
+
+int CTEvaluateSavageCertsWithUID(const uint8_t *certsData, size_t certsLen,
+ const uint8_t *rootKeyData, size_t rootKeyLen,
+ const uint8_t **leafKeyData, size_t *leafKeyLen, // Output: points to the leaf key data in the input certsData
+ uint8_t *UIDData, size_t UIDLen, // Output: a pre-allocated buffer of UIDLen
+ bool *isProdCert);
+
+int CTEvaluateYonkersCerts(const uint8_t *certsData, size_t certsLen,
+ const uint8_t *rootKeyData, size_t rootKeyLen,
+ const uint8_t **leafKeyData, size_t *leafKeyLen, // Output: points to the leaf key data in the input certsData
+ uint8_t *UIDData, size_t UIDLen, // Output: a pre-allocated buffer of UIDLen
+ bool *isProdCert);
+
+int CTEvaluateAcrt(const uint8_t *certsData, size_t certsLen, // Input: binary representation of at most 3 concatenated certs
+ // with leaf first (root may be omitted)
+ const uint8_t **leafKeyData, size_t *leafKeyLen); // Output: points to the leaf key data in the input certsData
+
+int CTEvaluateUcrt(const uint8_t *certsData, size_t certsLen, // Input: binary representation of exactly 3 concatenated
+ // DER-encoded certs, with leaf first
+ const uint8_t **leafKeyData, size_t *leafKeyLen); // Output: points to the leaf key data in the input certsData
+
+int CTEvaluateUcrtTestRoot(const uint8_t *certsData, size_t certsLen, // Input: binary representation of exactly 3 concatenated
+ // DER-encoded certs, with leaf first
+ const uint8_t *rootKeyData, size_t rootKeyLen, // Input: Root public key; if not specified, the production root will be used
+ const uint8_t **leafKeyData, size_t *leafKeyLen); // Output: points to the leaf key data in the input certsData
+
+int CTEvaluateBAASystem(const uint8_t *certsData, size_t certsLen, // Input: binary representation of exactly 3 concatenated
+ // DER-encoded certs, with leaf first
+ const uint8_t **leafKeyData, size_t *leafKeyLen); // Output: points to the leaf key data in the input certsData
+
+typedef struct baa_identity {
+ uint32_t chipId;
+ uint64_t ecid;
+ bool productionStatus;
+ bool securityMode;
+ uint8_t securityDomain;
+ CTAsn1Item img4;
+} CTBAAIdentity;
+
+int CTEvaluateBAASystemWithId(const uint8_t *certsData, size_t certsLen, // Input: binary representation of exactly 3 concatenated
+ // DER-encoded certs, with leaf first
+ const uint8_t **leafKeyData, size_t *leafKeyLen, // Output: points to the leaf key data in the input certsData
+ CTBAAIdentity *identity); // Output from identity field in leaf certificate
+
+int CTEvaluateBAASystemTestRoot(const uint8_t *certsData, size_t certsLen, // Input: binary representation of exactly 3 concatenated
+ // DER-encoded certs, with leaf first
+ const uint8_t *rootKeyData, size_t rootKeyLen, // Input: Root public key; if not specified, the production root will be used
+ const uint8_t **leafKeyData, size_t *leafKeyLen,// Output: points to the leaf key data in the input certsData
+ CTBAAIdentity *identity); // Output from identity field in leaf certificate
+
+int CTEvaluateBAAUser(const uint8_t *certsData, size_t certsLen, // Input: binary representation of exactly 3 concatenated
+ // DER-encoded certs, with leaf first
+ const uint8_t **leafKeyData, size_t *leafKeyLen, // Output: points to the leaf key data in the input certsData
+ CTBAAIdentity *identity); // Output from identity field in leaf certificate
+
+int CTEvaluateBAAUserTestRoot(const uint8_t *certsData, size_t certsLen, // Input: binary representation of exactly 3 concatenated
+ // DER-encoded certs, with leaf first
+ const uint8_t *rootKeyData, size_t rootKeyLen, // Input: Root public key; if not specified, the production root will be used
+ const uint8_t **leafKeyData, size_t *leafKeyLen, // Output: points to the leaf key data in the input certsData
+ CTBAAIdentity *identity); // Output from identity field in leaf certificate
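
A hedged sketch of the BAA identity flow declared above; the helper name is hypothetical, and the input blob is assumed to be the 3 concatenated DER certs, leaf first, as the comments require.

    // Evaluate a BAA attestation chain and read back the device identity.
    static int
    check_baa_chain(const uint8_t *certs, size_t certsLen)
    {
        const uint8_t *leafKey = NULL;
        size_t leafKeyLen = 0;
        CTBAAIdentity ident = {0};

        int rv = CTEvaluateBAASystemWithId(certs, certsLen,
                                           &leafKey, &leafKeyLen, &ident);
        if (rv == 0) {
            // ident.chipId, ident.ecid, ident.productionStatus, etc. describe the
            // attested device; ident.img4 points into the input buffer.
        }
        return rv;
    }
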
+
+int CTEvaluateSatori(const uint8_t *certsData, size_t certsLen, // Input: binary (DER) representation of 3 concatenated certs
+ // with leaf first
+ bool allowTestRoot, // Input: whether to allow the Test Apple Roots
+ const uint8_t **leafKeyData, size_t *leafKeyLen); // Output: points to the leaf key data in the input certsData
+
+int CTEvaluatePragueSignatureCMS(const uint8_t *cmsData, size_t cmsLen, // Input: CMS signature blob
+ const uint8_t *detachedData, size_t detachedDataLen, // Input: data signed by CMS blob
+ bool allowTestRoot, // Input: permit use of test hierarchy
+ const uint8_t **leafKeyData, size_t *leafKeyLen); // Output: points to leaf key data in input cmsData
+
+int CTEvaluateKDLSignatureCMS(const uint8_t *cmsData, size_t cmsLen, // Input: CMS signature blob
+ const uint8_t *detachedData, size_t detachedDataLen, // Input: data signed by CMS blob
+ bool allowTestRoot, // Input: permit use of test hierarchy
+ const uint8_t **leafKeyData, size_t *leafKeyLen); // Output: points to leaf key data in input cmsData
+
+typedef uint64_t CoreTrustPolicyFlags;
+enum {
+ CORETRUST_POLICY_BASIC = 0,
+ CORETRUST_POLICY_SAVAGE_DEV = 1 << 0,
+ CORETRUST_POLICY_SAVAGE_PROD = 1 << 1,
+ CORETRUST_POLICY_MFI_AUTHV3 = 1 << 2,
+ CORETRUST_POLICY_MAC_PLATFORM = 1 << 3,
+ CORETRUST_POLICY_MAC_DEVELOPER = 1 << 4,
+ CORETRUST_POLICY_DEVELOPER_ID = 1 << 5,
+ CORETRUST_POLICY_MAC_APP_STORE = 1 << 6,
+ CORETRUST_POLICY_IPHONE_DEVELOPER = 1 << 7,
+ CORETRUST_POLICY_IPHONE_APP_PROD = 1 << 8,
+ CORETRUST_POLICY_IPHONE_APP_DEV = 1 << 9,
+ CORETRUST_POLICY_IPHONE_VPN_PROD = 1 << 10,
+ CORETRUST_POLICY_IPHONE_VPN_DEV = 1 << 11,
+ CORETRUST_POLICY_TVOS_APP_PROD = 1 << 12,
+ CORETRUST_POLICY_TVOS_APP_DEV = 1 << 13,
+ CORETRUST_POLICY_TEST_FLIGHT_PROD = 1 << 14,
+ CORETRUST_POLICY_TEST_FLIGHT_DEV = 1 << 15,
+ CORETRUST_POLICY_IPHONE_DISTRIBUTION = 1 << 16,
+ CORETRUST_POLICY_MAC_SUBMISSION = 1 << 17,
+ CORETRUST_POLICY_YONKERS_DEV = 1 << 18,
+ CORETRUST_POLICY_YONKERS_PROD = 1 << 19,
+ CORETRUST_POLICY_MAC_PLATFORM_G2 = 1 << 20,
+ CORETRUST_POLICY_ACRT = 1 << 21,
+ CORETRUST_POLICY_SATORI = 1 << 22,
+ CORETRUST_POLICY_BAA = 1 << 23,
+ CORETRUST_POLICY_UCRT = 1 << 24,
+ CORETRUST_POLICY_PRAGUE = 1 << 25,
+ CORETRUST_POLICY_KDL = 1 << 26,
+ CORETRUST_POLICY_MFI_AUTHV2 = 1 << 27,
+ CORETRUST_POLICY_MFI_SW_AUTH_PROD = 1 << 28,
+ CORETRUST_POLICY_MFI_SW_AUTH_DEV = 1 << 29,
+ CORETRUST_POLICY_COMPONENT = 1 << 30,
+ CORETRUST_POLICY_IMG4 = 1ULL << 31,
+ CORETRUST_POLICY_SERVER_AUTH = 1ULL << 32,
+ CORETRUST_POLICY_SERVER_AUTH_STRING = 1ULL << 33,
+};
+
+typedef uint32_t CoreTrustDigestType;
+enum {
+ CORETRUST_DIGEST_TYPE_SHA1 = 1,
+ CORETRUST_DIGEST_TYPE_SHA224 = 2,
+ CORETRUST_DIGEST_TYPE_SHA256 = 4,
+ CORETRUST_DIGEST_TYPE_SHA384 = 8,
+ CORETRUST_DIGEST_TYPE_SHA512 = 16
+};
+
+int CTEvaluateAMFICodeSignatureCMS(const uint8_t *cmsData, size_t cmsLen, // Input: CMS blob
+ const uint8_t *detachedData, size_t detachedDataLen, // Input: data signed by CMS blob
+ bool allow_test_hierarchy, // Input: permit use of test hierarchy
+ const uint8_t **leafCert, size_t *leafCertLen, // Output: signing certificate
+ CoreTrustPolicyFlags *policyFlags, // Output: policy met by signing certificate
+ CoreTrustDigestType *cmsDigestType, // Output: digest used to sign the CMS blob
+ CoreTrustDigestType *hashAgilityDigestType, // Output: highest strength digest type
+ // from hash agility attribute
+ const uint8_t **digestData, size_t *digestLen); // Output: pointer to hash agility value
+ // in CMS blob (with digest type above)
+/* Returns non-zero if there's a standards-based problem with the CMS or certificates.
+ * Policy matching of the certificates is only reflected in the policyFlags output. Namely, if the only problem is that
+ * the certificates don't match a policy, the returned integer will be 0 (success) and the policyFlags will be 0 (no matching policies).
+ * Some notes about hash agility outputs:
+ * - hashAgilityDigestType is only non-zero for HashAgilityV2
+ * - If hashAgilityDigestType is non-zero, digestData/Len provides the digest value
+ * - If hashAgilityDigestType is zero, digestData/Len provides the content of the HashAgilityV1 attribute (if present)
+ * - If neither HashAgilityV1 nor HashAgilityV2 attributes are found, these outputs will all be NULL.
+ */
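
A hedged sketch mirroring the return-value semantics described above (a zero return with zero policyFlags means "well-formed but matched no policy"); the helper name and its arguments are editorial assumptions.

    static bool
    amfi_cms_matches_policy(const uint8_t *cms, size_t cmsLen,
                            const uint8_t *cd, size_t cdLen,
                            CoreTrustPolicyFlags wanted)
    {
        const uint8_t *leaf = NULL, *agilityDigest = NULL;
        size_t leafLen = 0, agilityDigestLen = 0;
        CoreTrustPolicyFlags policies = 0;
        CoreTrustDigestType cmsDigest = 0, agilityType = 0;

        if (CTEvaluateAMFICodeSignatureCMS(cms, cmsLen, cd, cdLen,
                                           false /* production hierarchy only */,
                                           &leaf, &leafLen, &policies,
                                           &cmsDigest, &agilityType,
                                           &agilityDigest, &agilityDigestLen) != 0) {
            return false;                  // malformed CMS or certificates
        }
        return (policies & wanted) != 0;   // policies == 0: no policy matched
    }
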
+
+int CTParseAccessoryCerts(const uint8_t *certsData, size_t certsLen, // Input: CMS or binary representation of DER-encoded certs
+ const uint8_t **leafCertData, size_t *leafCertLen, // Output: points to leaf cert data in input certsData
+ const uint8_t **subCACertData, size_t *subCACertLen, // Output: points to subCA cert data (1st of 2) in input certsData, if present. Is set to NULL if only one cert present in input.
+ CoreTrustPolicyFlags *flags); // Output: policy flags set by this leaf
+
+
+int CTEvaluateAccessoryCert(const uint8_t *leafCertData, size_t leafCertLen, // Input: binary representation of DER-encoded leaf cert
+ const uint8_t *subCACertData, size_t subCACertLen, // Input: (optional) binary representation of DER-encoded subCA cert
+ const uint8_t *anchorCertData, size_t anchorCertLen, // Input: binary representation of DER-encoded anchor cert
+ CoreTrustPolicyFlags policy, // Input: policy to use when evaluating chain
+ const uint8_t **leafKeyData, size_t *leafKeyLen, // Output: points to the leaf key data in the input leafCertData
+ const uint8_t **extensionValueData, size_t *extensionValueLen); // Output: points to the extension value in the input leafCertData
+/* Which extension value is returned is based on which policy the cert was verified against:
+ * - For MFI AuthV3, this is the value of the extension with OID 1.2.840.113635.100.6.36
+ * - For SW Auth, this is the value of the extension with OID 1.2.840.113635.100.6.59.1 (GeneralCapabilities extension)
+ * - For Component certs, this is the value of the extension with OID 1.2.840.113635.100.11.1 (Component Type)
+ *
+ * The following CoreTrustPolicyFlags are accepted:
+ * - CORETRUST_POLICY_BASIC
+ * - CORETRUST_POLICY_MFI_AUTHV2
+ * - CORETRUST_POLICY_MFI_AUTHV3
+ * - CORETRUST_POLICY_MFI_SW_AUTH_DEV
+ * - CORETRUST_POLICY_MFI_SW_AUTH_PROD
+ * - CORETRUST_POLICY_COMPONENT
+ */
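
A hedged sketch of the parse-then-evaluate flow implied by the two declarations above; the helper name and the caller-supplied anchor are assumptions, and the extension OID comes from the comment's MFi AuthV3 case.

    // Parse an accessory-provided chain, then evaluate it against an AuthV3
    // anchor; on success *extValue points at the 1.2.840.113635.100.6.36 value.
    static int
    check_authv3_accessory(const uint8_t *chain, size_t chainLen,
                           const uint8_t *anchor, size_t anchorLen,
                           const uint8_t **extValue, size_t *extValueLen)
    {
        const uint8_t *leaf = NULL, *subCA = NULL, *leafKey = NULL;
        size_t leafLen = 0, subCALen = 0, leafKeyLen = 0;
        CoreTrustPolicyFlags leafPolicies = 0;

        int rv = CTParseAccessoryCerts(chain, chainLen, &leaf, &leafLen,
                                       &subCA, &subCALen, &leafPolicies);
        if (rv != 0 || !(leafPolicies & CORETRUST_POLICY_MFI_AUTHV3)) {
            return rv ? rv : -1;
        }
        return CTEvaluateAccessoryCert(leaf, leafLen, subCA, subCALen,
                                       anchor, anchorLen,
                                       CORETRUST_POLICY_MFI_AUTHV3,
                                       &leafKey, &leafKeyLen,
                                       extValue, extValueLen);
    }
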
+
+int CTEvaluateAppleSSL(const uint8_t *certsData, size_t certsLen, // Input: binary representation of up to 3 concatenated
+ // DER-encoded certificates, with leaf first
+ const uint8_t *hostnameData, size_t hostnameLen, // Input: The hostname of the TLS server being connected to
+ uint64_t leafMarker, // Input: The last decimal of the marker OID for this project
+ // (e.g. 32 for 1.2.840.113635.100.6.27.32)
+ bool allowTestRoots); // Input: permit use of test hierarchy
+
+int CTEvaluateAppleSSLWithOptionalTemporalCheck(const uint8_t *certsData, size_t certsLen,
+ const uint8_t *hostnameData, size_t hostnameLen,
+ uint64_t leafMarker,
+ bool allowTestRoots,
+ bool checkTemporalValidity);
+
+__END_DECLS
+
+#endif /* _CORETRUST_EVALUATE_H_ */
/* 7.18.1.5 Greatest-width integer types */
-typedef long long intmax_t;
-typedef unsigned long long uintmax_t;
+#ifdef __INTMAX_TYPE__
+typedef __INTMAX_TYPE__ intmax_t;
+#else
+#ifdef __LP64__
+typedef long int intmax_t;
+#else
+typedef long long int intmax_t;
+#endif /* __LP64__ */
+#endif /* __INTMAX_TYPE__ */
+#ifdef __UINTMAX_TYPE__
+typedef __UINTMAX_TYPE__ uintmax_t;
+#else
+#ifdef __LP64__
+typedef long unsigned int uintmax_t;
+#else
+typedef long long unsigned int uintmax_t;
+#endif /* __LP64__ */
+#endif /* __UINTMAX_TYPE__ */
+
+/* 7.18.4 Macros for integer constants */
+#define INT8_C(v) (v)
+#define INT16_C(v) (v)
+#define INT32_C(v) (v)
+#define INT64_C(v) (v ## LL)
+
+#define UINT8_C(v) (v)
+#define UINT16_C(v) (v)
+#define UINT32_C(v) (v ## U)
+#define UINT64_C(v) (v ## ULL)
+
+#ifdef __LP64__
+#define INTMAX_C(v) (v ## L)
+#define UINTMAX_C(v) (v ## UL)
+#else
+#define INTMAX_C(v) (v ## LL)
+#define UINTMAX_C(v) (v ## ULL)
+#endif
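
A small illustration of what the token-pasting constant macros above expand to; with __LP64__ defined, INTMAX_C pastes an `L` suffix (matching the `long` typedef), and on ILP32 it pastes `LL`.

    /* Illustrative only. */
    intmax_t  imax = INTMAX_C(9223372036854775807);     /* ...L or ...LL         */
    uintmax_t umax = UINTMAX_C(18446744073709551615);   /* ...UL or ...ULL       */
    uint64_t  high = UINT64_C(1) << 63;                 /* 1ULL << 63: no overflow */
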
/* 7.18.2 Limits of specified-width integer types:
* These #defines specify the minimum and maximum limits
* of each of the types declared above.
+ *
+ * They must have "the same type as would an expression that is an
+ * object of the corresponding type converted according to the integer
+ * promotion".
*/
/* 7.18.2.4 Limits of integer types capable of holding object pointers */
#if __WORDSIZE == 64
-#define INTPTR_MIN INT64_MIN
-#define INTPTR_MAX INT64_MAX
+#define INTPTR_MAX 9223372036854775807L
#else
-#define INTPTR_MIN INT32_MIN
-#define INTPTR_MAX INT32_MAX
+#define INTPTR_MAX 2147483647L
#endif
+#define INTPTR_MIN (-INTPTR_MAX-1)
#if __WORDSIZE == 64
-#define UINTPTR_MAX UINT64_MAX
+#define UINTPTR_MAX 18446744073709551615UL
#else
-#define UINTPTR_MAX UINT32_MAX
+#define UINTPTR_MAX 4294967295UL
#endif
/* 7.18.2.5 Limits of greatest-width integer types */
-#define INTMAX_MIN INT64_MIN
-#define INTMAX_MAX INT64_MAX
-
-#define UINTMAX_MAX UINT64_MAX
+#define INTMAX_MAX INTMAX_C(9223372036854775807)
+#define UINTMAX_MAX UINTMAX_C(18446744073709551615)
+#define INTMAX_MIN (-INTMAX_MAX-1)
/* 7.18.3 "Other" */
#if __WORDSIZE == 64
-#define PTRDIFF_MIN INT64_MIN
-#define PTRDIFF_MAX INT64_MAX
+#define PTRDIFF_MIN INTMAX_MIN
+#define PTRDIFF_MAX INTMAX_MAX
#else
#define PTRDIFF_MIN INT32_MIN
#define PTRDIFF_MAX INT32_MAX
#endif
-/* We have no sig_atomic_t yet, so no SIG_ATOMIC_{MIN,MAX}.
- Should end up being {-127,127} or {0,255} ... or bigger.
- My bet would be on one of {U}INT32_{MIN,MAX}. */
-
-#if __WORDSIZE == 64
-#define SIZE_MAX UINT64_MAX
-#else
-#define SIZE_MAX UINT32_MAX
-#endif
+#define SIZE_MAX UINTPTR_MAX
#if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1
#define RSIZE_MAX (SIZE_MAX >> 1)
#define SIG_ATOMIC_MIN INT32_MIN
#define SIG_ATOMIC_MAX INT32_MAX
-/* 7.18.4 Macros for integer constants */
-#define INT8_C(v) (v)
-#define INT16_C(v) (v)
-#define INT32_C(v) (v)
-#define INT64_C(v) (v ## LL)
-
-#define UINT8_C(v) (v ## U)
-#define UINT16_C(v) (v ## U)
-#define UINT32_C(v) (v ## U)
-#define UINT64_C(v) (v ## ULL)
-
-#define INTMAX_C(v) (v ## LL)
-#define UINTMAX_C(v) (v ## ULL)
-
#endif /* KERNEL */
#endif /* _KERNEL_STDINT_H_ */
SRCROOT=$(SRCROOT)/tests/driverkit
+include $(MakeInc_cmd)
+
#
# The "analyze" target defined below invokes Clang Static Analyzer
# with a predefined set of checks and options for the project.
STATIC_ANALYZER_EXTRA_FLAGS ?=
analyze:
- # This is where the reports are going to be available.
- # Old reports are deleted on make clean only.
- mkdir -p $(STATIC_ANALYZER_OUTPUT_DIR)
-
- # Recursively build the requested target under scan-build.
- # Exclude checks that weren't deemed to be security critical,
- # like null pointer dereferences.
- xcrun scan-build -o $(STATIC_ANALYZER_OUTPUT_DIR) \
+# This is where the reports are going to be available.
+# Old reports are deleted on make clean only.
+ $(_v)$(MKDIR) $(STATIC_ANALYZER_OUTPUT_DIR)
+
+# Recursively build the requested target under scan-build.
+# Exclude checks that weren't deemed to be security critical,
+# like null pointer dereferences.
+ $(_v)$(XCRUN) $(SCAN_BUILD) -o $(STATIC_ANALYZER_OUTPUT_DIR) \
-disable-checker deadcode.DeadStores \
-disable-checker core.NullDereference \
-disable-checker core.DivideZero \
$(STATIC_ANALYZER_EXTRA_FLAGS) \
- make $(STATIC_ANALYZER_TARGET)
+ $(MAKE) $(STATIC_ANALYZER_TARGET) QUIET=1 2>&1 | $(GREP) "^scan-build:"
+
+.PHONY: analyze
static void
usage(void)
{
- fprintf(stderr, "Usage: %s [-s OLDSEGNAME] -n NEWSEGNAME input -o output\n", getprogname());
+ fprintf(stderr, "Usage: %s [-s OLDSEGNAME] [-i IGNORESEGNAME] -n NEWSEGNAME input -o output\n", getprogname());
exit(1);
}
const char * output_name = NULL;
const char * input_name = NULL;
const char * oldseg_name = NULL;
+ const char * ignoreseg_name = NULL;
const char * newseg_name = NULL;
struct mach_header * hdr;
struct mach_header_64 * hdr64;
int ch;
- while ((ch = getopt(argc, argv, "s:n:o:")) != -1) {
+ while ((ch = getopt(argc, argv, "s:i:n:o:")) != -1) {
switch (ch) {
case 's':
oldseg_name = optarg;
break;
+ case 'i':
+ ignoreseg_name = optarg;
+ break;
case 'n':
newseg_name = optarg;
break;
attr = OSSwapInt32(attr);
}
- if (!(S_ATTR_DEBUG & attr)) {
+ if (!(S_ATTR_DEBUG & attr) && (!ignoreseg_name ||
+ 0 != strncmp(ignoreseg_name, (char *)names, sizeof(*names)))) {
if (!oldseg_name ||
0 == strncmp(oldseg_name, (char *)names, sizeof(*names))) {
memset(names, 0x0, sizeof(*names));
#ifndef DFLSSIZ
/* XXX stack size default is a platform property: use getrlimit(2) */
#if (defined(TARGET_OS_OSX) && (TARGET_OS_OSX != 0)) || \
- (defined(KERNEL) && !defined(CONFIG_EMBEDDED) || (CONFIG_EMBEDDED == 0))
+ (defined(KERNEL) && XNU_TARGET_OS_OSX)
#define DFLSSIZ (8*1024*1024 - 16*1024)
#else
#define DFLSSIZ (1024*1024 - 16*1024) /* initial stack size limit */
#ifndef MAXSSIZ
/* XXX stack size limit is a platform property: use getrlimit(2) */
#if (defined(TARGET_OS_OSX) && (TARGET_OS_OSX != 0)) || \
- (defined(KERNEL) && !defined(CONFIG_EMBEDDED) || (CONFIG_EMBEDDED == 0))
+ (defined(KERNEL) && XNU_TARGET_OS_OSX)
#define MAXSSIZ (64*1024*1024) /* max stack size */
#else
#define MAXSSIZ (1024*1024) /* max stack size */
bsd/vfs/vfs_conf.c optional config_nfs4
bsd/vfs/vfs_fslog.c standard
bsd/vfs/vfs_init.c standard
+bsd/vfs/vfs_io_compression_stats.c optional config_io_compression_stats
bsd/vfs/vfs_lookup.c standard
bsd/vfs/vfs_quota.c optional quota
bsd/vfs/vfs_subr.c standard
bsd/kern/subr_prf.c standard
bsd/kern/subr_sbuf.c standard
bsd/kern/subr_xxx.c standard
+bsd/kern/counter_test.c optional development
bsd/kern/sys_eventlink.c standard
bsd/kern/sys_generic.c standard
bsd/kern/sys_pipe.c standard
include $(MakeInc_cmd)
include $(MakeInc_def)
-DATAFILES = \
- entropy_sysctl.h
-
INSTALL_MI_LIST =
EXPORT_MI_LIST = ${DATAFILES}
*/
#include <sys/sysctl.h>
+#include <pexpert/pexpert.h>
#include <kern/zalloc.h>
#include <kern/percpu.h>
-#include <crypto/entropy/entropy_sysctl.h>
#include <prng/entropy.h>
#include <libkern/section_keywords.h>
SYSCTL_UINT(_kern_entropy_health_adaptive_proportion_test, OID_AUTO, max_observation_count, CTLFLAG_RD, &entropy_health_apt_stats.max_observation_count, 0, NULL);
static int
-sysctl_entropy_collect(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+sysctl_entropy_collect SYSCTL_HANDLER_ARGS
{
if (!req->oldptr || req->oldlen > entropy_analysis_buffer_size) {
return EINVAL;
// Get current size of entropy buffer in bytes
SYSCTL_UINT(_kern_entropy, OID_AUTO, entropy_buffer_size, CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_NOAUTO, &entropy_analysis_buffer_size, 0, NULL);
// Collect contents from entropy buffer
-SYSCTL_PROC(_kern_entropy, OID_AUTO, entropy_collect, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_NOAUTO, NULL, 0, sysctl_entropy_collect, "-", NULL);
+SYSCTL_PROC(_kern_entropy, OID_AUTO, entropy_collect,
+ CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_NOAUTO,
+ NULL, 0, sysctl_entropy_collect, "-", NULL);
-void
-entropy_analysis_register_sysctls(void)
+__startup_func
+static void
+entropy_analysis_sysctl_startup(void)
{
- sysctl_register_oid(&sysctl__kern_entropy_entropy_buffer_size);
- sysctl_register_oid(&sysctl__kern_entropy_entropy_collect);
+ uint32_t sample_count = 0;
+ if (__improbable(PE_parse_boot_argn("entropy-analysis-sample-count", &sample_count, sizeof(sample_count)))) {
+ sysctl_register_oid_early(&sysctl__kern_entropy_entropy_buffer_size);
+ sysctl_register_oid_early(&sysctl__kern_entropy_entropy_collect);
+ } else if (__improbable(PE_parse_boot_argn("ebsz", &sample_count, sizeof(sample_count)))) {
+ sysctl_register_oid_early(&sysctl__kern_entropy_entropy_buffer_size);
+ sysctl_register_oid_early(&sysctl__kern_entropy_entropy_collect);
+ }
}
+STARTUP(SYSCTL, STARTUP_RANK_MIDDLE, entropy_analysis_sysctl_startup);
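
A hedged userspace sketch for reading the two OIDs registered above. It assumes the entropy-analysis-sample-count (or ebsz) boot-arg was set so the sysctls exist; names follow directly from the SYSCTL declarations (kern.entropy.entropy_buffer_size, kern.entropy.entropy_collect), and error handling is minimal.

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/sysctl.h>

    int
    main(void)
    {
        unsigned int bufsize = 0;
        size_t len = sizeof(bufsize);

        if (sysctlbyname("kern.entropy.entropy_buffer_size", &bufsize, &len, NULL, 0) != 0 ||
            bufsize == 0) {
            return 1;   /* OIDs not registered on this boot */
        }

        void *samples = malloc(bufsize);
        size_t samples_len = bufsize;   /* must not exceed the reported buffer size */
        if (samples != NULL &&
            sysctlbyname("kern.entropy.entropy_collect", samples, &samples_len, NULL, 0) == 0) {
            printf("collected %zu bytes of raw entropy samples\n", samples_len);
        }
        free(samples);
        return 0;
    }
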
+++ /dev/null
-/*
- * Copyright (c) 2019 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#ifndef _SYS_CRYPTO_ENTROPY_ENTROPYSYSCTL_H_
-#define _SYS_CRYPTO_ENTROPY_ENTROPYSYSCTL_H_
-
-// This function is used only for test purposes. We collect a large
-// number of entropy samples during boot and analyze them offline.
-//
-// See entropy.c to understand the initialization of this module via
-// boot arg and the collection of the samples.
-//
-// See entropy_sysctl.c to understand the semantics of the sysctl
-// that exposes the samples for analysis.
-void entropy_analysis_register_sysctls(void);
-
-#endif
extern dtrace_id_t dtrace_probeid_error; /* special ERROR probe */
typedef arm_saved_state_t savearea_t;
-extern lck_attr_t *dtrace_lck_attr;
-extern lck_grp_t *dtrace_lck_grp;
-
int dtrace_arm_condition_true(int condition, int cpsr);
/*
* MP coordination
*/
-decl_lck_mtx_data(static, dt_xc_lock);
+static LCK_MTX_DECLARE_ATTR(dt_xc_lock, &dtrace_lck_grp, &dtrace_lck_attr);
static uint32_t dt_xc_sync;
typedef struct xcArg {
return;
}
-/*
- * Initialization
- */
-void
-dtrace_isa_init(void)
-{
- lck_mtx_init(&dt_xc_lock, dtrace_lck_grp, dtrace_lck_attr);
- return;
-}
-
/*
* Runtime and ABI
*/
/*
* We start 32 bytes after sp since 4 registers are pushed onto the stack
- * in the userspace syscall handler, and the first 4 stack argumnets are moved
+ * in the userspace syscall handler, and the first 4 stack arguments are moved
* into registers already
*/
#define ARG_SP_BYTE_OFFSET 32
#include <sys/kauth.h>
#include <sys/ucred.h>
#include <sys/proc_internal.h>
+#include <sys/sysproto.h>
#include <sys/user.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <vm/vm_map.h>
+
/*
* copy a null terminated string from the kernel address space into the user
* address space. - if the user is denied write access, return EFAULT. - if
bcopy(src, dst, count);
return 0;
}
+
extern dtrace_id_t dtrace_probeid_error; /* special ERROR probe */
typedef arm_saved_state_t savearea_t;
-extern lck_attr_t *dtrace_lck_attr;
-extern lck_grp_t *dtrace_lck_grp;
-
#if XNU_MONITOR
extern void * pmap_stacks_start;
extern void * pmap_stacks_end;
* MP coordination
*/
-decl_lck_mtx_data(static, dt_xc_lock);
+static LCK_MTX_DECLARE_ATTR(dt_xc_lock, &dtrace_lck_grp, &dtrace_lck_attr);
static uint32_t dt_xc_sync;
typedef struct xcArg {
return;
}
-/*
- * Initialization
- */
-void
-dtrace_isa_init(void)
-{
- lck_mtx_init(&dt_xc_lock, dtrace_lck_grp, dtrace_lck_attr);
- return;
-}
-
/**
* Register definitions
* LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
*
*/
-static lck_mtx_t dtrace_lock; /* probe state lock */
-static lck_mtx_t dtrace_provider_lock; /* provider state lock */
-static lck_mtx_t dtrace_meta_lock; /* meta-provider state lock */
-static lck_rw_t dtrace_dof_mode_lock; /* dof mode lock */
+static LCK_MTX_DECLARE_ATTR(dtrace_lock,
+ &dtrace_lck_grp, &dtrace_lck_attr); /* probe state lock */
+static LCK_MTX_DECLARE_ATTR(dtrace_provider_lock,
+ &dtrace_lck_grp, &dtrace_lck_attr); /* provider state lock */
+static LCK_MTX_DECLARE_ATTR(dtrace_meta_lock,
+ &dtrace_lck_grp, &dtrace_lck_attr); /* meta-provider state lock */
+static LCK_RW_DECLARE_ATTR(dtrace_dof_mode_lock,
+ &dtrace_lck_grp, &dtrace_lck_attr); /* dof mode lock */
/*
* DTrace Provider Variables
static dtrace_errhash_t dtrace_errhash[DTRACE_ERRHASHSZ];
static const char *dtrace_errlast;
static kthread_t *dtrace_errthread;
-static lck_mtx_t dtrace_errlock;
+static LCK_MTX_DECLARE_ATTR(dtrace_errlock, &dtrace_lck_grp, &dtrace_lck_attr);
#endif
/*
.d_reserved_2 = eno_putc,
};
-lck_attr_t* dtrace_lck_attr;
-lck_grp_attr_t* dtrace_lck_grp_attr;
-lck_grp_t* dtrace_lck_grp;
+LCK_ATTR_DECLARE(dtrace_lck_attr, 0, 0);
+LCK_GRP_DECLARE(dtrace_lck_grp, "dtrace");
static int gMajDevNo;
return;
}
- /*
- * Create the dtrace lock group and attrs.
- */
- dtrace_lck_attr = lck_attr_alloc_init();
- dtrace_lck_grp_attr= lck_grp_attr_alloc_init();
- dtrace_lck_grp = lck_grp_alloc_init("dtrace", dtrace_lck_grp_attr);
-
- /*
- * We have to initialize all locks explicitly
- */
- lck_mtx_init(&dtrace_lock, dtrace_lck_grp, dtrace_lck_attr);
- lck_mtx_init(&dtrace_provider_lock, dtrace_lck_grp, dtrace_lck_attr);
- lck_mtx_init(&dtrace_meta_lock, dtrace_lck_grp, dtrace_lck_attr);
- lck_mtx_init(&dtrace_procwaitfor_lock, dtrace_lck_grp, dtrace_lck_attr);
-#if DEBUG
- lck_mtx_init(&dtrace_errlock, dtrace_lck_grp, dtrace_lck_attr);
-#endif
- lck_rw_init(&dtrace_dof_mode_lock, dtrace_lck_grp, dtrace_lck_attr);
-
/*
* The cpu_core structure consists of per-CPU state available in any context.
* On some architectures, this may mean that the page(s) containing the
* is up to the platform to assure that this is performed properly. Note that
* the structure is sized to avoid false sharing.
*/
- lck_mtx_init(&cpu_lock, dtrace_lck_grp, dtrace_lck_attr);
- lck_mtx_init(&cyc_lock, dtrace_lck_grp, dtrace_lck_attr);
- lck_mtx_init(&mod_lock, dtrace_lck_grp, dtrace_lck_attr);
/*
* Initialize the CPU offline/online hooks.
cpu_core = (cpu_core_t *)kmem_zalloc( ncpu * sizeof(cpu_core_t), KM_SLEEP );
for (i = 0; i < ncpu; ++i) {
- lck_mtx_init(&cpu_core[i].cpuc_pid_lock, dtrace_lck_grp, dtrace_lck_attr);
+ lck_mtx_init(&cpu_core[i].cpuc_pid_lock, &dtrace_lck_grp, &dtrace_lck_attr);
}
cpu_list = (dtrace_cpu_t *)kmem_zalloc( ncpu * sizeof(dtrace_cpu_t), KM_SLEEP );
cpu_list[i].cpu_id = (processorid_t)i;
cpu_list[i].cpu_next = &(cpu_list[(i+1) % ncpu]);
LIST_INIT(&cpu_list[i].cpu_cyc_list);
- lck_rw_init(&cpu_list[i].cpu_ft_lock, dtrace_lck_grp, dtrace_lck_attr);
+ lck_rw_init(&cpu_list[i].cpu_ft_lock, &dtrace_lck_grp, &dtrace_lck_attr);
}
lck_mtx_lock(&cpu_lock);
offsetof(dtrace_string_t, dtst_next),
offsetof(dtrace_string_t, dtst_prev));
- dtrace_isa_init();
/*
* See dtrace_impl.h for a description of dof modes.
* The default is lazy dof.
/*
* cpuvar
*/
-lck_mtx_t cpu_lock;
-lck_mtx_t cyc_lock;
-lck_mtx_t mod_lock;
+LCK_MTX_DECLARE_ATTR(cpu_lock, &dtrace_lck_grp, &dtrace_lck_attr);
+LCK_MTX_DECLARE_ATTR(cyc_lock, &dtrace_lck_grp, &dtrace_lck_attr);
+LCK_MTX_DECLARE_ATTR(mod_lock, &dtrace_lck_grp, &dtrace_lck_attr);
dtrace_cpu_t *cpu_list;
cpu_core_t *cpu_core; /* XXX TLB lockdown? */
* duty to resume the task.
*/
-lck_mtx_t dtrace_procwaitfor_lock;
+LCK_MTX_DECLARE_ATTR(dtrace_procwaitfor_lock, &dtrace_lck_grp, &dtrace_lck_attr);
typedef struct dtrace_proc_awaited_entry {
struct dtrace_procdesc *pdesc;
static thread_t fasttrap_cleanup_thread;
-static lck_mtx_t fasttrap_cleanup_mtx;
+static LCK_GRP_DECLARE(fasttrap_lck_grp, "fasttrap");
+static LCK_ATTR_DECLARE(fasttrap_lck_attr, 0, 0);
+static LCK_MTX_DECLARE_ATTR(fasttrap_cleanup_mtx,
+ &fasttrap_lck_grp, &fasttrap_lck_attr);
#define FASTTRAP_CLEANUP_PROVIDER 0x1
static fasttrap_hash_t fasttrap_procs;
static uint64_t fasttrap_pid_count; /* pid ref count */
-static lck_mtx_t fasttrap_count_mtx; /* lock on ref count */
+static LCK_MTX_DECLARE_ATTR(fasttrap_count_mtx, /* lock on ref count */
+ &fasttrap_lck_grp, &fasttrap_lck_attr);
#define FASTTRAP_ENABLE_FAIL 1
#define FASTTRAP_ENABLE_PARTIAL 2
"dtrace.fasttrap_probe_t[3]"
};
-/*
- * APPLE NOTE: We have to manage locks explicitly
- */
-lck_grp_t* fasttrap_lck_grp;
-lck_grp_attr_t* fasttrap_lck_grp_attr;
-lck_attr_t* fasttrap_lck_attr;
-
static int
fasttrap_highbit(ulong_t i)
{
static fasttrap_tracepoint_spec_t *fasttrap_retired_spec;
static size_t fasttrap_cur_retired = 0, fasttrap_retired_size;
-static lck_mtx_t fasttrap_retired_mtx;
+static LCK_MTX_DECLARE_ATTR(fasttrap_retired_mtx,
+ &fasttrap_lck_grp, &fasttrap_lck_attr);
#define DEFAULT_RETIRED_SIZE 256
sprunlock(p);
p = PROC_NULL;
- mac_proc_check_get_task(state->dts_cred.dcr_cred, &pident);
+ (void) mac_proc_check_get_task(state->dts_cred.dcr_cred, &pident, TASK_FLAVOR_CONTROL);
p = sprlock(pident.p_pid);
if (p == PROC_NULL) {
/*
* APPLE NOTE: We have to initialize all locks explicitly
*/
- lck_mtx_init(&new_fprc->ftpc_mtx, fasttrap_lck_grp, fasttrap_lck_attr);
+ lck_mtx_init(&new_fprc->ftpc_mtx, &fasttrap_lck_grp, &fasttrap_lck_attr);
new_fprc->ftpc_next = bucket->ftb_data;
bucket->ftb_data = new_fprc;
* APPLE NOTE: explicit lock management. Not 100% certain we need this, the
* memory is freed even without the destroy. Maybe accounting cleanup?
*/
- lck_mtx_destroy(&fprc->ftpc_mtx, fasttrap_lck_grp);
+ lck_mtx_destroy(&fprc->ftpc_mtx, &fasttrap_lck_grp);
kmem_free(fprc, sizeof (fasttrap_proc_t));
}
/*
* APPLE NOTE: locks require explicit init
*/
- lck_mtx_init(&new_fp->ftp_mtx, fasttrap_lck_grp, fasttrap_lck_attr);
- lck_mtx_init(&new_fp->ftp_cmtx, fasttrap_lck_grp, fasttrap_lck_attr);
+ lck_mtx_init(&new_fp->ftp_mtx, &fasttrap_lck_grp, &fasttrap_lck_attr);
+ lck_mtx_init(&new_fp->ftp_cmtx, &fasttrap_lck_grp, &fasttrap_lck_attr);
ASSERT(new_fp->ftp_proc != NULL);
* APPLE NOTE: explicit lock management. Not 100% certain we need this, the
* memory is freed even without the destroy. Maybe accounting cleanup?
*/
- lck_mtx_destroy(&provider->ftp_mtx, fasttrap_lck_grp);
- lck_mtx_destroy(&provider->ftp_cmtx, fasttrap_lck_grp);
+ lck_mtx_destroy(&provider->ftp_mtx, &fasttrap_lck_grp);
+ lck_mtx_destroy(&provider->ftp_cmtx, &fasttrap_lck_grp);
kmem_free(provider, sizeof (fasttrap_provider_t));
ASSERT(fasttrap_tpoints.fth_table != NULL);
for (i = 0; i < fasttrap_tpoints.fth_nent; i++) {
- lck_mtx_init(&fasttrap_tpoints.fth_table[i].ftb_mtx, fasttrap_lck_grp, fasttrap_lck_attr);
+ lck_mtx_init(&fasttrap_tpoints.fth_table[i].ftb_mtx, &fasttrap_lck_grp,
+ &fasttrap_lck_attr);
}
/*
ASSERT(fasttrap_provs.fth_table != NULL);
for (i = 0; i < fasttrap_provs.fth_nent; i++) {
- lck_mtx_init(&fasttrap_provs.fth_table[i].ftb_mtx, fasttrap_lck_grp, fasttrap_lck_attr);
+ lck_mtx_init(&fasttrap_provs.fth_table[i].ftb_mtx, &fasttrap_lck_grp,
+ &fasttrap_lck_attr);
}
/*
#ifndef illumos
for (i = 0; i < fasttrap_procs.fth_nent; i++) {
- lck_mtx_init(&fasttrap_procs.fth_table[i].ftb_mtx, fasttrap_lck_grp, fasttrap_lck_attr);
+ lck_mtx_init(&fasttrap_procs.fth_table[i].ftb_mtx, &fasttrap_lck_grp,
+ &fasttrap_lck_attr);
}
#endif
}
- /*
- * Create the fasttrap lock group. Must be done before fasttrap_attach()!
- */
- fasttrap_lck_attr = lck_attr_alloc_init();
- fasttrap_lck_grp_attr= lck_grp_attr_alloc_init();
- fasttrap_lck_grp = lck_grp_alloc_init("fasttrap", fasttrap_lck_grp_attr);
-
- /*
- * Initialize global locks
- */
- lck_mtx_init(&fasttrap_cleanup_mtx, fasttrap_lck_grp, fasttrap_lck_attr);
- lck_mtx_init(&fasttrap_count_mtx, fasttrap_lck_grp, fasttrap_lck_attr);
-
fasttrap_attach();
/*
fasttrap_retired_size = DEFAULT_RETIRED_SIZE;
fasttrap_retired_spec = kmem_zalloc(fasttrap_retired_size * sizeof(*fasttrap_retired_spec),
KM_SLEEP);
- lck_mtx_init(&fasttrap_retired_mtx, fasttrap_lck_grp, fasttrap_lck_attr);
fasttrap_inited = 1;
}
for (seg = firstsegfromheader(mh); seg != NULL; seg = nextsegfromheader(mh, seg)) {
kernel_section_t *sect = firstsect(seg);
- if (strcmp(seg->segname, "__KLD") == 0) {
+ if (strcmp(seg->segname, "__KLD") == 0 || strcmp(seg->segname, "__KLDDATA") == 0) {
continue;
}
CLOSURE(prf)
CLOSURE(proc_best_name)
CLOSURE(proc_is64bit)
+ X86_ONLY(proc_require)
CRITICAL(rbtrace_bt)
CRITICAL(register_cpu_setup_func)
CRITICAL(ret64_iret)
CRITICAL(uread)
CRITICAL(uwrite)
CRITICAL(vstart)
+ X86_ONLY(zone_has_index)
+ X86_ONLY(zone_id_require)
+ X86_ONLY(zone_id_require_panic)
+ X86_ONLY(zone_range_contains)
+ X86_ONLY(zone_require_panic)
};
#define BLACKLIST_COUNT (sizeof(fbt_blacklist)/sizeof(fbt_blacklist[0]))
};
dtrace_id_t lockstat_probemap[LS_NPROBES];
-void (*lockstat_probe)(dtrace_id_t, uint64_t, uint64_t,
- uint64_t, uint64_t, uint64_t);
static dtrace_provider_id_t lockstat_id;
return DDI_FAILURE;
}
- lockstat_probe = dtrace_probe;
- membar_producer();
-
return DDI_SUCCESS;
}
{"hv", "guest-enter", 1, 1, "uint64_t *", "guest_regs_t *" },
{"hv", "guest-exit", 0, 0, "uint32_t", "uint32_t" },
{"hv", "guest-exit", 1, 1, "uint64_t *", "guest_regs_t *" },
+ {"hv", "guest-error", 0, 0, "uint32_t", "uint32_t" },
+ {"hv", "guest-error", 1, 1, "uint64_t *", "guest_regs_t *" },
+ {"hv", "guest-error", 2, 2, "uint32_t", "uint32_t" },
{ NULL, NULL, 0, 0, NULL, NULL }
};
#define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
#define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
-extern lck_attr_t* dtrace_lck_attr;
-extern lck_grp_t* dtrace_lck_grp;
-static lck_mtx_t dtrace_systrace_lock; /* probe state lock */
+static LCK_MTX_DECLARE_ATTR(dtrace_systrace_lock,
+ &dtrace_lck_grp, &dtrace_lck_attr); /* probe state lock */
systrace_sysent_t *systrace_sysent = NULL;
void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
s->stsy_underlying = a->sy_callc;
s->stsy_return_type = a->sy_return_type;
}
- lck_mtx_init(&dtrace_systrace_lock, dtrace_lck_grp, dtrace_lck_attr);
}
lck_mtx_lock(&dtrace_systrace_lock);
if (sysent[sysnum].sy_callc == systrace_sysent[sysnum].stsy_underlying) {
+ /* It is not possible to write to sysent[] directly because it is const. */
vm_offset_t dss = ptrauth_nop_cast(vm_offset_t, &dtrace_systrace_syscall);
ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(vm_offset_t));
}
lck_mtx_unlock(&dtrace_systrace_lock);
+
return 0;
}
systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
if (disable) {
+ /*
+ * Usage of volatile protects the if statement below from being optimized away.
+ *
+ * Compilers are clever and know that const array values can't change in time
+ * and the if below is always false. That is because it can't see that DTrace
+ * injects dtrace_systrace_syscall dynamically and violates constness of the
+ * array.
+ */
+ volatile const struct sysent *syscallent = &sysent[sysnum];
+
lck_mtx_lock(&dtrace_systrace_lock);
- if (sysent[sysnum].sy_callc == dtrace_systrace_syscall) {
- ml_nofault_copy((vm_offset_t)&systrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(systrace_sysent[sysnum].stsy_underlying));
+ if (syscallent->sy_callc == dtrace_systrace_syscall) {
+ ml_nofault_copy((vm_offset_t)&systrace_sysent[sysnum].stsy_underlying,
+ (vm_offset_t)&syscallent->sy_callc, sizeof(vm_offset_t));
}
lck_mtx_unlock(&dtrace_systrace_lock);
}
#endif /* MACH_ASSERT */
} mach_trap_t;
-extern const mach_trap_t mach_trap_table[]; /* syscall_sw.h now declares this as const */
-extern int mach_trap_count;
+extern const mach_trap_t mach_trap_table[]; /* syscall_sw.h now declares this as const */
+extern const int mach_trap_count;
-extern const char *mach_syscall_name_table[];
+extern const char *const mach_syscall_name_table[];
/* XXX From osfmk/i386/bsd_i386.c */
struct mach_call_args {
lck_mtx_lock(&dtrace_systrace_lock);
if (mach_trap_table[sysnum].mach_trap_function == machtrace_sysent[sysnum].stsy_underlying) {
+ /* It is not possible to write to mach_trap_table[] directly because it is const. */
vm_offset_t dss = ptrauth_nop_cast(vm_offset_t, &dtrace_machtrace_syscall);
ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
}
machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
if (disable) {
- lck_mtx_lock(&dtrace_systrace_lock);
+ /*
+ * Usage of volatile protects the if statement below from being optimized away.
+ *
+ * Compilers are clever and know that const array values can't change in time
+ * and the if below is always false. That is because it can't see that DTrace
+ * injects dtrace_machtrace_syscall dynamically and violates constness of the
+ * array.
+ */
+ volatile const mach_trap_t *machtrap = &mach_trap_table[sysnum];
- if (mach_trap_table[sysnum].mach_trap_function == (mach_call_t)dtrace_machtrace_syscall) {
- ml_nofault_copy((vm_offset_t)&machtrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
+ lck_mtx_lock(&dtrace_systrace_lock);
+ if (machtrap->mach_trap_function == (mach_call_t)dtrace_machtrace_syscall) {
+ ml_nofault_copy((vm_offset_t)&machtrace_sysent[sysnum].stsy_underlying,
+ (vm_offset_t)&machtrap->mach_trap_function, sizeof(vm_offset_t));
}
lck_mtx_unlock(&dtrace_systrace_lock);
}
}
}
-/*
- * Initialization
- */
-void
-dtrace_isa_init(void)
-{
- return;
-}
-
/*
* Runtime and ABI
*/
#include <i386/mp.h>
#include <kern/kalloc.h>
+#if DEBUG || DEVELOPMENT
+#include <kern/hvg_hypercall.h>
+#endif
+
static int
_i386_cpu_info SYSCTL_HANDLER_ARGS
CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED,
&traptrace_enabled, 0, "Enabled/disable trap trace");
+
+/*
+ * Trigger a guest kernel core dump (internal only)
+ * Usage: sysctl kern.trigger_kernel_coredump = 1
+ * (option selector must be 1, other values reserved)
+ */
+
+static int
+sysctl_trigger_kernel_coredump(struct sysctl_oid *oidp __unused, void *arg1, int arg2, struct sysctl_req *req)
+{
+ int error = 0;
+ hvg_hcall_return_t hv_ret;
+ char buf[2]; // 1 digit for dump option + 1 '\0'
+
+ if (req->newptr) {
+ // Write request
+ if (req->newlen > 1) {
+ return EINVAL;
+ }
+ error = SYSCTL_IN(req, buf, req->newlen);
+ buf[req->newlen] = '\0';
+ if (!error) {
+ if (strcmp(buf, "1") != 0) {
+ return EINVAL;
+ }
+ /* Issue hypercall to trigger a dump */
+ hv_ret = hvg_hcall_trigger_dump(arg1, HVG_HCALL_DUMP_OPTION_REGULAR);
+
+ /* Translate hypercall error code to syscall error code */
+ switch (hv_ret) {
+ case HVG_HCALL_SUCCESS:
+ error = SYSCTL_OUT(req, arg1, 41);
+ break;
+ case HVG_HCALL_ACCESS_DENIED:
+ error = EPERM;
+ break;
+ case HVG_HCALL_INVALID_CODE:
+ case HVG_HCALL_INVALID_PARAMETER:
+ error = EINVAL;
+ break;
+ case HVG_HCALL_IO_FAILED:
+ error = EIO;
+ break;
+ case HVG_HCALL_FEAT_DISABLED:
+ case HVG_HCALL_UNSUPPORTED:
+ error = ENOTSUP;
+ break;
+ default:
+ error = ENODEV;
+ }
+ }
+ } else {
+ // Read request
+ error = SYSCTL_OUT(req, arg1, arg2);
+ }
+ return error;
+}
+
+
+static hvg_hcall_vmcore_file_t sysctl_vmcore;
+
+void
+hvg_bsd_init(void)
+{
+ if (!cpuid_vmm_present()) {
+ return;
+ }
+
+ if ((cpuid_vmm_get_applepv_features() & CPUID_LEAF_FEATURE_COREDUMP) != 0) {
+ /* Register an OID in the sysctl MIB tree for kern.trigger_kernel_coredump */
+ struct sysctl_oid *hcall_trigger_dump_oid = zalloc_permanent(sizeof(struct sysctl_oid), ZALIGN(struct sysctl_oid));
+ struct sysctl_oid oid = SYSCTL_STRUCT_INIT(_kern,
+ OID_AUTO,
+ trigger_kernel_coredump,
+ CTLTYPE_STRING | CTLFLAG_RW,
+ &sysctl_vmcore, sizeof(sysctl_vmcore),
+ sysctl_trigger_kernel_coredump,
+ "A", "Request that the hypervisor take a live kernel dump");
+ *hcall_trigger_dump_oid = oid;
+ sysctl_register_oid(hcall_trigger_dump_oid);
+ }
+}
+
#endif /* DEVELOPMENT || DEBUG */
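
A hedged userspace sketch matching the handler registered above (only present on virtualized x86 DEVELOPMENT/DEBUG kernels whose hypervisor advertises the coredump feature): the new value must be the single character '1' (newlen == 1), and on success the handler returns the vmcore tag via the old buffer. The buffer size here is an assumption.

    #include <stdio.h>
    #include <sys/sysctl.h>

    int
    main(void)
    {
        char tag[64] = {0};
        size_t taglen = sizeof(tag);

        if (sysctlbyname("kern.trigger_kernel_coredump", tag, &taglen, "1", 1) == 0) {
            printf("dump requested: %s\n", tag);
            return 0;
        }
        perror("kern.trigger_kernel_coredump");
        return 1;
    }
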
#include <sys/conf.h>
#include <sys/vm.h>
#include <sys/uio_internal.h>
-#include <sys/malloc.h>
+
+#include <kern/zalloc.h>
#include <mach/vm_types.h>
#include <mach/vm_param.h>
#include <vm/vm_kern.h> /* for kernel_map */
+#include <libkern/section_keywords.h>
#include <pexpert/pexpert.h> /* for PE_parse_boot_argn */
#endif
-static caddr_t devzerobuf;
+static SECURITY_READ_ONLY_LATE(caddr_t) devzerobuf;
int mmread(dev_t dev, struct uio *uio);
int mmwrite(dev_t dev, struct uio *uio);
error = 0; /* Always succeeds, always consumes all input */
break;
case 3:
- if (devzerobuf == NULL) {
- MALLOC(devzerobuf, caddr_t, PAGE_SIZE, M_TEMP, M_WAITOK);
- bzero(devzerobuf, PAGE_SIZE);
- }
+ assert(devzerobuf != NULL);
+
if (uio->uio_rw == UIO_WRITE) {
c = uio_curriovlen(uio);
#endif
}
+__startup_func
+static void
+devzerobuf_init(void)
+{
+ devzerobuf = zalloc_permanent(PAGE_SIZE, ZALIGN_NONE); /* zeroed */
+}
+STARTUP(ZALLOC, STARTUP_RANK_LAST, devzerobuf_init);
+
#if CONFIG_DEV_KMEM
void
dev_kmem_init(void)
/*
* Written at initialization, read-only thereafter.
*/
-lck_grp_t *mt_lock_grp = NULL;
+LCK_GRP_DECLARE(mt_lock_grp, MT_NODE);
static int mt_dev_major;
static mt_device_t
int
mt_dev_init(void)
{
- mt_lock_grp = lck_grp_alloc_init(MT_NODE, LCK_GRP_ATTR_NULL);
- assert(mt_lock_grp != NULL);
-
mt_dev_major = cdevsw_add(-1 /* allocate a major number */, &mt_cdevsw);
if (mt_dev_major < 0) {
panic("monotonic: cdevsw_add failed: %d", mt_dev_major);
__builtin_unreachable();
}
- lck_mtx_init(&mt_devices[i].mtd_lock, mt_lock_grp, LCK_ATTR_NULL);
+ lck_mtx_init(&mt_devices[i].mtd_lock, &mt_lock_grp, LCK_ATTR_NULL);
}
return 0;
/* Nothing to do, already all 64-bit */
}
+void
+munge_llll(void *args __unused)
+{
+ /* Nothing to do, already all 64-bit */
+}
+
void
munge_ll(void *args __unused)
{
#include <dev/busvar.h> /* for pseudo_inits */
#include <sys/kdebug.h>
#include <sys/monotonic.h>
-#include <sys/reason.h>
#include <mach/mach_types.h>
#include <mach/vm_prot.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <kern/clock.h>
+#include <sys/csr.h>
#include <mach/kern_return.h>
#include <mach/thread_act.h> /* for thread_resume() */
#include <sys/mcache.h> /* for mcache_init() */
#include <net/if_gif.h> /* for gif_init() */
#include <miscfs/devfs/devfsdefs.h> /* for devfs_kernel_mount() */
#include <vm/vm_kern.h> /* for kmem_suballoc() */
-#include <sys/semaphore.h> /* for psem_lock_init() */
#include <sys/msgbuf.h> /* for log_setsize() */
-#include <sys/tty.h> /* for tty_init() */
#include <sys/proc_uuid_policy.h> /* proc_uuid_policy_init() */
#include <netinet/flow_divert.h> /* flow_divert_init() */
#include <net/content_filter.h> /* for cfil_init() */
void *swapmap;
struct swdevt swdevt[1];
+static LCK_GRP_DECLARE(hostname_lck_grp, "hostname");
+LCK_MTX_DECLARE(hostname_lock, &hostname_lck_grp);
+LCK_MTX_DECLARE(domainname_lock, &hostname_lck_grp);
+
dev_t rootdev; /* device of the root */
dev_t dumpdev; /* device to take dumps on */
long dumplo; /* offset into dumpdev */
long hostid;
char hostname[MAXHOSTNAMELEN];
-lck_mtx_t hostname_lock;
-lck_grp_t *hostname_lck_grp;
char domainname[MAXDOMNAMELEN];
-lck_mtx_t domainname_lock;
-
-char rootdevice[DEVMAXNAMESIZE];
+char rootdevice[DEVMAXNAMESIZE];
struct vnode *rootvp;
bool rootvp_is_ssd = false;
__private_extern__ int proc_ref_tracking_disabled = 0; /* disable panics on leaked proc refs across syscall boundary */
#endif
-#if OS_REASON_DEBUG
-__private_extern__ int os_reason_debug_disabled = 0; /* disable asserts for when we fail to allocate OS reasons */
-#endif
-
extern kern_return_t IOFindBSDRoot(char *, unsigned int, dev_t *, u_int32_t *);
extern void IOSecureBSDRoot(const char * rootName);
extern kern_return_t IOKitBSDInit(void );
extern boolean_t IOSetRecoveryBoot(bsd_bootfail_mode_t, uuid_t, boolean_t);
extern void kminit(void);
-extern void file_lock_init(void);
extern void bsd_bufferinit(void);
extern void oslog_setsize(int size);
extern void throttle_init(void);
-extern void acct_init(void);
#if CONFIG_LOCKERBOOT
#define LOCKER_PROTOBOOT_MOUNT "/protoboot"
#if CONFIG_DEV_KMEM
extern void dev_kmem_init(void);
#endif
-extern void time_zone_slock_init(void);
extern void select_waitq_init(void);
static void process_name(const char *, proc_t);
static void setconf(void);
#if CONFIG_BASESYSTEMROOT
-static int bsd_find_basesystem_dmg(char *bsdmgpath_out, bool *rooted_dmg);
+static int bsd_find_basesystem_dmg(char *bsdmgpath_out, bool *rooted_dmg, bool *skip_signature_check);
static boolean_t bsdmgroot_bootable(void);
#endif // CONFIG_BASESYSTEMROOT
extern thread_t cloneproc(task_t, coalition_t, proc_t, int, int);
extern int (*mountroot)(void);
-lck_grp_t * proc_lck_grp;
-lck_grp_t * proc_slock_grp;
-lck_grp_t * proc_fdmlock_grp;
-lck_grp_t * proc_kqhashlock_grp;
-lck_grp_t * proc_knhashlock_grp;
-lck_grp_t * proc_ucred_mlock_grp;
-lck_grp_t * proc_mlock_grp;
-lck_grp_t * proc_dirslock_grp;
-lck_grp_attr_t * proc_lck_grp_attr;
-lck_attr_t * proc_lck_attr;
-lck_mtx_t * proc_list_mlock;
-lck_mtx_t * proc_klist_mlock;
+LCK_ATTR_DECLARE(proc_lck_attr, 0, 0);
+LCK_GRP_DECLARE(proc_lck_grp, "proc");
+LCK_GRP_DECLARE(proc_slock_grp, "proc-slock");
+LCK_GRP_DECLARE(proc_fdmlock_grp, "proc-fdmlock");
+LCK_GRP_DECLARE(proc_mlock_grp, "proc-mlock");
+LCK_GRP_DECLARE(proc_ucred_mlock_grp, "proc-ucred-mlock");
+LCK_GRP_DECLARE(proc_dirslock_grp, "proc-dirslock");
+LCK_GRP_DECLARE(proc_kqhashlock_grp, "proc-kqhashlock");
+LCK_GRP_DECLARE(proc_knhashlock_grp, "proc-knhashlock");
-#if CONFIG_XNUPOST
-lck_grp_t * sysctl_debug_test_stackshot_owner_grp;
-lck_mtx_t * sysctl_debug_test_stackshot_owner_init_mtx;
-#endif /* !CONFIG_XNUPOST */
-extern lck_mtx_t * execargs_cache_lock;
+LCK_MTX_DECLARE_ATTR(proc_list_mlock, &proc_mlock_grp, &proc_lck_attr);
#if XNU_TARGET_OS_OSX
/* hook called after root is mounted XXX temporary hack */
#endif
void set_rootvnode(vnode_t);
-extern lck_rw_t * rootvnode_rw_lock;
+extern lck_rw_t rootvnode_rw_lock;
/* called with an iocount and usecount on new_rootvnode */
void
return is_ramdisk;
}
-/*
- * This function is called before IOKit initialization, so that globals
- * like the sysctl tree are initialized before kernel extensions
- * are started (since they may want to register sysctls
- */
-void
-bsd_early_init(void)
-{
- sysctl_early_init();
-}
-
/*
* This function is called very early on in the Mach startup, from the
* function start_kernel_threads() in osfmk/kern/startup.c. It's called
bsd_init_kprintf("calling procinit\n");
procinit();
- /* Initialize the ttys (MUST be before kminit()/bsd_autoconf()!)*/
- tty_init();
-
/* kernel_task->proc = kernproc; */
set_bsdtask_info(kernel_task, (void *)kernproc);
bsd_init_kprintf("calling process_name\n");
process_name("kernel_task", kernproc);
- /* allocate proc lock group attribute and group */
- bsd_init_kprintf("calling lck_grp_attr_alloc_init\n");
- proc_lck_grp_attr = lck_grp_attr_alloc_init();
-
- proc_lck_grp = lck_grp_alloc_init("proc", proc_lck_grp_attr);
-
- proc_slock_grp = lck_grp_alloc_init("proc-slock", proc_lck_grp_attr);
- proc_ucred_mlock_grp = lck_grp_alloc_init("proc-ucred-mlock", proc_lck_grp_attr);
- proc_mlock_grp = lck_grp_alloc_init("proc-mlock", proc_lck_grp_attr);
- proc_fdmlock_grp = lck_grp_alloc_init("proc-fdmlock", proc_lck_grp_attr);
- proc_kqhashlock_grp = lck_grp_alloc_init("proc-kqhashlock", proc_lck_grp_attr);
- proc_knhashlock_grp = lck_grp_alloc_init("proc-knhashlock", proc_lck_grp_attr);
- proc_dirslock_grp = lck_grp_alloc_init("proc-dirslock", proc_lck_grp_attr);
-#if CONFIG_XNUPOST
- sysctl_debug_test_stackshot_owner_grp = lck_grp_alloc_init("test-stackshot-owner-grp", LCK_GRP_ATTR_NULL);
- sysctl_debug_test_stackshot_owner_init_mtx = lck_mtx_alloc_init(
- sysctl_debug_test_stackshot_owner_grp,
- LCK_ATTR_NULL);
-#endif /* !CONFIG_XNUPOST */
/* Allocate proc lock attribute */
- proc_lck_attr = lck_attr_alloc_init();
- proc_list_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
- proc_klist_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
- lck_mtx_init(&kernproc->p_mlock, proc_mlock_grp, proc_lck_attr);
- lck_mtx_init(&kernproc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
- lck_mtx_init(&kernproc->p_ucred_mlock, proc_ucred_mlock_grp, proc_lck_attr);
- lck_spin_init(&kernproc->p_slock, proc_slock_grp, proc_lck_attr);
- lck_rw_init(&kernproc->p_dirs_lock, proc_dirslock_grp, proc_lck_attr);
+ lck_mtx_init(&kernproc->p_mlock, &proc_mlock_grp, &proc_lck_attr);
+ lck_mtx_init(&kernproc->p_fdmlock, &proc_fdmlock_grp, &proc_lck_attr);
+ lck_mtx_init(&kernproc->p_ucred_mlock, &proc_ucred_mlock_grp, &proc_lck_attr);
+ lck_spin_init(&kernproc->p_slock, &proc_slock_grp, &proc_lck_attr);
+ lck_rw_init(&kernproc->p_dirs_lock, &proc_dirslock_grp, &proc_lck_attr);
assert(bsd_simul_execs != 0);
- execargs_cache_lock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
execargs_cache_size = bsd_simul_execs;
execargs_free_count = bsd_simul_execs;
execargs_cache = zalloc_permanent(bsd_simul_execs * sizeof(vm_offset_t),
ulock_initialize();
- hostname_lck_grp = lck_grp_alloc_init("hostname", LCK_GRP_ATTR_NULL);
- lck_mtx_init(&hostname_lock, hostname_lck_grp, LCK_ATTR_NULL);
- lck_mtx_init(&domainname_lock, hostname_lck_grp, LCK_ATTR_NULL);
-
/*
* Create process 0.
*/
kernproc->p_pgrp = &pgrp0;
LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
LIST_INIT(&pgrp0.pg_members);
- lck_mtx_init(&pgrp0.pg_mlock, proc_mlock_grp, proc_lck_attr);
+ lck_mtx_init(&pgrp0.pg_mlock, &proc_mlock_grp, &proc_lck_attr);
/* There is no other bsd thread this point and is safe without pgrp lock */
LIST_INSERT_HEAD(&pgrp0.pg_members, kernproc, p_pglist);
kernproc->p_listflag |= P_LIST_INPGRP;
session0.s_count = 1;
session0.s_leader = kernproc;
session0.s_listflags = 0;
- lck_mtx_init(&session0.s_mlock, proc_mlock_grp, proc_lck_attr);
+ lck_mtx_init(&session0.s_mlock, &proc_mlock_grp, &proc_lck_attr);
LIST_INSERT_HEAD(SESSHASH(0), &session0, s_hash);
proc_list_unlock();
TAILQ_INIT(&kernproc->p_aio_doneq);
kernproc->p_aio_total_count = 0;
- bsd_init_kprintf("calling file_lock_init\n");
- file_lock_init();
-
#if CONFIG_MACF
mac_cred_label_associate_kernel(kernproc->p_ucred);
#endif
filedesc0.fd_knlist = NULL;
filedesc0.fd_knhash = NULL;
filedesc0.fd_knhashmask = 0;
- lck_mtx_init(&filedesc0.fd_kqhashlock, proc_kqhashlock_grp, proc_lck_attr);
- lck_mtx_init(&filedesc0.fd_knhashlock, proc_knhashlock_grp, proc_lck_attr);
+ lck_mtx_init(&filedesc0.fd_kqhashlock, &proc_kqhashlock_grp, &proc_lck_attr);
+ lck_mtx_init(&filedesc0.fd_knhashlock, &proc_knhashlock_grp, &proc_lck_attr);
/* Create the limits structures. */
kernproc->p_limit = &limit0;
}
}
- bsd_init_kprintf("calling fpxlog_init\n");
- fpxlog_init();
-
/*
* Initialize buffers and hash links for buffers
*
bsd_init_kprintf("calling vfsinit\n");
vfsinit();
- /* Initialize file locks. */
- bsd_init_kprintf("calling lf_init\n");
- lf_init();
-
#if CONFIG_PROC_UUID_POLICY
/* Initial proc_uuid_policy subsystem */
bsd_init_kprintf("calling proc_uuid_policy_init()\n");
bsd_init_kprintf("calling aio_init\n");
aio_init();
- /* Initialize SysV shm subsystem locks; the subsystem proper is
- * initialized through a sysctl.
- */
-#if SYSV_SHM
- bsd_init_kprintf("calling sysv_shm_lock_init\n");
- sysv_shm_lock_init();
-#endif
-#if SYSV_SEM
- bsd_init_kprintf("calling sysv_sem_lock_init\n");
- sysv_sem_lock_init();
-#endif
-#if SYSV_MSG
- bsd_init_kprintf("sysv_msg_lock_init\n");
- sysv_msg_lock_init();
-#endif
- bsd_init_kprintf("calling pshm_lock_init\n");
- pshm_lock_init();
- bsd_init_kprintf("calling psem_lock_init\n");
- psem_lock_init();
-
pthread_init();
/* POSIX Shm and Sem */
bsd_init_kprintf("calling pshm_cache_init\n");
pshm_cache_init();
bsd_init_kprintf("calling psem_cache_init\n");
psem_cache_init();
- bsd_init_kprintf("calling time_zone_slock_init\n");
- time_zone_slock_init();
bsd_init_kprintf("calling select_waitq_init\n");
select_waitq_init();
kernproc->p_fd->fd_cdir = NULL;
kernproc->p_fd->fd_rdir = NULL;
+#if defined (__x86_64__) && (DEBUG || DEVELOPMENT)
+ hvg_bsd_init();
+#endif /* DEBUG || DEVELOPMENT */
+
#if CONFIG_FREEZE
#ifndef CONFIG_MEMORYSTATUS
#error "CONFIG_FREEZE defined without matching CONFIG_MEMORYSTATUS"
memorystatus_init();
#endif /* CONFIG_MEMORYSTATUS */
- bsd_init_kprintf("calling acct_init\n");
- acct_init();
-
bsd_init_kprintf("calling sysctl_mib_init\n");
sysctl_mib_init();
bsd_init_kprintf("calling bsd_autoconf\n");
bsd_autoconf();
- bsd_init_kprintf("calling os_reason_init\n");
- os_reason_init();
-
#if CONFIG_DTRACE
dtrace_postinit();
#endif
(void)vnode_ref(init_rootvnode);
(void)vnode_put(init_rootvnode);
- lck_rw_lock_exclusive(rootvnode_rw_lock);
+ lck_rw_lock_exclusive(&rootvnode_rw_lock);
set_rootvnode(init_rootvnode);
- lck_rw_unlock_exclusive(rootvnode_rw_lock);
+ lck_rw_unlock_exclusive(&rootvnode_rw_lock);
init_rootvnode = NULLVP; /* use rootvnode after this point */
if (bsdmgroot_bootable()) {
int error;
bool rooted_dmg = false;
+ bool skip_signature_check = false;
printf("trying to find and mount BaseSystem dmg as root volume\n");
#if DEVELOPMENT || DEBUG
panic("%s: M_NAMEI zone exhausted", __FUNCTION__);
}
- error = bsd_find_basesystem_dmg(dmgpath, &rooted_dmg);
+ error = bsd_find_basesystem_dmg(dmgpath, &rooted_dmg, &skip_signature_check);
if (error) {
bsd_init_kprintf("failed to to find BaseSystem dmg: error = %d\n", error);
} else {
bsd_init_kprintf("found BaseSystem dmg at: %s\n", dmgpath);
- error = imageboot_pivot_image(dmgpath, IMAGEBOOT_DMG, "/System/Volumes/BaseSystem", "System/Volumes/macOS", rooted_dmg);
+ error = imageboot_pivot_image(dmgpath, IMAGEBOOT_DMG, "/System/Volumes/BaseSystem", "System/Volumes/macOS", rooted_dmg, skip_signature_check);
if (error) {
bsd_init_kprintf("couldn't mount BaseSystem dmg: error = %d", error);
}
consider_zone_gc(FALSE);
#endif
- /* Initialize System Override call */
- init_system_override();
-
bsd_init_kprintf("done\n");
}
panic("bsd_utaskbootstrap: initproc not set\n");
}
#endif
+
+ zalloc_first_proc_made();
+
/*
* Since we aren't going back out the normal way to our parent,
* we have to drop the transition locks explicitly.
}
#endif
-#if OS_REASON_DEBUG
- if (PE_parse_boot_argn("-disable_osreason_debug", namep, sizeof(namep))) {
- os_reason_debug_disabled = 1;
- }
-#endif
-
PE_parse_boot_argn("sigrestrict", &sigrestrict_arg, sizeof(sigrestrict_arg));
#if DEVELOPMENT || DEBUG
// BaseSystem.dmg into its argument (which must be a char[MAXPATHLEN]).
static
int
-bsd_find_basesystem_dmg(char *bsdmgpath_out, bool *rooted_dmg)
+bsd_find_basesystem_dmg(char *bsdmgpath_out, bool *rooted_dmg, bool *skip_signature_check)
{
int error;
size_t len;
char *dmgbasepath;
char *dmgpath;
+ bool allow_rooted_dmg = false;
dmgbasepath = zalloc_flags(ZV_NAMEI, Z_ZERO | Z_WAITOK);
dmgpath = zalloc_flags(ZV_NAMEI, Z_ZERO | Z_WAITOK);
vnode_t imagevp = NULLVP;
+#if DEVELOPMENT || DEBUG
+ allow_rooted_dmg = true;
+#endif
+
//must provide output bool
- if (rooted_dmg) {
+ if (rooted_dmg && skip_signature_check) {
*rooted_dmg = false;
+ *skip_signature_check = false;
} else {
error = EINVAL;
goto done;
goto done;
}
+ if (csr_check(CSR_ALLOW_ANY_RECOVERY_OS) == 0) {
+ *skip_signature_check = true;
+ allow_rooted_dmg = true;
+ }
+
#if defined(__arm64__)
const char *boot_obj_path = IOGetBootObjectsPath();
if (boot_obj_path) {
goto done;
}
-#if DEVELOPMENT || DEBUG
- len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
- if (len > MAXPATHLEN) {
- error = ENAMETOOLONG;
- goto done;
- }
+ if (allow_rooted_dmg) {
+ len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
+ if (len > MAXPATHLEN) {
+ error = ENAMETOOLONG;
+ goto done;
+ }
- len = strlcat(dmgpath, "arm64eBaseSystem.rooted.dmg", MAXPATHLEN);
- if (len > MAXPATHLEN) {
- error = ENAMETOOLONG;
- goto done;
- }
+ len = strlcat(dmgpath, "arm64eBaseSystem.rooted.dmg", MAXPATHLEN);
+ if (len > MAXPATHLEN) {
+ error = ENAMETOOLONG;
+ goto done;
+ }
- error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
- if (error == 0) {
- *rooted_dmg = true;
- goto done;
+ error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
+ if (error == 0) {
+ *rooted_dmg = true;
+ *skip_signature_check = true;
+ goto done;
+ }
+ memset(dmgpath, 0, MAXPATHLEN);
}
- memset(dmgpath, 0, MAXPATHLEN);
-#endif // DEVELOPMENT || DEBUG
len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
if (len > MAXPATHLEN) {
goto done;
}
-#if DEVELOPMENT || DEBUG
- // Try BaseSystem.rooted.dmg
- len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
- if (len > MAXPATHLEN) {
- error = ENAMETOOLONG;
- goto done;
- }
+ if (allow_rooted_dmg) {
+ // Try BaseSystem.rooted.dmg
+ len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
+ if (len > MAXPATHLEN) {
+ error = ENAMETOOLONG;
+ goto done;
+ }
- len = strlcat(dmgpath, "/BaseSystem.rooted.dmg", MAXPATHLEN);
- if (len > MAXPATHLEN) {
- error = ENAMETOOLONG;
- goto done;
- }
+ len = strlcat(dmgpath, "/BaseSystem.rooted.dmg", MAXPATHLEN);
+ if (len > MAXPATHLEN) {
+ error = ENAMETOOLONG;
+ goto done;
+ }
- error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
- if (error == 0) {
- // we found it! success!
- *rooted_dmg = true;
- goto done;
+ error = vnode_lookup(dmgpath, 0, &imagevp, vfs_context_kernel());
+ if (error == 0) {
+ // we found it! success!
+ *rooted_dmg = true;
+ *skip_signature_check = true;
+ goto done;
+ }
}
-#endif // DEVELOPMENT || DEBUG
// Try BaseSystem.dmg
len = strlcpy(dmgpath, dmgbasepath, MAXPATHLEN);
/* XXX these should be in a common header somewhere, but aren't */
extern int chrtoblk_set(int, int);
-extern vm_offset_t kmem_mb_alloc(vm_map_t, int, int, kern_return_t *);
/* XXX most of these just exist to export; there's no good header for them*/
void pcb_synch(void);
-TAILQ_HEAD(, devsw_lock) devsw_locks;
-lck_mtx_t devsw_lock_list_mtx;
-lck_grp_t * devsw_lock_grp;
+typedef struct devsw_lock {
+ TAILQ_ENTRY(devsw_lock) dl_list;
+ thread_t dl_thread;
+ dev_t dl_dev;
+ int dl_mode;
+ int dl_waiters;
+} *devsw_lock_t;
+
+static LCK_GRP_DECLARE(devsw_lock_grp, "devsw");
+static LCK_MTX_DECLARE(devsw_lock_list_mtx, &devsw_lock_grp);
+static TAILQ_HEAD(, devsw_lock) devsw_locks = TAILQ_HEAD_INITIALIZER(devsw_locks);
/* Just to satisfy pstat command */
int dmmin, dmmax, dmtext;
-vm_offset_t
-kmem_mb_alloc(vm_map_t mbmap, int size, int physContig, kern_return_t *err)
-{
- vm_offset_t addr = 0;
- kern_return_t kr = KERN_SUCCESS;
-
- if (!physContig) {
- kr = kernel_memory_allocate(mbmap, &addr, size, 0, KMA_KOBJECT | KMA_LOMEM, VM_KERN_MEMORY_MBUF);
- } else {
- kr = kmem_alloc_contig(mbmap, &addr, size, PAGE_MASK, 0xfffff, 0, KMA_KOBJECT | KMA_LOMEM, VM_KERN_MEMORY_MBUF);
- }
-
- if (kr != KERN_SUCCESS) {
- addr = 0;
- }
- if (err) {
- *err = kr;
- }
-
- return addr;
-}
-
/*
* XXX this function only exists to be exported and do nothing.
*/
return ret;
}
+static devsw_lock_t
+devsw_lock_find_locked(dev_t dev, int mode)
+{
+ devsw_lock_t lock;
+
+ TAILQ_FOREACH(lock, &devsw_locks, dl_list) {
+ if (lock->dl_dev == dev && lock->dl_mode == mode) {
+ return lock;
+ }
+ }
+
+ return NULL;
+}
+
void
devsw_lock(dev_t dev, int mode)
{
- devsw_lock_t newlock, tmplock;
- int res;
+ devsw_lock_t newlock, curlock;
assert(0 <= major(dev) && major(dev) < nchrdev);
assert(mode == S_IFCHR || mode == S_IFBLK);
- MALLOC(newlock, devsw_lock_t, sizeof(struct devsw_lock), M_TEMP, M_WAITOK | M_ZERO);
+ newlock = kalloc_flags(sizeof(struct devsw_lock), Z_WAITOK | Z_ZERO);
newlock->dl_dev = dev;
newlock->dl_thread = current_thread();
newlock->dl_mode = mode;
lck_mtx_lock_spin(&devsw_lock_list_mtx);
-retry:
- TAILQ_FOREACH(tmplock, &devsw_locks, dl_list)
- {
- if (tmplock->dl_dev == dev && tmplock->dl_mode == mode) {
- res = msleep(tmplock, &devsw_lock_list_mtx, PVFS, "devsw_lock", NULL);
- assert(res == 0);
- goto retry;
- }
+
+ curlock = devsw_lock_find_locked(dev, mode);
+ if (curlock == NULL) {
+ TAILQ_INSERT_TAIL(&devsw_locks, newlock, dl_list);
+ } else {
+ curlock->dl_waiters++;
+ lck_mtx_sleep_with_inheritor(&devsw_lock_list_mtx,
+ LCK_SLEEP_SPIN, curlock, curlock->dl_thread,
+ THREAD_UNINT | THREAD_WAIT_NOREPORT,
+ TIMEOUT_WAIT_FOREVER);
+ assert(curlock->dl_thread == current_thread());
+ curlock->dl_waiters--;
}
- TAILQ_INSERT_TAIL(&devsw_locks, newlock, dl_list);
lck_mtx_unlock(&devsw_lock_list_mtx);
+
+ if (curlock != NULL) {
+ kfree(newlock, sizeof(struct devsw_lock));
+ }
}
+
void
devsw_unlock(dev_t dev, int mode)
{
- devsw_lock_t tmplock;
+ devsw_lock_t lock;
+ thread_t inheritor_thread = NULL;
assert(0 <= major(dev) && major(dev) < nchrdev);
lck_mtx_lock_spin(&devsw_lock_list_mtx);
- TAILQ_FOREACH(tmplock, &devsw_locks, dl_list)
- {
- if (tmplock->dl_dev == dev && tmplock->dl_mode == mode) {
- break;
- }
- }
+ lock = devsw_lock_find_locked(dev, mode);
- if (tmplock == NULL) {
- panic("Trying to unlock, and couldn't find lock.");
+ if (lock == NULL || lock->dl_thread != current_thread()) {
+ panic("current thread doesn't own the lock (%p)", lock);
}
- if (tmplock->dl_thread != current_thread()) {
- panic("Trying to unlock, but I don't hold the lock.");
+ if (lock->dl_waiters) {
+ wakeup_one_with_inheritor(lock, THREAD_AWAKENED,
+ LCK_WAKE_DEFAULT, &lock->dl_thread);
+ inheritor_thread = lock->dl_thread;
+ lock = NULL;
+ } else {
+ TAILQ_REMOVE(&devsw_locks, lock, dl_list);
}
- wakeup(tmplock);
- TAILQ_REMOVE(&devsw_locks, tmplock, dl_list);
-
lck_mtx_unlock(&devsw_lock_list_mtx);
- FREE(tmplock, M_TEMP);
-}
-
-void
-devsw_init()
-{
- devsw_lock_grp = lck_grp_alloc_init("devsw", NULL);
- assert(devsw_lock_grp != NULL);
-
- lck_mtx_init(&devsw_lock_list_mtx, devsw_lock_grp, NULL);
- TAILQ_INIT(&devsw_locks);
+ if (inheritor_thread) {
+ thread_deallocate(inheritor_thread);
+ }
+ kfree(lock, sizeof(struct devsw_lock));
}
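/*
 * Editor's sketch (not part of the diff): the rewritten devsw_lock()/devsw_unlock()
 * above replace the old msleep()/wakeup() retry loop with the turnstile-backed
 * lck_mtx_sleep_with_inheritor()/wakeup_one_with_inheritor() pair, so a waiter
 * pushes its priority onto the thread named as inheritor and the waker gets back
 * a referenced thread_t that must be released with thread_deallocate(). The
 * "gate" object below is hypothetical and only illustrates that pairing.
 */
#include <kern/locks.h>
#include <kern/thread.h>

static LCK_GRP_DECLARE(gate_lck_grp, "gate");
static LCK_MTX_DECLARE(gate_mtx, &gate_lck_grp);

struct gate {
	thread_t g_owner;        /* current owner, or THREAD_NULL */
	int      g_waiters;
};

static void
gate_enter(struct gate *g)
{
	lck_mtx_lock(&gate_mtx);
	if (g->g_owner == THREAD_NULL) {
		g->g_owner = current_thread();
	} else {
		g->g_waiters++;
		/*
		 * Block until gate_exit() hands ownership to this thread;
		 * naming g_owner as inheritor pushes our priority onto it.
		 */
		do {
			lck_mtx_sleep_with_inheritor(&gate_mtx, LCK_SLEEP_DEFAULT,
			    (event_t)g, g->g_owner,
			    THREAD_UNINT | THREAD_WAIT_NOREPORT, TIMEOUT_WAIT_FOREVER);
		} while (g->g_owner != current_thread());
		g->g_waiters--;
	}
	lck_mtx_unlock(&gate_mtx);
}

static void
gate_exit(struct gate *g)
{
	thread_t new_owner = THREAD_NULL;

	lck_mtx_lock(&gate_mtx);
	if (g->g_waiters > 0) {
		/*
		 * Wake exactly one waiter and record it as the new owner and
		 * inheritor; the wakeup returns a +1 thread reference.
		 */
		wakeup_one_with_inheritor((event_t)g, THREAD_AWAKENED,
		    LCK_WAKE_DEFAULT, &new_owner);
		g->g_owner = new_owner;
	} else {
		g->g_owner = THREAD_NULL;
	}
	lck_mtx_unlock(&gate_mtx);

	if (new_owner != THREAD_NULL) {
		thread_deallocate(new_owner);        /* drop the reference from the wakeup */
	}
}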
--- /dev/null
+/*
+ * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/* sysctl interface for testing percpu counters in DEBUG or DEVELOPMENT kernel only. */
+#if !(DEVELOPMENT || DEBUG)
+#error "Counter testing is not enabled on RELEASE configurations"
+#endif
+
+#include <sys/sysctl.h>
+#include <kern/counter.h>
+#include <machine/atomic.h>
+#include <libkern/libkern.h>
+#include <machine/machine_routines.h>
+#include <kern/cpu_data.h>
+
+#include <os/log.h>
+
+#ifdef CONFIG_XNUPOST
+#include <tests/xnupost.h>
+#endif /* CONFIG_XNUPOST */
+
+static _Atomic boolean_t scalable_counter_test_running = FALSE;
+scalable_counter_t test_scalable_counter;
+
+SCALABLE_COUNTER_DEFINE(test_static_scalable_counter);
+
+#ifdef CONFIG_XNUPOST
+kern_return_t counter_tests(void);
+/*
+ * Sanity test that a counter can be modified before zalloc is initialized.
+ */
+static void
+bump_static_counter(void* arg)
+{
+ (void) arg;
+ counter_inc(&test_static_scalable_counter);
+}
+
+STARTUP_ARG(PMAP_STEAL, STARTUP_RANK_MIDDLE, bump_static_counter, NULL);
+
+kern_return_t
+counter_tests()
+{
+ T_ASSERT_EQ_ULLONG(counter_load(&test_static_scalable_counter), 1, "Counter was incremented");
+ return KERN_SUCCESS;
+}
+#endif /* CONFIG_XNUPOST */
+
+static int
+sysctl_scalable_counter_test_start SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+ int ret_val = 1;
+ int error = 0;
+ boolean_t exclusive;
+ error = sysctl_io_number(req, ret_val, sizeof(int), &ret_val, NULL);
+ if (error || !req->newptr) {
+ return error;
+ }
+ /* The test doesn't support being run multiple times in parallel. */
+ exclusive = os_atomic_cmpxchg(&scalable_counter_test_running, FALSE, TRUE, seq_cst);
+ if (!exclusive) {
+ os_log(OS_LOG_DEFAULT, "scalable_counter_test: Caught attempt to run the test in parallel.");
+ return EINVAL;
+ }
+ counter_alloc(&test_scalable_counter);
+ return 0;
+}
+
+static int
+sysctl_scalable_counter_test_finish SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+ boolean_t exclusive;
+ int ret_val = 0;
+ int error = 0;
+ error = sysctl_io_number(req, ret_val, sizeof(int), &ret_val, NULL);
+ if (error || !req->newptr) {
+ return error;
+ }
+
+ /* The test doesn't support being run multiple times in parallel. */
+ exclusive = os_atomic_cmpxchg(&scalable_counter_test_running, TRUE, FALSE, seq_cst);
+ if (!exclusive) {
+ /* Finish called without start. */
+ return EINVAL;
+ }
+ return 0;
+}
+
+static int
+sysctl_scalable_counter_add SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+ int64_t value = 0;
+ int error = 0;
+ if (!os_atomic_load(&scalable_counter_test_running, seq_cst)) {
+ /* Must call start */
+ return EINVAL;
+ }
+ error = sysctl_io_number(req, value, sizeof(int64_t), &value, NULL);
+ if (error || !req->newptr) {
+ return error;
+ }
+ counter_add(&test_scalable_counter, value);
+ return 0;
+}
+
+static int
+sysctl_static_scalable_counter_add SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+ int64_t value = 0;
+ int error = 0;
+ if (!os_atomic_load(&scalable_counter_test_running, seq_cst)) {
+ /* Must call start */
+ return EINVAL;
+ }
+ error = sysctl_io_number(req, value, sizeof(int64_t), &value, NULL);
+ if (error || !req->newptr) {
+ return error;
+ }
+ counter_add(&test_static_scalable_counter, value);
+ return 0;
+}
+
+static int
+sysctl_scalable_counter_load SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+ uint64_t value;
+ if (!os_atomic_load(&scalable_counter_test_running, seq_cst)) {
+ /* Must call start */
+ return EINVAL;
+ }
+ value = counter_load(&test_scalable_counter);
+ return SYSCTL_OUT(req, &value, sizeof(value));
+}
+
+static int
+sysctl_scalable_counter_write_benchmark SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+ int error;
+ int64_t iterations;
+ int ret_val = 0;
+ if (!os_atomic_load(&scalable_counter_test_running, seq_cst)) {
+ /* Must call start */
+ return EINVAL;
+ }
+ error = sysctl_io_number(req, ret_val, sizeof(int), &iterations, NULL);
+ if (error || !req->newptr) {
+ return error;
+ }
+ for (int64_t i = 0; i < iterations; i++) {
+ counter_inc(&test_scalable_counter);
+ }
+ return 0;
+}
+
+static volatile uint64_t racy_counter;
+
+static int
+sysctl_racy_counter_write_benchmark SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+ int error;
+ int64_t iterations;
+ int ret_val = 0;
+ error = sysctl_io_number(req, ret_val, sizeof(int), &iterations, NULL);
+ if (error || !req->newptr) {
+ return error;
+ }
+ for (int64_t i = 0; i < iterations; i++) {
+ racy_counter++;
+ }
+ return 0;
+}
+
+static int
+sysctl_racy_counter_load SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+ uint64_t value = racy_counter;
+ return SYSCTL_OUT(req, &value, sizeof(value));
+}
+
+static _Atomic uint64_t atomic_counter;
+
+static int
+sysctl_atomic_counter_write_benchmark SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+ int error;
+ int64_t iterations;
+ int ret_val = 0;
+ error = sysctl_io_number(req, ret_val, sizeof(int), &iterations, NULL);
+ if (error || !req->newptr) {
+ return error;
+ }
+ for (int64_t i = 0; i < iterations; i++) {
+ os_atomic_add(&atomic_counter, 1, relaxed);
+ }
+ return 0;
+}
+
+static int
+sysctl_atomic_counter_load SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+ uint64_t value = os_atomic_load_wide(&atomic_counter, relaxed);
+ return SYSCTL_OUT(req, &value, sizeof(value));
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, scalable_counter_test_start,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED | CTLFLAG_LOCKED,
+ 0, 0, sysctl_scalable_counter_test_start, "I", "Setup per-cpu counter test");
+
+SYSCTL_PROC(_kern, OID_AUTO, scalable_counter_test_finish,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED | CTLFLAG_LOCKED,
+ 0, 0, sysctl_scalable_counter_test_finish, "I", "Finish per-cpu counter test");
+
+SYSCTL_PROC(_kern, OID_AUTO, scalable_counter_test_add,
+ CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_MASKED | CTLFLAG_LOCKED,
+ 0, 0, sysctl_scalable_counter_add, "I", "Perform an add on the per-cpu counter");
+
+SYSCTL_PROC(_kern, OID_AUTO, static_scalable_counter_test_add,
+ CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_MASKED | CTLFLAG_LOCKED,
+ 0, 0, sysctl_static_scalable_counter_add, "I", "Perform an add on the static per-cpu counter");
+
+SYSCTL_PROC(_kern, OID_AUTO, scalable_counter_test_load,
+ CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_MASKED | CTLFLAG_LOCKED,
+ 0, 0, sysctl_scalable_counter_load, "I", "Load the current per-cpu counter value.");
+
+SYSCTL_SCALABLE_COUNTER(_kern, static_scalable_counter_test_load,
+ test_static_scalable_counter, "Load the current static per-cpu counter value.");
+
+SYSCTL_PROC(_kern, OID_AUTO, scalable_counter_write_benchmark,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED | CTLFLAG_LOCKED,
+ 0, 0, sysctl_scalable_counter_write_benchmark, "I", "Per-cpu counter write benchmark");
+
+SYSCTL_PROC(_kern, OID_AUTO, scalable_counter_racy_counter_benchmark,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED | CTLFLAG_LOCKED,
+ 0, 0, sysctl_racy_counter_write_benchmark, "I", "Global counter racy benchmark");
+
+SYSCTL_PROC(_kern, OID_AUTO, scalable_counter_racy_counter_load,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED | CTLFLAG_LOCKED,
+ 0, 0, sysctl_racy_counter_load, "I", "Global counter racy load");
+
+SYSCTL_PROC(_kern, OID_AUTO, scalable_counter_atomic_counter_write_benchmark,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED | CTLFLAG_LOCKED,
+ 0, 0, sysctl_atomic_counter_write_benchmark, "I", "Atomic counter write benchmark");
+
+SYSCTL_PROC(_kern, OID_AUTO, scalable_counter_atomic_counter_load,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED | CTLFLAG_LOCKED,
+ 0, 0, sysctl_atomic_counter_load, "I", "Atomic counter load");
#define COMPRESSION_DEBUG_VERBOSE 0
#define MALLOC_DEBUG 0
-static const char *
-baseName(const char *path)
-{
- if (!path) {
- return NULL;
- }
- const char *ret = path;
- int i;
- for (i = 0; path[i] != 0; i++) {
- if (path[i] == '/') {
- ret = &path[i + 1];
- }
- }
- return ret;
-}
-
#if COMPRESSION_DEBUG
static char*
vnpath(vnode_t vp, char *path, int len)
}
#endif
-#define ErrorLog(x, args...) printf("%s:%d:%s: " x, baseName(__FILE__), __LINE__, __FUNCTION__, ## args)
+#define ErrorLog(x, args...) \
+ printf("%s:%d:%s: " x, __FILE_NAME__, __LINE__, __FUNCTION__, ## args)
#if COMPRESSION_DEBUG
-#define ErrorLogWithPath(x, args...) do { char *path; MALLOC(path, char *, PATH_MAX, M_TEMP, M_WAITOK); printf("%s:%d:%s: %s: " x, baseName(__FILE__), __LINE__, __FUNCTION__, vnpath(vp, path, PATH_MAX), ## args); FREE(path, M_TEMP); } while(0)
+#define ErrorLogWithPath(x, args...) do { \
+ char *path = zalloc(ZV_NAMEI); \
+ printf("%s:%d:%s: %s: " x, __FILE_NAME__, __LINE__, __FUNCTION__, \
+ vnpath(vp, path, PATH_MAX), ## args); \
+ zfree(ZV_NAMEI, path); \
+} while(0)
#else
-#define ErrorLogWithPath(x, args...) do { (void*)vp; printf("%s:%d:%s: %s: " x, baseName(__FILE__), __LINE__, __FUNCTION__, "<private>", ## args); } while(0)
+#define ErrorLogWithPath(x, args...) do { \
+ (void*)vp; \
+ printf("%s:%d:%s: %s: " x, __FILE_NAME__, __LINE__, __FUNCTION__, \
+ "<private>", ## args); \
+} while(0)
#endif
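/*
 * Editor's note (not part of the diff): __FILE_NAME__ is a compiler-provided
 * macro (a Clang extension) that expands to only the last path component of
 * __FILE__, which is why the runtime baseName() helper above could be deleted.
 * A minimal illustration:
 *
 *     printf("%s vs %s\n", __FILE__, __FILE_NAME__);
 *     // e.g. "bsd/kern/decmpfs.c vs decmpfs.c"
 */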
#if COMPRESSION_DEBUG
#define VerboseLogWithPath(x...) do { } while(0)
#endif
-#if MALLOC_DEBUG
-
-static SInt32 totalAlloc;
-
-typedef struct {
- uint32_t allocSz;
- uint32_t magic;
- const char *file;
- int line;
-} allocated;
-
-static void *
-_malloc(uint32_t sz, __unused int type, __unused int flags, const char *file, int line)
-{
- uint32_t allocSz = sz + 2 * sizeof(allocated);
-
- allocated *alloc = NULL;
- MALLOC(alloc, allocated *, allocSz, type, flags);
- if (!alloc) {
- ErrorLog("malloc failed\n");
- return NULL;
- }
-
- char *ret = (char*)&alloc[1];
- allocated *alloc2 = (allocated*)(ret + sz);
-
- alloc->allocSz = allocSz;
- alloc->magic = 0xdadadada;
- alloc->file = file;
- alloc->line = line;
-
- *alloc2 = *alloc;
-
- int s = OSAddAtomic(sz, &totalAlloc);
- ErrorLog("malloc(%d) -> %p, total allocations %d\n", sz, ret, s + sz);
-
- return ret;
-}
-
-static void
-_free(char *ret, __unused int type, const char *file, int line)
-{
- if (!ret) {
- ErrorLog("freeing null\n");
- return;
- }
- allocated *alloc = (allocated*)ret;
- alloc--;
- uint32_t sz = alloc->allocSz - 2 * sizeof(allocated);
- allocated *alloc2 = (allocated*)(ret + sz);
-
- if (alloc->magic != 0xdadadada) {
- panic("freeing bad pointer");
- }
-
- if (memcmp(alloc, alloc2, sizeof(*alloc)) != 0) {
- panic("clobbered data");
- }
-
- memset(ret, 0xce, sz);
- alloc2->file = file;
- alloc2->line = line;
- FREE(alloc, type);
- int s = OSAddAtomic(-sz, &totalAlloc);
- ErrorLog("free(%p,%d) -> total allocations %d\n", ret, sz, s - sz);
-}
-
-#undef MALLOC
-#undef FREE
-#define MALLOC(space, cast, size, type, flags) (space) = (cast)_malloc(size, type, flags, __FILE__, __LINE__)
-#define FREE(addr, type) _free((void *)addr, type, __FILE__, __LINE__)
-
-#endif /* MALLOC_DEBUG */
-
#pragma mark --- globals ---
-static lck_grp_t *decmpfs_lockgrp;
+static LCK_GRP_DECLARE(decmpfs_lockgrp, "VFSCOMP");
+static LCK_RW_DECLARE(decompressorsLock, &decmpfs_lockgrp);
+static LCK_MTX_DECLARE(decompress_channel_mtx, &decmpfs_lockgrp);
static const decmpfs_registration *decompressors[CMP_MAX]; /* the registered compressors */
-static lck_rw_t * decompressorsLock;
static int decompress_channel; /* channel used by decompress_file to wake up waiters */
-static lck_mtx_t *decompress_channel_mtx;
vfs_context_t decmpfs_ctx;
snprintf(resourceName, sizeof(resourceName), "com.apple.AppleFSCompression.Type%u", type);
ErrorLogWithPath("waiting for %s\n", resourceName);
while (decompressors[type] == NULL) {
- lck_rw_unlock_shared(decompressorsLock); // we have to unlock to allow the kext to register
+ lck_rw_unlock_shared(&decompressorsLock); // we have to unlock to allow the kext to register
if (IOServiceWaitForMatchingResource(resourceName, delay)) {
- lck_rw_lock_shared(decompressorsLock);
+ lck_rw_lock_shared(&decompressorsLock);
break;
}
if (!IOCatalogueMatchingDriversPresent(providesName)) {
//
ErrorLogWithPath("the kext with %s is no longer present\n", providesName);
- lck_rw_lock_shared(decompressorsLock);
+ lck_rw_lock_shared(&decompressorsLock);
break;
}
ErrorLogWithPath("still waiting for %s\n", resourceName);
delay *= 2;
- lck_rw_lock_shared(decompressorsLock);
+ lck_rw_lock_shared(&decompressorsLock);
}
// IOKit says the kext is loaded, so it should be registered too!
if (decompressors[type] == NULL) {
decmpfs_cnode_init(decmpfs_cnode *cp)
{
memset(cp, 0, sizeof(*cp));
- lck_rw_init(&cp->compressed_data_lock, decmpfs_lockgrp, NULL);
+ lck_rw_init(&cp->compressed_data_lock, &decmpfs_lockgrp, NULL);
}
void
decmpfs_cnode_destroy(decmpfs_cnode *cp)
{
- lck_rw_destroy(&cp->compressed_data_lock, decmpfs_lockgrp);
+ lck_rw_destroy(&cp->compressed_data_lock, &decmpfs_lockgrp);
}
bool
#pragma mark --- decmpfs state routines ---
static int
-decmpfs_fetch_compressed_header(vnode_t vp, decmpfs_cnode *cp, decmpfs_header **hdrOut, int returnInvalid)
+decmpfs_fetch_compressed_header(vnode_t vp, decmpfs_cnode *cp, decmpfs_header **hdrOut, int returnInvalid, size_t *hdr_size)
{
/*
	 * fetches vp's compression xattr, converting it into a decmpfs_header; returns 0 or errno.
	 * On success, *hdr_size is set to the allocated header size so the caller can pass it to kheap_free().
size_t read_size = 0;
size_t attr_size = 0;
+ size_t alloc_size = 0;
uio_t attr_uio = NULL;
int err = 0;
char *data = NULL;
if (no_additional_data) {
/* this file's xattr didn't have any extra data when we fetched it, so we can synthesize a header from the data in the cnode */
- MALLOC(data, char *, sizeof(decmpfs_header), M_TEMP, M_WAITOK);
+ alloc_size = sizeof(decmpfs_header);
+ data = kheap_alloc(KHEAP_TEMP, alloc_size, Z_WAITOK);
if (!data) {
err = ENOMEM;
goto out;
if (err != 0) {
goto out;
}
+ alloc_size = attr_size + sizeof(hdr->attr_size);
if (attr_size < sizeof(decmpfs_disk_header) || attr_size > MAX_DECMPFS_XATTR_SIZE) {
err = EINVAL;
}
/* allocation includes space for the extra attr_size field of a compressed_header */
- MALLOC(data, char *, attr_size + sizeof(hdr->attr_size), M_TEMP, M_WAITOK);
+ data = kheap_alloc(KHEAP_TEMP, alloc_size, Z_WAITOK);
if (!data) {
err = ENOMEM;
goto out;
out:
if (err && (err != ERANGE)) {
DebugLogWithPath("err %d\n", err);
- if (data) {
- FREE(data, M_TEMP);
- }
+ kheap_free(KHEAP_TEMP, data, alloc_size);
*hdrOut = NULL;
} else {
*hdrOut = hdr;
+ *hdr_size = alloc_size;
}
/*
* Trace the following parameters on return with event-id 0x03120004.
decmpfs_validate_compressed_file(vnode_t vp, decmpfs_cnode *cp)
{
/* give a compressor a chance to indicate that a compressed file is invalid */
-
decmpfs_header *hdr = NULL;
- errno_t err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0);
+ size_t alloc_size = 0;
+ errno_t err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0, &alloc_size);
+
if (err) {
/* we couldn't get the header */
if (decmpfs_fast_get_state(cp) == FILE_IS_NOT_COMPRESSED) {
}
if (!decmpfs_type_is_dataless(hdr->compression_type)) {
- lck_rw_lock_shared(decompressorsLock);
+ lck_rw_lock_shared(&decompressorsLock);
decmpfs_validate_compressed_file_func validate = decmp_get_func(vp, hdr->compression_type, validate);
if (validate) { /* make sure this validation function is valid */
/* is the data okay? */
/* no validate registered, so nothing to do */
err = 0;
}
- lck_rw_unlock_shared(decompressorsLock);
+ lck_rw_unlock_shared(&decompressorsLock);
}
out:
- if (hdr) {
- FREE(hdr, M_TEMP);
+ if (hdr != NULL) {
+ kheap_free(KHEAP_TEMP, hdr, alloc_size);
}
#if COMPRESSION_DEBUG
if (err) {
uint32_t cmp_state;
struct vnode_attr va_fetch;
decmpfs_header *hdr = NULL;
+ size_t alloc_size = 0;
mount_t mp = NULL;
int cnode_locked = 0;
int saveInvalid = 0; // save the header data even though the type was out of range
}
if (va_fetch.va_flags & UF_COMPRESSED) {
/* UF_COMPRESSED is on, make sure the file has the DECMPFS_XATTR_NAME xattr */
- error = decmpfs_fetch_compressed_header(vp, cp, &hdr, 1);
+ error = decmpfs_fetch_compressed_header(vp, cp, &hdr, 1, &alloc_size);
if ((hdr != NULL) && (error == ERANGE)) {
saveInvalid = 1;
}
ubc_setsize(vp, hdr->uncompressed_size);
/* update the decompression flags in the decmpfs cnode */
- lck_rw_lock_shared(decompressorsLock);
+ lck_rw_lock_shared(&decompressorsLock);
decmpfs_get_decompression_flags_func get_flags = decmp_get_func(vp, hdr->compression_type, get_flags);
if (get_flags) {
decompression_flags = get_flags(vp, decmpfs_ctx, hdr);
}
- lck_rw_unlock_shared(decompressorsLock);
+ lck_rw_unlock_shared(&decompressorsLock);
decmpfs_cnode_set_decompression_flags(cp, decompression_flags);
}
} else {
decmpfs_unlock_compressed_data(cp, 1);
}
- if (hdr) {
- FREE(hdr, M_TEMP);
+ if (hdr != NULL) {
+ kheap_free(KHEAP_TEMP, hdr, alloc_size);
}
+
/*
* Trace the following parameters on return with event-id 0x03120014.
*
}
decmpfs_header *hdr = NULL;
- error = decmpfs_fetch_compressed_header(vp, NULL, &hdr, 1);
+ size_t alloc_size = 0;
+ error = decmpfs_fetch_compressed_header(vp, NULL, &hdr, 1, &alloc_size);
if (error == 0) {
/*
* Allow the flag to be set since the decmpfs attribute
/* no DECMPFS_XATTR_NAME attribute, so deny the update */
vap->va_flags &= ~UF_COMPRESSED;
}
- if (hdr) {
- FREE(hdr, M_TEMP);
+ if (hdr != NULL) {
+ kheap_free(KHEAP_TEMP, hdr, alloc_size);
}
}
}
wait_for_decompress(decmpfs_cnode *cp)
{
int state;
- lck_mtx_lock(decompress_channel_mtx);
+ lck_mtx_lock(&decompress_channel_mtx);
do {
state = decmpfs_fast_get_state(cp);
if (state != FILE_IS_CONVERTING) {
/* file is not decompressing */
- lck_mtx_unlock(decompress_channel_mtx);
+ lck_mtx_unlock(&decompress_channel_mtx);
return state;
}
- msleep((caddr_t)&decompress_channel, decompress_channel_mtx, PINOD, "wait_for_decompress", NULL);
+ msleep((caddr_t)&decompress_channel, &decompress_channel_mtx, PINOD, "wait_for_decompress", NULL);
} while (1);
}
goto out;
}
- lck_rw_lock_exclusive(decompressorsLock); locked = 1;
+ lck_rw_lock_exclusive(&decompressorsLock); locked = 1;
/* make sure the registration for this type is zero */
if (decompressors[compression_type] != NULL) {
out:
if (locked) {
- lck_rw_unlock_exclusive(decompressorsLock);
+ lck_rw_unlock_exclusive(&decompressorsLock);
}
return ret;
}
goto out;
}
- lck_rw_lock_exclusive(decompressorsLock); locked = 1;
+ lck_rw_lock_exclusive(&decompressorsLock); locked = 1;
if (decompressors[compression_type] != registration) {
ret = EEXIST;
goto out;
out:
if (locked) {
- lck_rw_unlock_exclusive(decompressorsLock);
+ lck_rw_unlock_exclusive(&decompressorsLock);
}
return ret;
}
int ret = 0;
/* every compressor must have at least a fetch function */
- lck_rw_lock_shared(decompressorsLock);
+ lck_rw_lock_shared(&decompressorsLock);
if (decmp_get_func(vp, hdr->compression_type, fetch) != NULL) {
ret = 1;
}
- lck_rw_unlock_shared(decompressorsLock);
+ lck_rw_unlock_shared(&decompressorsLock);
return ret;
}
*/
DECMPFS_EMIT_TRACE_ENTRY(DECMPDBG_FETCH_UNCOMPRESSED_DATA, vp->v_id,
hdr->compression_type, (int)offset, (int)size);
- lck_rw_lock_shared(decompressorsLock);
+ lck_rw_lock_shared(&decompressorsLock);
decmpfs_fetch_uncompressed_data_func fetch = decmp_get_func(vp, hdr->compression_type, fetch);
if (fetch) {
err = fetch(vp, decmpfs_ctx, hdr, offset, size, nvec, vec, bytes_read);
- lck_rw_unlock_shared(decompressorsLock);
+ lck_rw_unlock_shared(&decompressorsLock);
if (err == 0) {
uint64_t decompression_flags = decmpfs_cnode_get_decompression_flags(cp);
if (decompression_flags & DECMPFS_FLAGS_FORCE_FLUSH_ON_DECOMPRESS) {
}
} else {
err = ENOTSUP;
- lck_rw_unlock_shared(decompressorsLock);
+ lck_rw_unlock_shared(&decompressorsLock);
}
/*
* Trace the following parameters on return with event-id 0x03120008.
size_t verify_block_size = 0;
void *data = NULL;
decmpfs_header *hdr = NULL;
+ size_t alloc_size = 0;
uint64_t cachedSize = 0;
int cmpdata_locked = 0;
bool file_tail_page_valid = false;
DebugLogWithPath("pagein: unknown flags 0x%08x\n", (flags & ~(UPL_IOSYNC | UPL_NOCOMMIT | UPL_NORDAHEAD)));
}
- err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0);
+ err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0, &alloc_size);
if (err != 0) {
goto out;
}
if (data) {
ubc_upl_unmap(pl);
}
- if (hdr) {
- FREE(hdr, M_TEMP);
+ if (hdr != NULL) {
+ kheap_free(KHEAP_TEMP, hdr, alloc_size);
}
if (cmpdata_locked) {
decmpfs_unlock_compressed_data(cp, 0);
if (err) {
#if 0
if (err != ENXIO && err != ENOSPC) {
- char *path;
- MALLOC(path, char *, PATH_MAX, M_TEMP, M_WAITOK);
+ char *path = zalloc(ZV_NAMEI);
panic("%s: decmpfs_pagein_compressed: err %d", vnpath(vp, path, PATH_MAX), err);
- FREE(path, M_TEMP);
+ zfree(ZV_NAMEI, path);
}
#endif /* 0 */
ErrorLogWithPath("err %d\n", err);
upl_t upl = NULL;
upl_page_info_t *pli = NULL;
decmpfs_header *hdr = NULL;
+ size_t alloc_size = 0;
uint64_t cachedSize = 0;
off_t uioPos = 0;
user_ssize_t uioRemaining = 0;
goto out;
}
- err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0);
+ err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0, &alloc_size);
if (err != 0) {
goto out;
}
DebugLogWithPath("uplPos %lld uplSize %lld\n", (uint64_t)uplPos, (uint64_t)uplSize);
#endif
- lck_rw_lock_shared(decompressorsLock);
+ lck_rw_lock_shared(&decompressorsLock);
decmpfs_adjust_fetch_region_func adjust_fetch = decmp_get_func(vp, hdr->compression_type, adjust_fetch);
if (adjust_fetch) {
/* give the compressor a chance to adjust the portion of the file that we read */
adjust_fetch(vp, decmpfs_ctx, hdr, &uplPos, &uplSize);
VerboseLogWithPath("adjusted uplPos %lld uplSize %lld\n", (uint64_t)uplPos, (uint64_t)uplSize);
}
- lck_rw_unlock_shared(decompressorsLock);
+ lck_rw_unlock_shared(&decompressorsLock);
/* clip the adjusted size to the size of the file */
if ((uint64_t)uplPos + uplSize > cachedSize) {
if (kr != KERN_SUCCESS) {
commit_upl(upl, 0, curUplSize, UPL_ABORT_FREE_ON_EMPTY, 1);
#if 0
- char *path;
- MALLOC(path, char *, PATH_MAX, M_TEMP, M_WAITOK);
+ char *path = zalloc(ZV_NAMEI);
panic("%s: decmpfs_read_compressed: ubc_upl_map error %d", vnpath(vp, path, PATH_MAX), (int)kr);
- FREE(path, M_TEMP);
+ zfree(ZV_NAMEI, path);
#else /* 0 */
ErrorLogWithPath("ubc_upl_map kr=0x%x\n", (int)kr);
#endif /* 0 */
out:
- if (hdr) {
- FREE(hdr, M_TEMP);
+ if (hdr != NULL) {
+ kheap_free(KHEAP_TEMP, hdr, alloc_size);
}
if (cmpdata_locked) {
decmpfs_unlock_compressed_data(cp, 0);
* then delete the file's compression xattr
*/
decmpfs_header *hdr = NULL;
+ size_t alloc_size = 0;
/*
* Trace the following parameters on entry with event-id 0x03120010.
*/
DECMPFS_EMIT_TRACE_ENTRY(DECMPDBG_FREE_COMPRESSED_DATA, vp->v_id);
- int err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0);
+ int err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0, &alloc_size);
if (err) {
ErrorLogWithPath("decmpfs_fetch_compressed_header err %d\n", err);
} else {
- lck_rw_lock_shared(decompressorsLock);
+ lck_rw_lock_shared(&decompressorsLock);
decmpfs_free_compressed_data_func free_data = decmp_get_func(vp, hdr->compression_type, free_data);
if (free_data) {
err = free_data(vp, decmpfs_ctx, hdr);
/* nothing to do, so no error */
err = 0;
}
- lck_rw_unlock_shared(decompressorsLock);
+ lck_rw_unlock_shared(&decompressorsLock);
if (err != 0) {
ErrorLogWithPath("decompressor err %d\n", err);
/* delete the xattr */
err = vn_removexattr(vp, DECMPFS_XATTR_NAME, 0, decmpfs_ctx);
- if (err != 0) {
- goto out;
- }
-out:
- if (hdr) {
- FREE(hdr, M_TEMP);
+ if (hdr != NULL) {
+ kheap_free(KHEAP_TEMP, hdr, alloc_size);
}
return err;
}
int update_file_state = 0;
size_t allocSize = 0;
decmpfs_header *hdr = NULL;
+ size_t hdr_size = 0;
int cmpdata_locked = 0;
off_t remaining = 0;
uint64_t uncompressed_size = 0;
}
}
- err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0);
+ err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0, &hdr_size);
if (err != 0) {
goto out;
}
}
allocSize = MIN(64 * 1024, (size_t)toSize);
- MALLOC(data, char *, allocSize, M_TEMP, M_WAITOK);
+ data = kheap_alloc(KHEAP_TEMP, allocSize, Z_WAITOK);
if (!data) {
err = ENOMEM;
goto out;
#endif
out:
- if (hdr) {
- FREE(hdr, M_TEMP);
- }
- if (data) {
- FREE(data, M_TEMP);
+ if (hdr != NULL) {
+ kheap_free(KHEAP_TEMP, hdr, hdr_size);
}
+ kheap_free(KHEAP_TEMP, data, allocSize);
if (uio_w) {
uio_free(uio_w);
}
}
if (update_file_state) {
- lck_mtx_lock(decompress_channel_mtx);
+ lck_mtx_lock(&decompress_channel_mtx);
decmpfs_cnode_set_vnode_state(cp, new_state, 1);
wakeup((caddr_t)&decompress_channel); /* wake up anyone who might have been waiting for decompression */
- lck_mtx_unlock(decompress_channel_mtx);
+ lck_mtx_unlock(&decompress_channel_mtx);
}
if (cmpdata_locked) {
#pragma mark --- decmpfs initialization ---
void
-decmpfs_init()
+decmpfs_init(void)
{
static int done = 0;
if (done) {
decmpfs_ctx = vfs_context_create(vfs_context_kernel());
- lck_grp_attr_t *attr = lck_grp_attr_alloc_init();
- decmpfs_lockgrp = lck_grp_alloc_init("VFSCOMP", attr);
- lck_grp_attr_free(attr);
- decompressorsLock = lck_rw_alloc_init(decmpfs_lockgrp, NULL);
- decompress_channel_mtx = lck_mtx_alloc_init(decmpfs_lockgrp, NULL);
-
register_decmpfs_decompressor(CMP_Type1, &Type1Reg);
done = 1;
void *ubc_getobject_from_filename(const char *filename, struct vnode **vpp, off_t *file_size);
-extern lck_rw_t * rootvnode_rw_lock;
+extern lck_rw_t rootvnode_rw_lock;
#define kIBFilePrefix "file://"
* It will be mounted at mount_path.
* The vfs_switch_root operation will be performed.
* After the pivot, the outgoing root filesystem (the filesystem at root when
- * this function begins) will be at outgoing_root_path. If `rooted_dmg` is true,
- * then ignore then chunklisted or authAPFS checks on this image
+ * this function begins) will be at outgoing_root_path. If `skip_signature_check` is true,
+ * then ignore the chunklist or authAPFS checks on this image
*/
__private_extern__ int
imageboot_pivot_image(const char *image_path, imageboot_type_t type, const char *mount_path,
- const char *outgoing_root_path, const bool rooted_dmg)
+ const char *outgoing_root_path, const bool rooted_dmg, const bool skip_signature_check)
{
int error;
boolean_t authenticated_dmg_chunklist = false;
/*
* If we are using a custom rooted DMG, or if we have already authenticated
* the DMG via chunklist, then it is permissible to use.
+ * Or, if CSR_ALLOW_ANY_RECOVERY_OS is set on Development or Debug build variant.
*/
- if (rooted_dmg || authenticated_dmg_chunklist) {
+ if (rooted_dmg || authenticated_dmg_chunklist || skip_signature_check) {
rootauth = 0;
}
error = rootauth;
vnode_ref(newdp);
vnode_put(newdp);
- lck_rw_lock_exclusive(rootvnode_rw_lock);
+ lck_rw_lock_exclusive(&rootvnode_rw_lock);
/* switch to the new rootvnode */
if (update_rootvnode) {
rootvnode = newdp;
mount_unlock(new_rootfs);
filedesc0.fd_cdir = newdp;
- lck_rw_unlock_exclusive(rootvnode_rw_lock);
+ lck_rw_unlock_exclusive(&rootvnode_rw_lock);
DBG_TRACE("%s: root switched\n", __FUNCTION__);
}
if (err) {
+ if (vp) {
+ vnode_put(vp);
+ }
*errp = err;
vp = NULL;
}
#endif
/* ... and unmount everything */
- vfs_unmountall();
+ vfs_unmountall(FALSE);
- lck_rw_lock_exclusive(rootvnode_rw_lock);
+ lck_rw_lock_exclusive(&rootvnode_rw_lock);
filedesc0.fd_cdir = NULL;
tvp = rootvnode;
rootvnode = NULL;
rootvp = NULLVP;
rootdev = NODEV;
- lck_rw_unlock_exclusive(rootvnode_rw_lock);
+ lck_rw_unlock_exclusive(&rootvnode_rw_lock);
vnode_get_and_drop_always(tvp);
/* Attach the ramfs image ... */
}
vnode_ref(newdp);
- lck_rw_lock_exclusive(rootvnode_rw_lock);
+ lck_rw_lock_exclusive(&rootvnode_rw_lock);
rootvnode = newdp;
rootvnode->v_flag |= VROOT;
new_rootfs = rootvnode->v_mount;
set_fake_bootuuid(new_rootfs);
filedesc0.fd_cdir = newdp;
- lck_rw_unlock_exclusive(rootvnode_rw_lock);
+ lck_rw_unlock_exclusive(&rootvnode_rw_lock);
vnode_put(newdp);
unsigned int kdlog_value3 = 0;
unsigned int kdlog_value4 = 0;
-static lck_spin_t * kdw_spin_lock;
-static lck_spin_t * kds_spin_lock;
+static LCK_GRP_DECLARE(kdebug_lck_grp, "kdebug");
+static LCK_SPIN_DECLARE(kdw_spin_lock, &kdebug_lck_grp);
+static LCK_SPIN_DECLARE(kds_spin_lock, &kdebug_lck_grp);
kd_threadmap *kd_mapptr = 0;
vm_size_t kd_mapsize = 0;
}
}
-static lck_grp_t *kdebug_lck_grp = NULL;
-
static void
kdbg_set_tracing_enabled(bool enabled, uint32_t trace_type)
{
NULL);
int s = ml_set_interrupts_enabled(false);
- lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);
+ lck_spin_lock_grp(&kds_spin_lock, &kdebug_lck_grp);
if (enabled) {
/*
kd_ctrl_page.enabled = 0;
commpage_update_kdebug_state();
}
- lck_spin_unlock(kds_spin_lock);
+ lck_spin_unlock(&kds_spin_lock);
ml_set_interrupts_enabled(s);
if (enabled) {
kdbg_set_flags(int slowflag, int enableflag, bool enabled)
{
int s = ml_set_interrupts_enabled(false);
- lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);
+ lck_spin_lock_grp(&kds_spin_lock, &kdebug_lck_grp);
if (enabled) {
kd_ctrl_page.kdebug_slowcheck |= slowflag;
kdebug_enable &= ~enableflag;
}
- lck_spin_unlock(kds_spin_lock);
+ lck_spin_unlock(&kds_spin_lock);
ml_set_interrupts_enabled(s);
}
{
bool wrapped;
int s = ml_set_interrupts_enabled(false);
- lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);
+ lck_spin_lock_grp(&kds_spin_lock, &kdebug_lck_grp);
*old_slowcheck = kd_ctrl_page.kdebug_slowcheck;
*old_flags = kd_ctrl_page.kdebug_flags;
kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
kd_ctrl_page.kdebug_flags |= KDBG_NOWRAP;
- lck_spin_unlock(kds_spin_lock);
+ lck_spin_unlock(&kds_spin_lock);
ml_set_interrupts_enabled(s);
return wrapped;
enable_wrap(uint32_t old_slowcheck)
{
int s = ml_set_interrupts_enabled(false);
- lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);
+ lck_spin_lock_grp(&kds_spin_lock, &kdebug_lck_grp);
kd_ctrl_page.kdebug_flags &= ~KDBG_NOWRAP;
kd_ctrl_page.kdebug_slowcheck &= ~SLOW_NOLOG;
}
- lck_spin_unlock(kds_spin_lock);
+ lck_spin_unlock(&kds_spin_lock);
ml_set_interrupts_enabled(s);
}
kdsp.raw = kdsp_raw;
s = ml_set_interrupts_enabled(false);
- lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);
+ lck_spin_lock_grp(&kds_spin_lock, &kdebug_lck_grp);
kdbp = &kdbip[cpu];
kd_ctrl_page.kds_inuse_count--;
}
- lck_spin_unlock(kds_spin_lock);
+ lck_spin_unlock(&kds_spin_lock);
ml_set_interrupts_enabled(s);
}
int s = 0;
s = ml_set_interrupts_enabled(false);
- lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);
+ lck_spin_lock_grp(&kds_spin_lock, &kdebug_lck_grp);
kdbp = &kdbip[cpu];
}
kdbp->kd_list_tail = kdsp;
out:
- lck_spin_unlock(kds_spin_lock);
+ lck_spin_unlock(&kds_spin_lock);
ml_set_interrupts_enabled(s);
return retval;
return 0;
}
-static void
-kdbg_lock_init(void)
-{
- static lck_grp_attr_t *kdebug_lck_grp_attr = NULL;
- static lck_attr_t *kdebug_lck_attr = NULL;
-
- if (kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT) {
- return;
- }
-
- assert(kdebug_lck_grp_attr == NULL);
- kdebug_lck_grp_attr = lck_grp_attr_alloc_init();
- kdebug_lck_grp = lck_grp_alloc_init("kdebug", kdebug_lck_grp_attr);
- kdebug_lck_attr = lck_attr_alloc_init();
-
- kds_spin_lock = lck_spin_alloc_init(kdebug_lck_grp, kdebug_lck_attr);
- kdw_spin_lock = lck_spin_alloc_init(kdebug_lck_grp, kdebug_lck_attr);
-
- kd_ctrl_page.kdebug_flags |= KDBG_LOCKINIT;
-}
-
int
kdbg_bootstrap(bool early_trace)
{
{
ktrace_assert_lock_held();
- kdbg_lock_init();
-
kdbg_clear();
if (kdbg_typefilter) {
typefilter_reject_all(kdbg_typefilter);
if (!s) {
panic("kdbg_wait() called with interrupts disabled");
}
- lck_spin_lock_grp(kdw_spin_lock, kdebug_lck_grp);
+ lck_spin_lock_grp(&kdw_spin_lock, &kdebug_lck_grp);
if (!locked_wait) {
/* drop the mutex to allow others to access trace */
kds_waiter = 1;
if (abstime) {
- wait_result = lck_spin_sleep_deadline(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE, abstime);
+ wait_result = lck_spin_sleep_deadline(&kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE, abstime);
} else {
- wait_result = lck_spin_sleep(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE);
+ wait_result = lck_spin_sleep(&kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE);
}
kds_waiter = 0;
/* check the count under the spinlock */
bool threshold_exceeded = (kd_ctrl_page.kds_inuse_count >= n_storage_threshold);
- lck_spin_unlock(kdw_spin_lock);
+ lck_spin_unlock(&kdw_spin_lock);
ml_set_interrupts_enabled(s);
if (!locked_wait) {
*/
bool s = ml_set_interrupts_enabled(false);
- if (lck_spin_try_lock(kdw_spin_lock)) {
+ if (lck_spin_try_lock(&kdw_spin_lock)) {
if (kds_waiter &&
(kd_ctrl_page.kds_inuse_count >= n_storage_threshold)) {
kds_waiter = 0;
need_kds_wakeup = true;
}
- lck_spin_unlock(kdw_spin_lock);
+ lck_spin_unlock(&kdw_spin_lock);
}
ml_set_interrupts_enabled(s);
value = name[1];
}
- kdbg_lock_init();
- assert(kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT);
-
ktrace_lock();
/*
ktrace_start_single_threaded();
- kdbg_lock_init();
-
ktrace_kernel_configure(KTRACE_KDEBUG);
kdbg_set_nkdbufs(n_events);
*/
comp_t encode_comp_t(uint32_t, uint32_t);
void acctwatch(void *);
-void acct_init(void);
/*
* Accounting vnode pointer, and suspended accounting vnode pointer. States
int acctchkfreq = 15; /* frequency (in seconds) to check space */
-static lck_grp_t *acct_subsys_lck_grp;
-static lck_mtx_t *acct_subsys_mutex;
+static LCK_GRP_DECLARE(acct_subsys_lck_grp, "acct");
+static LCK_MTX_DECLARE(acct_subsys_mutex, &acct_subsys_lck_grp);
-#define ACCT_SUBSYS_LOCK() lck_mtx_lock(acct_subsys_mutex)
-#define ACCT_SUBSYS_UNLOCK() lck_mtx_unlock(acct_subsys_mutex)
-
-void
-acct_init(void)
-{
- acct_subsys_lck_grp = lck_grp_alloc_init("acct", NULL);
- acct_subsys_mutex = lck_mtx_alloc_init(acct_subsys_lck_grp, NULL);
-}
+#define ACCT_SUBSYS_LOCK() lck_mtx_lock(&acct_subsys_mutex)
+#define ACCT_SUBSYS_UNLOCK() lck_mtx_unlock(&acct_subsys_mutex)
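/*
 * Editor's sketch (not part of the diff): the acct conversion above is the
 * simplest instance of the pattern applied throughout this change set --
 * LCK_GRP_DECLARE()/LCK_MTX_DECLARE() build the lock group and mutex at compile
 * time, so the *_init() routine that used to call
 * lck_grp_alloc_init()/lck_mtx_alloc_init() can be deleted and callers take the
 * address of the lock object directly. The "foo" subsystem is hypothetical.
 */
#include <kern/locks.h>

static LCK_GRP_DECLARE(foo_lck_grp, "foo");
static LCK_MTX_DECLARE(foo_mtx, &foo_lck_grp);

static int foo_count;

static void
foo_retain(void)
{
	lck_mtx_lock(&foo_mtx);         /* note: &foo_mtx, not a pointer variable */
	foo_count++;
	lck_mtx_unlock(&foo_mtx);
}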
/*
* Authorization scopes.
*/
-lck_grp_t *kauth_lck_grp;
-static lck_mtx_t *kauth_scope_mtx;
-#define KAUTH_SCOPELOCK() lck_mtx_lock(kauth_scope_mtx);
-#define KAUTH_SCOPEUNLOCK() lck_mtx_unlock(kauth_scope_mtx);
+LCK_GRP_DECLARE(kauth_lck_grp, "kauth");
+static LCK_MTX_DECLARE(kauth_scope_mtx, &kauth_lck_grp);
+#define KAUTH_SCOPELOCK() lck_mtx_lock(&kauth_scope_mtx);
+#define KAUTH_SCOPEUNLOCK() lck_mtx_unlock(&kauth_scope_mtx);
/*
* We support listeners for scopes that have not been registered yet.
};
typedef struct kauth_local_listener *kauth_local_listener_t;
-static TAILQ_HEAD(, kauth_listener) kauth_dangling_listeners;
+static TAILQ_HEAD(, kauth_listener) kauth_dangling_listeners =
+ TAILQ_HEAD_INITIALIZER(kauth_dangling_listeners);
/*
* Scope listeners need to be reworked to be dynamic.
/* values for kauth_scope.ks_flags */
#define KS_F_HAS_LISTENERS (1 << 0)
-static TAILQ_HEAD(, kauth_scope) kauth_scopes;
+static TAILQ_HEAD(, kauth_scope) kauth_scopes = TAILQ_HEAD_INITIALIZER(kauth_scopes);
static int kauth_add_callback_to_scope(kauth_scope_t sp, kauth_listener_t klp);
static void kauth_scope_init(void);
void
kauth_init(void)
{
- lck_grp_attr_t *grp_attributes;
-
- TAILQ_INIT(&kauth_scopes);
- TAILQ_INIT(&kauth_dangling_listeners);
-
- /* set up our lock group */
- grp_attributes = lck_grp_attr_alloc_init();
- kauth_lck_grp = lck_grp_alloc_init("kauth", grp_attributes);
- lck_grp_attr_free(grp_attributes);
-
/* bring up kauth subsystem components */
kauth_cred_init();
-#if CONFIG_EXT_RESOLVER
- kauth_identity_init();
- kauth_groups_init();
-#endif
kauth_scope_init();
-#if CONFIG_EXT_RESOLVER
- kauth_resolver_init();
-#endif
- /* can't alloc locks after this */
- lck_grp_free(kauth_lck_grp);
- kauth_lck_grp = NULL;
}
static void
kauth_scope_init(void)
{
- kauth_scope_mtx = lck_mtx_alloc_init(kauth_lck_grp, 0 /*LCK_ATTR_NULL*/);
kauth_scope_process = kauth_register_scope(KAUTH_SCOPE_PROCESS, kauth_authorize_process_callback, NULL);
kauth_scope_generic = kauth_register_scope(KAUTH_SCOPE_GENERIC, kauth_authorize_generic_callback, NULL);
kauth_scope_fileop = kauth_register_scope(KAUTH_SCOPE_FILEOP, NULL, NULL);
/*
* Allocate and populate the scope structure.
*/
- MALLOC(sp, kauth_scope_t, sizeof(*sp), M_KAUTH, M_WAITOK | M_ZERO);
+ sp = kheap_alloc(KM_KAUTH, sizeof(*sp), Z_WAITOK | Z_ZERO);
if (sp == NULL) {
return NULL;
}
/*
* Allocate and populate the listener structure.
*/
- MALLOC(lsp, kauth_listener_t, sizeof(*lsp), M_KAUTH, M_WAITOK);
+ lsp = kheap_alloc(KM_KAUTH, sizeof(*lsp), Z_WAITOK);
if (lsp == NULL) {
return NULL;
}
if (strncmp(tsp->ks_identifier, identifier,
strlen(tsp->ks_identifier) + 1) == 0) {
KAUTH_SCOPEUNLOCK();
- FREE(sp, M_KAUTH);
+ kheap_free(KM_KAUTH, sp, sizeof(struct kauth_scope));
return NULL;
}
}
}
}
KAUTH_SCOPEUNLOCK();
- FREE(scope, M_KAUTH);
+ kheap_free(KM_KAUTH, scope, sizeof(struct kauth_scope));
return;
}
}
/* table already full */
KAUTH_SCOPEUNLOCK();
- FREE(klp, M_KAUTH);
+ kheap_free(KM_KAUTH, klp, sizeof(struct kauth_listener));
return NULL;
}
}
sp->ks_flags &= ~KS_F_HAS_LISTENERS;
}
KAUTH_SCOPEUNLOCK();
- FREE(listener, M_KAUTH);
+ kheap_free(KM_KAUTH, listener, sizeof(struct kauth_listener));
return;
}
}
if (klp == listener) {
TAILQ_REMOVE(&kauth_dangling_listeners, klp, kl_link);
KAUTH_SCOPEUNLOCK();
- FREE(listener, M_KAUTH);
+ kheap_free(KM_KAUTH, listener, sizeof(struct kauth_listener));
return;
}
}
return NULL;
}
- MALLOC(fsp, kauth_filesec_t, KAUTH_FILESEC_SIZE(count), M_KAUTH, M_WAITOK);
+ fsp = kheap_alloc(KM_KAUTH, KAUTH_FILESEC_SIZE(count), Z_WAITOK);
if (fsp != NULL) {
fsp->fsec_magic = KAUTH_FILESEC_MAGIC;
fsp->fsec_owner = kauth_null_guid;
panic("freeing KAUTH_FILESEC_WANTED");
}
#endif
- FREE(fsp, M_KAUTH);
+ kheap_free_addr(KM_KAUTH, fsp);
}
/*
return NULL;
}
- MALLOC(aclp, kauth_acl_t, KAUTH_ACL_SIZE(count), M_KAUTH, M_WAITOK);
+ aclp = kheap_alloc(KM_KAUTH, KAUTH_ACL_SIZE(count), Z_WAITOK);
if (aclp != NULL) {
aclp->acl_entrycount = 0;
aclp->acl_flags = 0;
struct ctl_cb {
TAILQ_ENTRY(ctl_cb) next; /* controller chain */
- lck_mtx_t *mtx;
+ lck_mtx_t mtx;
struct socket *so; /* controlling socket */
struct kctl *kctl; /* back pointer to controller */
void *userdata;
*/
const u_int32_t ctl_maxunit = 65536;
-static lck_grp_attr_t *ctl_lck_grp_attr = 0;
-static lck_attr_t *ctl_lck_attr = 0;
-static lck_grp_t *ctl_lck_grp = 0;
-static lck_mtx_t *ctl_mtx;
+static LCK_ATTR_DECLARE(ctl_lck_attr, 0, 0);
+static LCK_GRP_DECLARE(ctl_lck_grp, "Kernel Control Protocol");
+static LCK_MTX_DECLARE_ATTR(ctl_mtx, &ctl_lck_grp, &ctl_lck_attr);
/* all the controllers are chained */
-TAILQ_HEAD(kctl_list, kctl) ctl_head;
+TAILQ_HEAD(kctl_list, kctl) ctl_head = TAILQ_HEAD_INITIALIZER(ctl_head);
static int ctl_attach(struct socket *, int, struct proc *);
static int ctl_detach(struct socket *);
VERIFY(!(dp->dom_flags & DOM_INITIALIZED));
VERIFY(dp == systemdomain);
- ctl_lck_grp_attr = lck_grp_attr_alloc_init();
- if (ctl_lck_grp_attr == NULL) {
- panic("%s: lck_grp_attr_alloc_init failed\n", __func__);
- /* NOTREACHED */
- }
-
- ctl_lck_grp = lck_grp_alloc_init("Kernel Control Protocol",
- ctl_lck_grp_attr);
- if (ctl_lck_grp == NULL) {
- panic("%s: lck_grp_alloc_init failed\n", __func__);
- /* NOTREACHED */
- }
-
- ctl_lck_attr = lck_attr_alloc_init();
- if (ctl_lck_attr == NULL) {
- panic("%s: lck_attr_alloc_init failed\n", __func__);
- /* NOTREACHED */
- }
-
- ctl_mtx = lck_mtx_alloc_init(ctl_lck_grp, ctl_lck_attr);
- if (ctl_mtx == NULL) {
- panic("%s: lck_mtx_alloc_init failed\n", __func__);
- /* NOTREACHED */
- }
- TAILQ_INIT(&ctl_head);
-
for (i = 0, pr = &kctlsw[0]; i < kctl_proto_count; i++, pr++) {
net_add_proto(pr, dp, 1);
}
kcb_delete(struct ctl_cb *kcb)
{
if (kcb != 0) {
- if (kcb->mtx != 0) {
- lck_mtx_free(kcb->mtx, ctl_lck_grp);
- }
- FREE(kcb, M_TEMP);
+ lck_mtx_destroy(&kcb->mtx, &ctl_lck_grp);
+ kheap_free(KHEAP_DEFAULT, kcb, sizeof(struct ctl_cb));
}
}
int error = 0;
struct ctl_cb *kcb = 0;
- MALLOC(kcb, struct ctl_cb *, sizeof(struct ctl_cb), M_TEMP, M_WAITOK);
+ kcb = kheap_alloc(KHEAP_DEFAULT, sizeof(struct ctl_cb), Z_WAITOK | Z_ZERO);
if (kcb == NULL) {
error = ENOMEM;
goto quit;
}
- bzero(kcb, sizeof(struct ctl_cb));
- kcb->mtx = lck_mtx_alloc_init(ctl_lck_grp, ctl_lck_attr);
- if (kcb->mtx == NULL) {
- error = ENOMEM;
- goto quit;
- }
+ lck_mtx_init(&kcb->mtx, &ctl_lck_grp, &ctl_lck_attr);
kcb->so = so;
so->so_pcb = (caddr_t)kcb;
if (kcb != 0) {
struct kctl *kctl;
if ((kctl = kcb->kctl) != 0) {
- lck_mtx_lock(ctl_mtx);
+ lck_mtx_lock(&ctl_mtx);
TAILQ_REMOVE(&kctl->kcb_head, kcb, next);
kctlstat.kcs_pcbcount--;
kctlstat.kcs_gencnt++;
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
}
kcb_delete(kcb);
}
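/*
 * Editor's sketch (not part of the diff): struct ctl_cb now embeds its
 * lck_mtx_t instead of pointing at a separately allocated one, so creation and
 * teardown become an lck_mtx_init()/lck_mtx_destroy() pair tied to the object's
 * own kheap allocation. The "widget" object below is hypothetical.
 */
#include <kern/kalloc.h>
#include <kern/locks.h>

static LCK_GRP_DECLARE(widget_lck_grp, "widget");

struct widget {
	lck_mtx_t w_mtx;        /* embedded, not lck_mtx_t * */
	int       w_state;
};

static struct widget *
widget_create(void)
{
	struct widget *w;

	w = kheap_alloc(KHEAP_DEFAULT, sizeof(struct widget), Z_WAITOK | Z_ZERO);
	if (w != NULL) {
		lck_mtx_init(&w->w_mtx, &widget_lck_grp, NULL);
	}
	return w;
}

static void
widget_destroy(struct widget *w)
{
	lck_mtx_destroy(&w->w_mtx, &widget_lck_grp);
	kheap_free(KHEAP_DEFAULT, w, sizeof(struct widget));
}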
bcopy(nam, &sa, sizeof(struct sockaddr_ctl));
- lck_mtx_lock(ctl_mtx);
+ lck_mtx_lock(&ctl_mtx);
kctl = ctl_find_by_id_unit(sa.sc_id, sa.sc_unit);
if (kctl == NULL) {
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
return ENOENT;
}
(so->so_type != SOCK_STREAM)) ||
(!(kctl->flags & CTL_FLAG_REG_SOCK_STREAM) &&
(so->so_type != SOCK_DGRAM))) {
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
return EPROTOTYPE;
}
if (kctl->flags & CTL_FLAG_PRIVILEGED) {
if (p == 0) {
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
return EINVAL;
}
if (kauth_cred_issuser(kauth_cred_get()) == 0) {
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
return EPERM;
}
}
if ((kctl->flags & CTL_FLAG_REG_ID_UNIT) || sa.sc_unit != 0) {
if (kcb_find(kctl, sa.sc_unit) != NULL) {
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
return EBUSY;
}
} else if (kctl->setup != NULL) {
error = (*kctl->setup)(&sa.sc_unit, &kcb->userdata);
if (error != 0) {
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
return error;
}
} else {
}
if (unit == ctl_maxunit) {
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
return EBUSY;
}
kctlstat.kcs_pcbcount++;
kctlstat.kcs_gencnt++;
kctlstat.kcs_connections++;
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
/*
* rdar://15526688: Limit the send and receive sizes to sb_max
#if DEVELOPMENT || DEBUG
kcb->status = KCTL_DISCONNECTED;
#endif /* DEVELOPMENT || DEBUG */
- lck_mtx_lock(ctl_mtx);
+ lck_mtx_lock(&ctl_mtx);
TAILQ_REMOVE(&kctl->kcb_head, kcb, next);
kcb->kctl = NULL;
kcb->sac.sc_unit = 0;
kctlstat.kcs_pcbcount--;
kctlstat.kcs_gencnt++;
kctlstat.kcs_conn_fail++;
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
}
return error;
}
#if DEVELOPMENT || DEBUG
kcb->status = KCTL_DISCONNECTED;
#endif /* DEVELOPMENT || DEBUG */
- lck_mtx_lock(ctl_mtx);
+ lck_mtx_lock(&ctl_mtx);
TAILQ_REMOVE(&kcb->kctl->kcb_head, kcb, next);
kcb->kctl = NULL;
kcb->sac.sc_unit = 0;
kctlstat.kcs_pcbcount--;
kctlstat.kcs_gencnt++;
kctlstat.kcs_conn_fail++;
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
}
out:
ctl_kcb_done_clearing(kcb);
#endif /* DEVELOPMENT || DEBUG */
socket_unlock(so, 0);
- lck_mtx_lock(ctl_mtx);
+ lck_mtx_lock(&ctl_mtx);
kcb->kctl = 0;
kcb->sac.sc_unit = 0;
while (kcb->usecount != 0) {
- msleep(&kcb->usecount, ctl_mtx, 0, "kcb->usecount", 0);
+ msleep(&kcb->usecount, &ctl_mtx, 0, "kcb->usecount", 0);
}
TAILQ_REMOVE(&kctl->kcb_head, kcb, next);
kctlstat.kcs_pcbcount--;
kctlstat.kcs_gencnt++;
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
socket_lock(so, 0);
ctl_kcb_done_clearing(kcb);
ctl_kcb_decrement_use_count(kcb);
struct kctl *kctl;
int error = 0;
void *data = NULL;
+ size_t data_len = 0;
size_t len;
if (sopt->sopt_level != SYSPROTO_CONTROL) {
goto out;
}
if (sopt->sopt_valsize != 0) {
- MALLOC(data, void *, sopt->sopt_valsize, M_TEMP,
- M_WAITOK | M_ZERO);
+ data_len = sopt->sopt_valsize;
+ data = kheap_alloc(KHEAP_TEMP, data_len, Z_WAITOK | Z_ZERO);
if (data == NULL) {
+ data_len = 0;
error = ENOMEM;
goto out;
}
socket_lock(so, 0);
}
- if (data != NULL) {
- FREE(data, M_TEMP);
- }
+ kheap_free(KHEAP_TEMP, data, data_len);
break;
case SOPT_GET:
}
if (sopt->sopt_valsize && sopt->sopt_val) {
- MALLOC(data, void *, sopt->sopt_valsize, M_TEMP,
- M_WAITOK | M_ZERO);
+ data_len = sopt->sopt_valsize;
+ data = kheap_alloc(KHEAP_TEMP, data_len, Z_WAITOK | Z_ZERO);
if (data == NULL) {
+ data_len = 0;
error = ENOMEM;
goto out;
}
}
}
}
- if (data != NULL) {
- FREE(data, M_TEMP);
- }
+
+ kheap_free(KHEAP_TEMP, data, data_len);
break;
}
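/*
 * Editor's sketch (not part of the diff): unlike MALLOC()/FREE(), the
 * kheap_alloc()/kheap_free() API requires the caller to pass the same size to
 * the free that was used for the allocation, which is why this change set
 * threads sizes such as data_len, alloc_size and hdr_size alongside the
 * pointers they describe (and relies on kheap_free() of a NULL pointer being a
 * no-op). The "blob" helpers below are hypothetical.
 */
#include <sys/errno.h>
#include <kern/kalloc.h>

struct blob {
	void   *b_data;
	size_t  b_size;         /* remembered so the free can name it */
};

static int
blob_alloc(struct blob *b, size_t len)
{
	b->b_data = kheap_alloc(KHEAP_TEMP, len, Z_WAITOK | Z_ZERO);
	if (b->b_data == NULL) {
		b->b_size = 0;
		return ENOMEM;
	}
	b->b_size = len;
	return 0;
}

static void
blob_release(struct blob *b)
{
	kheap_free(KHEAP_TEMP, b->b_data, b->b_size);
	b->b_size = 0;
}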
struct kctl *kctl;
u_int32_t n = 0;
- lck_mtx_lock(ctl_mtx);
+ lck_mtx_lock(&ctl_mtx);
TAILQ_FOREACH(kctl, &ctl_head, next)
n++;
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
bcopy(&n, data, sizeof(n));
error = 0;
error = EINVAL;
break;
}
- lck_mtx_lock(ctl_mtx);
+ lck_mtx_lock(&ctl_mtx);
kctl = ctl_find_by_name(ctl_info.ctl_name);
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
if (kctl == 0) {
error = ENOENT;
break;
}
static void
-kctl_tbl_grow()
+kctl_tbl_grow(void)
{
struct kctl **new_table;
uintptr_t new_size;
- lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED);
+ lck_mtx_assert(&ctl_mtx, LCK_MTX_ASSERT_OWNED);
if (kctl_tbl_growing) {
/* Another thread is allocating */
kctl_tbl_growing_waiting++;
do {
- (void) msleep((caddr_t) &kctl_tbl_growing, ctl_mtx,
+ (void) msleep((caddr_t) &kctl_tbl_growing, &ctl_mtx,
PSOCK | PCATCH, "kctl_tbl_growing", 0);
} while (kctl_tbl_growing);
kctl_tbl_growing_waiting--;
new_size = kctl_tbl_size + KCTL_TBL_INC;
- lck_mtx_unlock(ctl_mtx);
- new_table = _MALLOC(sizeof(struct kctl *) * new_size,
- M_TEMP, M_WAIT | M_ZERO);
- lck_mtx_lock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
+ new_table = kheap_alloc(KHEAP_DEFAULT, sizeof(struct kctl *) * new_size,
+ Z_WAITOK | Z_ZERO);
+ lck_mtx_lock(&ctl_mtx);
if (new_table != NULL) {
if (kctl_table != NULL) {
bcopy(kctl_table, new_table,
kctl_tbl_size * sizeof(struct kctl *));
- _FREE(kctl_table, M_TEMP);
+ kheap_free(KHEAP_DEFAULT, kctl_table,
+ sizeof(struct kctl *) * kctl_tbl_size);
}
kctl_table = new_table;
kctl_tbl_size = new_size;
{
uintptr_t i;
- lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED);
+ lck_mtx_assert(&ctl_mtx, LCK_MTX_ASSERT_OWNED);
if (kctl_tbl_count >= kctl_tbl_size) {
kctl_tbl_grow();
*/
uintptr_t i = (((uintptr_t)kctlref) & KCTLREF_INDEX_MASK) - 1;
- lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED);
+ lck_mtx_assert(&ctl_mtx, LCK_MTX_ASSERT_OWNED);
if (i < kctl_tbl_size) {
struct kctl *kctl = kctl_table[i];
uintptr_t i = (((uintptr_t)kctlref) & KCTLREF_INDEX_MASK) - 1;
struct kctl *kctl = NULL;
- lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED);
+ lck_mtx_assert(&ctl_mtx, LCK_MTX_ASSERT_OWNED);
if (i >= kctl_tbl_size) {
kctlstat.kcs_bad_kctlref++;
return EINVAL;
}
- MALLOC(kctl, struct kctl *, sizeof(*kctl), M_TEMP, M_WAITOK);
+ kctl = kheap_alloc(KHEAP_DEFAULT, sizeof(struct kctl), Z_WAITOK | Z_ZERO);
if (kctl == NULL) {
return ENOMEM;
}
- bzero((char *)kctl, sizeof(*kctl));
- lck_mtx_lock(ctl_mtx);
+ lck_mtx_lock(&ctl_mtx);
if (kctl_make_ref(kctl) == NULL) {
- lck_mtx_unlock(ctl_mtx);
- FREE(kctl, M_TEMP);
+ lck_mtx_unlock(&ctl_mtx);
+ kheap_free(KHEAP_DEFAULT, kctl, sizeof(struct kctl));
return ENOMEM;
}
/* Verify the same name isn't already registered */
if (ctl_find_by_name(userkctl->ctl_name) != NULL) {
kctl_delete_ref(kctl->kctlref);
- lck_mtx_unlock(ctl_mtx);
- FREE(kctl, M_TEMP);
+ lck_mtx_unlock(&ctl_mtx);
+ kheap_free(KHEAP_DEFAULT, kctl, sizeof(struct kctl));
return EEXIST;
}
if (ctl_find_by_id_unit(userkctl->ctl_id, userkctl->ctl_unit)) {
kctl_delete_ref(kctl->kctlref);
- lck_mtx_unlock(ctl_mtx);
- FREE(kctl, M_TEMP);
+ lck_mtx_unlock(&ctl_mtx);
+ kheap_free(KHEAP_DEFAULT, kctl, sizeof(struct kctl));
return EEXIST;
}
kctl->id = userkctl->ctl_id;
kctlstat.kcs_reg_count++;
kctlstat.kcs_gencnt++;
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
*kctlref = kctl->kctlref;
{
struct kctl *kctl;
- lck_mtx_lock(ctl_mtx);
+ lck_mtx_lock(&ctl_mtx);
if ((kctl = kctl_from_ref(kctlref)) == NULL) {
kctlstat.kcs_bad_kctlref++;
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
if (ctl_debug != 0) {
printf("%s invalid kctlref %p\n",
__func__, kctlref);
}
if (!TAILQ_EMPTY(&kctl->kcb_head)) {
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
return EBUSY;
}
kctlstat.kcs_gencnt++;
kctl_delete_ref(kctl->kctlref);
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
ctl_post_msg(KEV_CTL_DEREGISTERED, kctl->id);
- FREE(kctl, M_TEMP);
+ kheap_free(KHEAP_DEFAULT, kctl, sizeof(struct kctl));
return 0;
}
{
struct kctl *kctl;
- lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED);
+ lck_mtx_assert(&ctl_mtx, LCK_MTX_ASSERT_OWNED);
TAILQ_FOREACH(kctl, &ctl_head, next)
if (strncmp(kctl->name, name, sizeof(kctl->name)) == 0) {
u_int32_t ctl_id = 0;
struct kctl *kctl;
- lck_mtx_lock(ctl_mtx);
+ lck_mtx_lock(&ctl_mtx);
kctl = ctl_find_by_name(name);
if (kctl) {
ctl_id = kctl->id;
}
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
return ctl_id;
}
int found = 0;
struct kctl *kctl;
- lck_mtx_lock(ctl_mtx);
+ lck_mtx_lock(&ctl_mtx);
TAILQ_FOREACH(kctl, &ctl_head, next) {
if (kctl->id == id) {
break;
strlcpy(out_name, kctl->name, maxsize);
found = 1;
}
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
return found ? 0 : ENOENT;
}
{
struct kctl *kctl;
- lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED);
+ lck_mtx_assert(&ctl_mtx, LCK_MTX_ASSERT_OWNED);
TAILQ_FOREACH(kctl, &ctl_head, next) {
if (kctl->id == id && (kctl->flags & CTL_FLAG_REG_ID_UNIT) == 0) {
{
struct ctl_cb *kcb;
- lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_OWNED);
+ lck_mtx_assert(&ctl_mtx, LCK_MTX_ASSERT_OWNED);
TAILQ_FOREACH(kcb, &kctl->kcb_head, next)
if (kcb->sac.sc_unit == unit) {
lr_saved = __builtin_return_address(0);
- lck_mtx_lock(ctl_mtx);
+ lck_mtx_lock(&ctl_mtx);
/*
* First validate the kctlref
*/
if ((kctl = kctl_from_ref(kctlref)) == NULL) {
kctlstat.kcs_bad_kctlref++;
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
if (ctl_debug != 0) {
printf("%s invalid kctlref %p\n",
__func__, kctlref);
kcb = kcb_find(kctl, unit);
if (kcb == NULL || kcb->kctl != kctl || (so = kcb->so) == NULL) {
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
return NULL;
}
/*
/*
* Respect lock ordering: socket before ctl_mtx
*/
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
socket_lock(so, 1);
/*
i = (so->next_lock_lr + SO_LCKDBG_MAX - 1) % SO_LCKDBG_MAX;
so->lock_lr[i] = lr_saved;
- lck_mtx_lock(ctl_mtx);
+ lck_mtx_lock(&ctl_mtx);
if ((kctl = kctl_from_ref(kctlref)) == NULL || kcb->kctl == NULL) {
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
socket_unlock(so, 1);
so = NULL;
- lck_mtx_lock(ctl_mtx);
+ lck_mtx_lock(&ctl_mtx);
} else if (kctlflags != NULL) {
*kctlflags = kctl->flags;
}
wakeup((event_t)&kcb->usecount);
}
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
return so;
}
struct ctl_event_data ctl_ev_data;
struct kev_msg ev_msg;
- lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_NOTOWNED);
+ lck_mtx_assert(&ctl_mtx, LCK_MTX_ASSERT_NOTOWNED);
bzero(&ev_msg, sizeof(struct kev_msg));
ev_msg.vendor_code = KEV_VENDOR_APPLE;
}
if (so->so_pcb != NULL) {
- lck_mtx_lock(((struct ctl_cb *)so->so_pcb)->mtx);
+ lck_mtx_lock(&((struct ctl_cb *)so->so_pcb)->mtx);
} else {
panic("ctl_lock: so=%p NO PCB! lr=%p lrh= %s\n",
so, lr_saved, solockhistory_nr(so));
printf("ctl_unlock: so=%llx sopcb=%x lock=%llx ref=%u lr=%llx\n",
(uint64_t)VM_KERNEL_ADDRPERM(so),
 (uint64_t)VM_KERNEL_ADDRPERM(so->so_pcb),
- (uint64_t)VM_KERNEL_ADDRPERM(((struct ctl_cb *)so->so_pcb)->mtx),
+ (uint64_t)VM_KERNEL_ADDRPERM(&((struct ctl_cb *)so->so_pcb)->mtx),
so->so_usecount, (uint64_t)VM_KERNEL_ADDRPERM(lr_saved));
#endif /* (MORE_KCTLLOCK_DEBUG && (DEVELOPMENT || DEBUG)) */
if (refcount) {
solockhistory_nr(so));
/* NOTREACHED */
}
- mutex_held = ((struct ctl_cb *)so->so_pcb)->mtx;
+ mutex_held = &((struct ctl_cb *)so->so_pcb)->mtx;
lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
so->unlock_lr[so->next_unlock_lr] = lr_saved;
panic("ctl_getlock: so=%p usecount=%x lrh= %s\n",
so, so->so_usecount, solockhistory_nr(so));
}
- return kcb->mtx;
+ return &kcb->mtx;
} else {
panic("ctl_getlock: so=%p NULL NO so_pcb %s\n",
so, solockhistory_nr(so));
struct kctl *kctl;
size_t item_size = ROUNDUP64(sizeof(struct xkctl_reg));
- buf = _MALLOC(item_size, M_TEMP, M_WAITOK | M_ZERO);
+ buf = kheap_alloc(KHEAP_TEMP, item_size, Z_WAITOK | Z_ZERO);
if (buf == NULL) {
return ENOMEM;
}
- lck_mtx_lock(ctl_mtx);
+ lck_mtx_lock(&ctl_mtx);
n = kctlstat.kcs_reg_count;
}
done:
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
- if (buf != NULL) {
- FREE(buf, M_TEMP);
- }
+ kheap_free(KHEAP_TEMP, buf, item_size);
return error;
}
2 * ROUNDUP64(sizeof(struct xsockbuf_n)) +
ROUNDUP64(sizeof(struct xsockstat_n));
- buf = _MALLOC(item_size, M_TEMP, M_WAITOK | M_ZERO);
+ buf = kheap_alloc(KHEAP_TEMP, item_size, Z_WAITOK | Z_ZERO);
if (buf == NULL) {
return ENOMEM;
}
- lck_mtx_lock(ctl_mtx);
+ lck_mtx_lock(&ctl_mtx);
n = kctlstat.kcs_pcbcount;
}
done:
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
+ kheap_free(KHEAP_TEMP, buf, item_size);
return error;
}
#pragma unused(oidp, arg1, arg2)
int error = 0;
- lck_mtx_lock(ctl_mtx);
+ lck_mtx_lock(&ctl_mtx);
if (req->newptr != USER_ADDR_NULL) {
error = EPERM;
error = SYSCTL_OUT(req, &kctlstat,
MIN(sizeof(struct kctlstat), req->oldlen));
done:
- lck_mtx_unlock(ctl_mtx);
+ lck_mtx_unlock(&ctl_mtx);
return error;
}
(void) task_suspend_internal(task);
- MALLOC(alloced_name, char *, MAXPATHLEN, M_TEMP, M_NOWAIT | M_ZERO);
+ alloced_name = zalloc_flags(ZV_NAMEI, Z_NOWAIT | Z_ZERO);
/* create name according to sysctl'able format string */
/* if name creation fails, fall back to historical behaviour... */
audit_proc_coredump(core_proc, name, error);
#endif
if (alloced_name != NULL) {
- FREE(alloced_name, M_TEMP);
+ zfree(ZV_NAMEI, alloced_name);
}
if (error == 0) {
error = error1;
#include <security/_label.h>
#endif
+#include <os/hash.h>
#include <IOKit/IOBSD.h>
void mach_kauth_cred_uthread_update( void );
*
* Note: Does *NOT* currently include per-thread credential changes
*/
-
#if DEBUG_CRED
#define DEBUG_CRED_ENTER printf
#define DEBUG_CRED_CHANGE printf
-extern void kauth_cred_print(kauth_cred_t cred);
-
-#include <libkern/OSDebug.h> /* needed for get_backtrace( ) */
-
-int is_target_cred( kauth_cred_t the_cred );
-void get_backtrace( void );
-
-static int sysctl_dump_creds( __unused struct sysctl_oid *oidp, __unused void *arg1,
- __unused int arg2, struct sysctl_req *req );
-static int
-sysctl_dump_cred_backtraces( __unused struct sysctl_oid *oidp, __unused void *arg1,
- __unused int arg2, struct sysctl_req *req );
-
-#define MAX_STACK_DEPTH 8
-struct cred_backtrace {
- int depth;
- void * stack[MAX_STACK_DEPTH];
-};
-typedef struct cred_backtrace cred_backtrace;
-
-#define MAX_CRED_BUFFER_SLOTS 200
-struct cred_debug_buffer {
- int next_slot;
- cred_backtrace stack_buffer[MAX_CRED_BUFFER_SLOTS];
-};
-typedef struct cred_debug_buffer cred_debug_buffer;
-cred_debug_buffer * cred_debug_buf_p = NULL;
-
#else /* !DEBUG_CRED */
-
#define DEBUG_CRED_ENTER(fmt, ...) do {} while (0)
#define DEBUG_CRED_CHANGE(fmt, ...) do {} while (0)
-
#endif /* !DEBUG_CRED */
#if CONFIG_EXT_RESOLVER
* times out.
*/
-static lck_mtx_t *kauth_resolver_mtx;
-#define KAUTH_RESOLVER_LOCK() lck_mtx_lock(kauth_resolver_mtx);
-#define KAUTH_RESOLVER_UNLOCK() lck_mtx_unlock(kauth_resolver_mtx);
+static LCK_MTX_DECLARE(kauth_resolver_mtx, &kauth_lck_grp);
+#define KAUTH_RESOLVER_LOCK() lck_mtx_lock(&kauth_resolver_mtx);
+#define KAUTH_RESOLVER_UNLOCK() lck_mtx_unlock(&kauth_resolver_mtx);
static volatile pid_t kauth_resolver_identity;
static int kauth_identitysvc_has_registered;
static int kauth_resolver_registered;
-static uint32_t kauth_resolver_sequence;
+static uint32_t kauth_resolver_sequence = 31337;
static int kauth_resolver_timeout = 30; /* default: 30 seconds */
struct kauth_resolver_work {
int kr_result;
};
-TAILQ_HEAD(kauth_resolver_unsubmitted_head, kauth_resolver_work) kauth_resolver_unsubmitted;
-TAILQ_HEAD(kauth_resolver_submitted_head, kauth_resolver_work) kauth_resolver_submitted;
-TAILQ_HEAD(kauth_resolver_done_head, kauth_resolver_work) kauth_resolver_done;
+TAILQ_HEAD(kauth_resolver_unsubmitted_head, kauth_resolver_work) kauth_resolver_unsubmitted =
+ TAILQ_HEAD_INITIALIZER(kauth_resolver_unsubmitted);
+TAILQ_HEAD(kauth_resolver_submitted_head, kauth_resolver_work) kauth_resolver_submitted =
+ TAILQ_HEAD_INITIALIZER(kauth_resolver_submitted);
+TAILQ_HEAD(kauth_resolver_done_head, kauth_resolver_work) kauth_resolver_done =
+ TAILQ_HEAD_INITIALIZER(kauth_resolver_done);
/* Number of resolver timeouts between logged complaints */
#define KAUTH_COMPLAINT_INTERVAL 1000
time_t ki_ntsid_expiry;
};
-static TAILQ_HEAD(kauth_identity_head, kauth_identity) kauth_identities;
-static lck_mtx_t *kauth_identity_mtx;
-#define KAUTH_IDENTITY_LOCK() lck_mtx_lock(kauth_identity_mtx);
-#define KAUTH_IDENTITY_UNLOCK() lck_mtx_unlock(kauth_identity_mtx);
+static TAILQ_HEAD(kauth_identity_head, kauth_identity) kauth_identities =
+ TAILQ_HEAD_INITIALIZER(kauth_identities);
+static LCK_MTX_DECLARE(kauth_identity_mtx, &kauth_lck_grp);
+#define KAUTH_IDENTITY_LOCK() lck_mtx_lock(&kauth_identity_mtx);
+#define KAUTH_IDENTITY_UNLOCK() lck_mtx_unlock(&kauth_identity_mtx);
#define KAUTH_IDENTITY_CACHEMAX_DEFAULT 100 /* XXX default sizing? */
static int kauth_identity_cachemax = KAUTH_IDENTITY_CACHEMAX_DEFAULT;
static int kauth_identity_count;
#define KAUTH_GROUP_ISMEMBER (1<<0)
};
-TAILQ_HEAD(kauth_groups_head, kauth_group_membership) kauth_groups;
-static lck_mtx_t *kauth_groups_mtx;
-#define KAUTH_GROUPS_LOCK() lck_mtx_lock(kauth_groups_mtx);
-#define KAUTH_GROUPS_UNLOCK() lck_mtx_unlock(kauth_groups_mtx);
+TAILQ_HEAD(kauth_groups_head, kauth_group_membership) kauth_groups =
+ TAILQ_HEAD_INITIALIZER(kauth_groups);
+static LCK_MTX_DECLARE(kauth_groups_mtx, &kauth_lck_grp);
+#define KAUTH_GROUPS_LOCK() lck_mtx_lock(&kauth_groups_mtx);
+#define KAUTH_GROUPS_UNLOCK() lck_mtx_unlock(&kauth_groups_mtx);
#define KAUTH_GROUPS_CACHEMAX_DEFAULT 100 /* XXX default sizing? */
static int kauth_groups_cachemax = KAUTH_GROUPS_CACHEMAX_DEFAULT;
static int kauth_groups_count;
#define KAUTH_CRED_TABLE_SIZE 128
ZONE_DECLARE(ucred_zone, "cred", sizeof(struct ucred), ZC_ZFREE_CLEARMEM);
+
LIST_HEAD(kauth_cred_entry_head, ucred);
static struct kauth_cred_entry_head
kauth_cred_table_anchor[KAUTH_CRED_TABLE_SIZE];
/* we could compute a better timeout here */
ts.tv_sec = kauth_resolver_timeout;
ts.tv_nsec = 0;
- error = msleep(workp, kauth_resolver_mtx, PCATCH, "kr_submit", &ts);
+ error = msleep(workp, &kauth_resolver_mtx, PCATCH, "kr_submit", &ts);
/* request has been completed? */
if ((error == 0) && (workp->kr_flags & KAUTH_REQUEST_DONE)) {
break;
}
-/*
- * kauth_resolver_init
- *
- * Description: Initialize the daemon side of the credential identity resolver
- *
- * Parameters: (void)
- *
- * Returns: (void)
- *
- * Notes: Initialize the credential identity resolver for use; the
- * credential identity resolver is the KPI used by the user
- * space credential identity resolver daemon to communicate
- * with the kernel via the identitysvc() system call..
- *
- * This is how membership in more than 16 groups (1 effective
- * and 15 supplementary) is supported, and also how UID's,
- * UUID's, and so on, are translated to/from POSIX credential
- * values.
- *
- * The credential identity resolver operates by attempting to
- * determine identity first from the credential, then from
- * the kernel credential identity cache, and finally by
- * enqueueing a request to a user space daemon.
- *
- * This function is called from kauth_init() in the file
- * kern_authorization.c.
- */
-void
-kauth_resolver_init(void)
-{
- TAILQ_INIT(&kauth_resolver_unsubmitted);
- TAILQ_INIT(&kauth_resolver_submitted);
- TAILQ_INIT(&kauth_resolver_done);
- kauth_resolver_sequence = 31337;
- kauth_resolver_mtx = lck_mtx_alloc_init(kauth_lck_grp, 0 /*LCK_ATTR_NULL*/);
-}
-
/*
* kauth_resolver_identity_reset
*
}
}
- MALLOC(workp, struct kauth_resolver_work *, sizeof(*workp), M_KAUTH, M_WAITOK);
+ workp = kheap_alloc(KM_KAUTH, sizeof(struct kauth_resolver_work),
+ Z_WAITOK);
if (workp == NULL) {
return ENOMEM;
}
* If we dropped the last reference, free the request.
*/
if (shouldfree) {
- FREE(workp, M_KAUTH);
+ kheap_free(KM_KAUTH, workp, sizeof(struct kauth_resolver_work));
}
KAUTH_DEBUG("RESOLVER - returning %d", error);
if (TAILQ_FIRST(&kauth_resolver_unsubmitted) == NULL) {
int error;
- error = msleep0(&kauth_resolver_unsubmitted, kauth_resolver_mtx, PCATCH, "GRGetWork", 0, kauth_resolver_getwork_continue);
+ error = msleep0(&kauth_resolver_unsubmitted, &kauth_resolver_mtx, PCATCH, "GRGetWork", 0, kauth_resolver_getwork_continue);
/*
* If this is a wakeup from another thread in the resolver
* deregistering it, error out the request-for-work thread
struct uthread *ut = get_bsdthread_info(thread);
ut->uu_save.uus_kauth.message = message;
- error = msleep0(&kauth_resolver_unsubmitted, kauth_resolver_mtx, PCATCH, "GRGetWork", 0, kauth_resolver_getwork_continue);
+ error = msleep0(&kauth_resolver_unsubmitted, &kauth_resolver_mtx, PCATCH, "GRGetWork", 0, kauth_resolver_getwork_continue);
KAUTH_RESOLVER_UNLOCK();
/*
* If this is a wakeup from another thread in the resolver
#define KI_VALID_GROUPS (1<<6)
#if CONFIG_EXT_RESOLVER
-/*
- * kauth_identity_init
- *
- * Description: Initialize the kernel side of the credential identity resolver
- *
- * Parameters: (void)
- *
- * Returns: (void)
- *
- * Notes: Initialize the credential identity resolver for use; the
- * credential identity resolver is the KPI used to communicate
- * with a user space credential identity resolver daemon.
- *
- * This function is called from kauth_init() in the file
- * kern_authorization.c.
- */
-void
-kauth_identity_init(void)
-{
- TAILQ_INIT(&kauth_identities);
- kauth_identity_mtx = lck_mtx_alloc_init(kauth_lck_grp, 0 /*LCK_ATTR_NULL*/);
-}
-
-
/*
* kauth_identity_alloc
*
struct kauth_identity *kip;
/* get and fill in a new identity */
- MALLOC(kip, struct kauth_identity *, sizeof(*kip), M_KAUTH, M_WAITOK | M_ZERO);
+ kip = kheap_alloc(KM_KAUTH, sizeof(struct kauth_identity),
+ Z_WAITOK | Z_ZERO);
if (kip != NULL) {
if (gid != KAUTH_GID_NONE) {
kip->ki_gid = gid;
vfs_removename(ip->ki_name);
}
/* free the expired entry */
- FREE(ip, M_KAUTH);
+ kheap_free(KM_KAUTH, ip, sizeof(struct kauth_identity));
}
}
{
struct kauth_identity *kip;
- lck_mtx_assert(kauth_identity_mtx, LCK_MTX_ASSERT_OWNED);
+ lck_mtx_assert(&kauth_identity_mtx, LCK_MTX_ASSERT_OWNED);
while (kauth_identity_count > newsize) {
kip = TAILQ_LAST(&kauth_identities, kauth_identity_head);
TAILQ_REMOVE(&kauth_identities, kip, ki_link);
kauth_identity_count--;
- FREE(kip, M_KAUTH);
+ kheap_free(KM_KAUTH, kip, sizeof(struct kauth_identity));
}
}
* XXX the linked-list implementation here needs to be optimized.
*/
-/*
- * kauth_groups_init
- *
- * Description: Initialize the groups cache
- *
- * Parameters: (void)
- *
- * Returns: (void)
- *
- * Notes: Initialize the groups cache for use; the group cache is used
- * to avoid unnecessary calls out to user space.
- *
- * This function is called from kauth_init() in the file
- * kern_authorization.c.
- */
-void
-kauth_groups_init(void)
-{
- TAILQ_INIT(&kauth_groups);
- kauth_groups_mtx = lck_mtx_alloc_init(kauth_lck_grp, 0 /*LCK_ATTR_NULL*/);
-}
-
-
/*
* kauth_groups_expired
*
}
/* allocate a new record */
- MALLOC(gm, struct kauth_group_membership *, sizeof(*gm), M_KAUTH, M_WAITOK);
+ gm = kheap_alloc(KM_KAUTH, sizeof(struct kauth_group_membership),
+ Z_WAITOK);
if (gm != NULL) {
gm->gm_uid = el->el_uid;
gm->gm_gid = el->el_gid;
KAUTH_GROUPS_UNLOCK();
/* free expired cache entry */
- if (gm != NULL) {
- FREE(gm, M_KAUTH);
- }
+ kheap_free(KM_KAUTH, gm, sizeof(struct kauth_group_membership));
}
/*
{
struct kauth_group_membership *gm;
- lck_mtx_assert(kauth_groups_mtx, LCK_MTX_ASSERT_OWNED);
+ lck_mtx_assert(&kauth_groups_mtx, LCK_MTX_ASSERT_OWNED);
while (kauth_groups_count > new_size) {
gm = TAILQ_LAST(&kauth_groups, kauth_groups_head);
TAILQ_REMOVE(&kauth_groups, gm, gm_link);
kauth_groups_count--;
- FREE(gm, M_KAUTH);
+ kheap_free(KM_KAUTH, gm, sizeof(struct kauth_group_membership));
}
}
#endif /* CONFIG_EXT_RESOLVER */
*/
/* lock protecting credential hash table */
-static lck_mtx_t kauth_cred_hash_mtx;
+static LCK_MTX_DECLARE(kauth_cred_hash_mtx, &kauth_lck_grp);
#define KAUTH_CRED_HASH_LOCK() lck_mtx_lock(&kauth_cred_hash_mtx);
#define KAUTH_CRED_HASH_UNLOCK() lck_mtx_unlock(&kauth_cred_hash_mtx);
#define KAUTH_CRED_HASH_LOCK_ASSERT() LCK_MTX_ASSERT(&kauth_cred_hash_mtx, LCK_MTX_ASSERT_OWNED)
void
kauth_cred_init(void)
{
- lck_mtx_init(&kauth_cred_hash_mtx, kauth_lck_grp, 0 /*LCK_ATTR_NULL*/);
-
for (int i = 0; i < KAUTH_CRED_TABLE_SIZE; i++) {
LIST_INIT(&kauth_cred_table_anchor[i]);
}
kauth_cred_panic_over_retain(cred);
}
-#if 0 // use this to watch a specific credential
- if (is_target_cred( *credp ) != 0) {
- get_backtrace();
- }
-#endif
-
return true;
}
if (__improbable(old_ref >= KAUTH_CRED_REF_MAX)) {
kauth_cred_panic_over_retain(cred);
}
-
-#if 0 // use this to watch a specific credential
- if (is_target_cred( cred ) != 0) {
- get_backtrace();
- }
-#endif
}
/*
{
u_long old_ref = os_atomic_dec_orig(&cred->cr_ref, relaxed);
-#if 0 // use this to watch a specific credential
- if (is_target_cred( *credp ) != 0) {
- get_backtrace();
- }
-#endif
-
if (__improbable(old_ref <= 0)) {
kauth_cred_panic_over_released(cred);
}
#if CONFIG_MACF
/* Note: we know the flags are equal, so we only need to test one */
if (pcred1->cr_flags & CRF_MAC_ENFORCE) {
- if (!mac_cred_label_compare(cred1->cr_label, cred2->cr_label)) {
+ if (!mac_cred_label_is_equal(cred1->cr_label, cred2->cr_label)) {
return false;
}
}
}
-/*
- * kauth_cred_hash
- *
- * Description: Generates a hash key using data that makes up a credential;
- * based on ElfHash
- *
- * Parameters: datap Pointer to data to hash
- * data_len Count of bytes to hash
- * start_key Start key value
- *
- * Returns: (u_long) Returned hash key
- */
-static inline u_long
-kauth_cred_hash(const uint8_t *datap, int data_len, u_long start_key)
-{
- u_long hash_key = start_key;
- u_long temp;
-
- while (data_len > 0) {
- hash_key = (hash_key << 4) + *datap++;
- temp = hash_key & 0xF0000000;
- if (temp) {
- hash_key ^= temp >> 24;
- }
- hash_key &= ~temp;
- data_len--;
- }
- return hash_key;
-}
-
-
/*
* kauth_cred_get_bucket
*
#if CONFIG_MACF
posix_cred_t pcred = posix_cred_get(cred);
#endif
- u_long hash_key = 0;
-
- hash_key = kauth_cred_hash((uint8_t *)&cred->cr_posix,
- sizeof(struct posix_cred),
- hash_key);
- hash_key = kauth_cred_hash((uint8_t *)&cred->cr_audit,
- sizeof(struct au_session),
- hash_key);
+ uint32_t hash_key = 0;
+
+ hash_key = os_hash_jenkins_update(&cred->cr_posix,
+ sizeof(struct posix_cred), hash_key);
+
+ hash_key = os_hash_jenkins_update(&cred->cr_audit,
+ sizeof(struct au_session), hash_key);
#if CONFIG_MACF
if (pcred->cr_flags & CRF_MAC_ENFORCE) {
- hash_key = kauth_cred_hash((uint8_t *)cred->cr_label,
- sizeof(struct label),
- hash_key);
+ hash_key = mac_cred_label_hash_update(cred->cr_label, hash_key);
}
-#endif
+#endif /* CONFIG_MACF */
+ hash_key = os_hash_jenkins_finish(hash_key);
hash_key %= KAUTH_CRED_TABLE_SIZE;
return &kauth_cred_table_anchor[hash_key];
}
-#ifdef DEBUG_CRED
-/*
- * kauth_cred_print
- *
- * Description: Print out an individual credential's contents for debugging
- * purposes
- *
- * Parameters: cred The credential to print out
- *
- * Returns: (void)
- *
- * Implicit returns: Results in console output
- */
-void
-kauth_cred_print(kauth_cred_t cred)
-{
- int i;
-
- printf("%p - refs %lu flags 0x%08x uids e%d r%d sv%d gm%d ", cred, cred->cr_ref, cred->cr_flags, cred->cr_uid, cred->cr_ruid, cred->cr_svuid, cred->cr_gmuid);
- printf("group count %d gids ", cred->cr_ngroups);
- for (i = 0; i < NGROUPS; i++) {
- if (i == 0) {
- printf("e");
- }
- printf("%d ", cred->cr_groups[i]);
- }
- printf("r%d sv%d ", cred->cr_rgid, cred->cr_svgid);
- printf("auditinfo_addr %d %d %d %d %d %d\n",
- cred->cr_audit.s_aia_p->ai_auid,
- cred->cr_audit.as_mask.am_success,
- cred->cr_audit.as_mask.am_failure,
- cred->cr_audit.as_aia_p->ai_termid.at_port,
- cred->cr_audit.as_aia_p->ai_termid.at_addr[0],
- cred->cr_audit.as_aia_p->ai_asid);
-}
-
-int
-is_target_cred( kauth_cred_t the_cred )
-{
- if (the_cred->cr_uid != 0) {
- return 0;
- }
- if (the_cred->cr_ruid != 0) {
- return 0;
- }
- if (the_cred->cr_svuid != 0) {
- return 0;
- }
- if (the_cred->cr_ngroups != 11) {
- return 0;
- }
- if (the_cred->cr_groups[0] != 11) {
- return 0;
- }
- if (the_cred->cr_groups[1] != 81) {
- return 0;
- }
- if (the_cred->cr_groups[2] != 63947) {
- return 0;
- }
- if (the_cred->cr_groups[3] != 80288) {
- return 0;
- }
- if (the_cred->cr_groups[4] != 89006) {
- return 0;
- }
- if (the_cred->cr_groups[5] != 52173) {
- return 0;
- }
- if (the_cred->cr_groups[6] != 84524) {
- return 0;
- }
- if (the_cred->cr_groups[7] != 79) {
- return 0;
- }
- if (the_cred->cr_groups[8] != 80292) {
- return 0;
- }
- if (the_cred->cr_groups[9] != 80) {
- return 0;
- }
- if (the_cred->cr_groups[10] != 90824) {
- return 0;
- }
- if (the_cred->cr_rgid != 11) {
- return 0;
- }
- if (the_cred->cr_svgid != 11) {
- return 0;
- }
- if (the_cred->cr_gmuid != 3475) {
- return 0;
- }
- if (the_cred->cr_audit.as_aia_p->ai_auid != 3475) {
- return 0;
- }
-/*
- * if ( the_cred->cr_audit.as_mask.am_success != 0 )
- * return( 0 );
- * if ( the_cred->cr_audit.as_mask.am_failure != 0 )
- * return( 0 );
- * if ( the_cred->cr_audit.as_aia_p->ai_termid.at_port != 0 )
- * return( 0 );
- * if ( the_cred->cr_audit.as_aia_p->ai_termid.at_addr[0] != 0 )
- * return( 0 );
- * if ( the_cred->cr_audit.as_aia_p->ai_asid != 0 )
- * return( 0 );
- * if ( the_cred->cr_flags != 0 )
- * return( 0 );
- */
- return -1; // found target cred
-}
-
-void
-get_backtrace( void )
-{
- int my_slot;
- void * my_stack[MAX_STACK_DEPTH];
- int i, my_depth;
-
- if (cred_debug_buf_p == NULL) {
- MALLOC(cred_debug_buf_p, cred_debug_buffer *, sizeof(*cred_debug_buf_p), M_KAUTH, M_WAITOK);
- bzero(cred_debug_buf_p, sizeof(*cred_debug_buf_p));
- }
-
- if (cred_debug_buf_p->next_slot > (MAX_CRED_BUFFER_SLOTS - 1)) {
- /* buffer is full */
- return;
- }
-
- my_depth = OSBacktrace(&my_stack[0], MAX_STACK_DEPTH);
- if (my_depth == 0) {
- printf("%s - OSBacktrace failed \n", __FUNCTION__);
- return;
- }
-
- /* fill new backtrace */
- my_slot = cred_debug_buf_p->next_slot;
- cred_debug_buf_p->next_slot++;
- cred_debug_buf_p->stack_buffer[my_slot].depth = my_depth;
- for (i = 0; i < my_depth; i++) {
- cred_debug_buf_p->stack_buffer[my_slot].stack[i] = my_stack[i];
- }
-
- return;
-}
-
-
-/* subset of struct ucred for use in sysctl_dump_creds */
-struct debug_ucred {
- void *credp;
- u_long cr_ref; /* reference count */
- uid_t cr_uid; /* effective user id */
- uid_t cr_ruid; /* real user id */
- uid_t cr_svuid; /* saved user id */
- u_short cr_ngroups; /* number of groups in advisory list */
- gid_t cr_groups[NGROUPS]; /* advisory group list */
- gid_t cr_rgid; /* real group id */
- gid_t cr_svgid; /* saved group id */
- uid_t cr_gmuid; /* UID for group membership purposes */
- struct auditinfo_addr cr_audit; /* user auditing data. */
- void *cr_label; /* MACF label */
- int cr_flags; /* flags on credential */
-};
-typedef struct debug_ucred debug_ucred;
-
-SYSCTL_PROC(_kern, OID_AUTO, dump_creds, CTLFLAG_RD,
- NULL, 0, sysctl_dump_creds, "S,debug_ucred", "List of credentials in the cred hash");
-
-/* accessed by:
- * err = sysctlbyname( "kern.dump_creds", bufp, &len, NULL, 0 );
- */
-
-static int
-sysctl_dump_creds( __unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req )
-{
- int i, j, counter = 0;
- int error;
- size_t space;
- kauth_cred_t found_cred;
- debug_ucred * cred_listp;
- debug_ucred * nextp;
-
- /* This is a readonly node. */
- if (req->newptr != USER_ADDR_NULL) {
- return EPERM;
- }
-
- /* calculate space needed */
- for (i = 0; i < KAUTH_CRED_TABLE_SIZE; i++) {
- TAILQ_FOREACH(found_cred, &kauth_cred_table_anchor[i], cr_link) {
- counter++;
- }
- }
-
- /* they are querying us so just return the space required. */
- if (req->oldptr == USER_ADDR_NULL) {
- counter += 10; // add in some padding;
- req->oldidx = counter * sizeof(debug_ucred);
- return 0;
- }
-
- MALLOC( cred_listp, debug_ucred *, req->oldlen, M_TEMP, M_WAITOK | M_ZERO);
- if (cred_listp == NULL) {
- return ENOMEM;
- }
-
- /* fill in creds to send back */
- nextp = cred_listp;
- space = 0;
- for (i = 0; i < KAUTH_CRED_TABLE_SIZE; i++) {
- TAILQ_FOREACH(found_cred, &kauth_cred_table_anchor[i], cr_link) {
- nextp->credp = found_cred;
- nextp->cr_ref = found_cred->cr_ref;
- nextp->cr_uid = found_cred->cr_uid;
- nextp->cr_ruid = found_cred->cr_ruid;
- nextp->cr_svuid = found_cred->cr_svuid;
- nextp->cr_ngroups = found_cred->cr_ngroups;
- for (j = 0; j < nextp->cr_ngroups; j++) {
- nextp->cr_groups[j] = found_cred->cr_groups[j];
- }
- nextp->cr_rgid = found_cred->cr_rgid;
- nextp->cr_svgid = found_cred->cr_svgid;
- nextp->cr_gmuid = found_cred->cr_gmuid;
- nextp->cr_audit.ai_auid =
- found_cred->cr_audit.as_aia_p->ai_auid;
- nextp->cr_audit.ai_mask.am_success =
- found_cred->cr_audit.as_mask.am_success;
- nextp->cr_audit.ai_mask.am_failure =
- found_cred->cr_audit.as_mask.am_failure;
- nextp->cr_audit.ai_termid.at_port =
- found_cred->cr_audit.as_aia_p->ai_termid.at_port;
- nextp->cr_audit.ai_termid.at_type =
- found_cred->cr_audit.as_aia_p->ai_termid.at_type;
- nextp->cr_audit.ai_termid.at_addr[0] =
- found_cred->cr_audit.as_aia_p->ai_termid.at_addr[0];
- nextp->cr_audit.ai_termid.at_addr[1] =
- found_cred->cr_audit.as_aia_p->ai_termid.at_addr[1];
- nextp->cr_audit.ai_termid.at_addr[2] =
- found_cred->cr_audit.as_aia_p->ai_termid.at_addr[2];
- nextp->cr_audit.ai_termid.at_addr[3] =
- found_cred->cr_audit.as_aia_p->ai_termid.at_addr[3];
- nextp->cr_audit.ai_asid =
- found_cred->cr_audit.as_aia_p->ai_asid;
- nextp->cr_audit.ai_flags =
- found_cred->cr_audit.as_aia_p->ai_flags;
- nextp->cr_label = found_cred->cr_label;
- nextp->cr_flags = found_cred->cr_flags;
- nextp++;
- space += sizeof(debug_ucred);
- if (space > req->oldlen) {
- FREE(cred_listp, M_TEMP);
- return ENOMEM;
- }
- }
- }
- req->oldlen = space;
- error = SYSCTL_OUT(req, cred_listp, req->oldlen);
- FREE(cred_listp, M_TEMP);
- return error;
-}
-
-
-SYSCTL_PROC(_kern, OID_AUTO, cred_bt, CTLFLAG_RD,
- NULL, 0, sysctl_dump_cred_backtraces, "S,cred_debug_buffer", "dump credential backtrace");
-
-/* accessed by:
- * err = sysctlbyname( "kern.cred_bt", bufp, &len, NULL, 0 );
- */
-
-static int
-sysctl_dump_cred_backtraces( __unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req )
-{
- int i, j;
- int error;
- size_t space;
- cred_debug_buffer * bt_bufp;
- cred_backtrace * nextp;
-
- /* This is a readonly node. */
- if (req->newptr != USER_ADDR_NULL) {
- return EPERM;
- }
-
- if (cred_debug_buf_p == NULL) {
- return EAGAIN;
- }
-
- /* calculate space needed */
- space = sizeof(cred_debug_buf_p->next_slot);
- space += (sizeof(cred_backtrace) * cred_debug_buf_p->next_slot);
-
- /* they are querying us so just return the space required. */
- if (req->oldptr == USER_ADDR_NULL) {
- req->oldidx = space;
- return 0;
- }
-
- if (space > req->oldlen) {
- return ENOMEM;
- }
-
- MALLOC( bt_bufp, cred_debug_buffer *, req->oldlen, M_TEMP, M_WAITOK | M_ZERO);
- if (bt_bufp == NULL) {
- return ENOMEM;
- }
-
- /* fill in backtrace info to send back */
- bt_bufp->next_slot = cred_debug_buf_p->next_slot;
- space = sizeof(bt_bufp->next_slot);
-
- nextp = &bt_bufp->stack_buffer[0];
- for (i = 0; i < cred_debug_buf_p->next_slot; i++) {
- nextp->depth = cred_debug_buf_p->stack_buffer[i].depth;
- for (j = 0; j < nextp->depth; j++) {
- nextp->stack[j] = cred_debug_buf_p->stack_buffer[i].stack[j];
- }
- space += sizeof(*nextp);
- nextp++;
- }
- req->oldlen = space;
- error = SYSCTL_OUT(req, bt_bufp, req->oldlen);
- FREE(bt_bufp, M_TEMP);
- return error;
-}
-
-#endif /* DEBUG_CRED */
-
-
/*
**********************************************************************
* The following routines will be moved to a policy_posix.c module at
#endif /* !SECURE_KERNEL */
int cs_all_vnodes = 0;
-static lck_grp_t *cs_lockgrp;
-
SYSCTL_INT(_vm, OID_AUTO, cs_force_kill, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_force_kill, 0, "");
SYSCTL_INT(_vm, OID_AUTO, cs_force_hard, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_force_hard, 0, "");
SYSCTL_INT(_vm, OID_AUTO, cs_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_debug, 0, "");
sizeof(cs_library_val_enable));
#endif
#endif /* !SECURE_KERNEL */
-
- lck_grp_attr_t *attr = lck_grp_attr_alloc_init();
- cs_lockgrp = lck_grp_alloc_init("KERNCS", attr);
- lck_grp_attr_free(attr);
}
STARTUP(CODESIGNING, STARTUP_RANK_FIRST, cs_init);
vm_address_t
csblob_get_addr(struct cs_blob *blob)
{
- return blob->csb_mem_kaddr;
+ return (vm_address_t)blob->csb_mem_kaddr;
}
/*
return 0;
}
- *out_start = (void *)csblob->csb_mem_kaddr;
+ *out_start = csblob->csb_mem_kaddr;
*out_length = csblob->csb_mem_size;
return 0;
#include <os/atomic_private.h>
#include <IOKit/IOBSD.h>
-#define IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
+#define IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
mach_msg_type_name_t, ipc_port_t *, mach_port_context_t, mach_msg_guard_flags_t *, uint32_t);
void ipc_port_release_send(ipc_port_t);
static void fdrelse(struct proc * p, int fd);
-extern void file_lock_init(void);
-
extern kauth_scope_t kauth_scope_fileop;
/* Conflict wait queue for when selects collide (opaque type) */
sizeof(struct fileproc), ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);
ZONE_DECLARE(fdp_zone, "filedesc",
sizeof(struct filedesc), ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);
+/*
+ * If you need accounting for KM_OFILETABL consider using
+ * KALLOC_HEAP_DEFINE to define a view.
+ */
+#define KM_OFILETABL KHEAP_DEFAULT
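A minimal sketch of what the comment above suggests (hypothetical names, not taken from the xnu sources): a dedicated kalloc heap view can be declared with KALLOC_HEAP_DEFINE and substituted for KHEAP_DEFAULT, so the open-file-table allocations get their own accounting. KHEAP_OFILETABL is an assumed name for illustration.

    /* hypothetical accounting view for open-file-table allocations */
    KALLOC_HEAP_DEFINE(KHEAP_OFILETABL, "file table", KHEAP_ID_DEFAULT);
    #define KM_OFILETABL KHEAP_OFILETABL

The kheap_alloc()/kheap_free() call sites below stay the same; only the heap view they charge changes.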
/*
* Descriptor management.
static const struct fileops uninitops;
os_refgrp_decl(, f_refgrp, "files refcounts", NULL);
-lck_grp_attr_t * file_lck_grp_attr;
-lck_grp_t * file_lck_grp;
-lck_attr_t * file_lck_attr;
+static LCK_GRP_DECLARE(file_lck_grp, "file");
#pragma mark fileglobs
if (IS_VALID_CRED(fg->fg_cred)) {
kauth_cred_unref(&fg->fg_cred);
}
- lck_mtx_destroy(&fg->fg_lock, file_lck_grp);
+ lck_mtx_destroy(&fg->fg_lock, &file_lck_grp);
#if CONFIG_MACF
mac_file_label_destroy(fg);
}
-/*
- * file_lock_init
- *
- * Description: Initialize the file lock group and the uipc and flist locks
- *
- * Parameters: (void)
- *
- * Returns: void
- *
- * Notes: Called at system startup from bsd_init().
- */
-void
-file_lock_init(void)
-{
- /* allocate file lock group attribute and group */
- file_lck_grp_attr = lck_grp_attr_alloc_init();
-
- file_lck_grp = lck_grp_alloc_init("file", file_lck_grp_attr);
-
- /* Allocate file lock attribute */
- file_lck_attr = lck_attr_alloc_init();
-}
-
-
void
proc_dirs_lock_shared(proc_t p)
{
proc_fdunlock(p);
pathlen = MAXPATHLEN;
- MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
- if (pathbufp == NULL) {
- error = ENOMEM;
- goto outdrop;
- }
+ pathbufp = zalloc(ZV_NAMEI);
+
if ((error = vnode_getwithref(vp)) == 0) {
if (uap->cmd == F_GETPATH_NOFIRMLINK) {
error = vn_getpath_ext(vp, NULL, pathbufp, &pathlen, VN_GETPATH_NO_FIRMLINK);
error = copyout((caddr_t)pathbufp, argp, pathlen);
}
}
- FREE(pathbufp, M_TEMP);
+ zfree(ZV_NAMEI, pathbufp);
goto outdrop;
}
.len = CP_MAX_WRAPPEDKEYSIZE,
};
- MALLOC(k.key, char *, k.len, M_TEMP, M_WAITOK | M_ZERO);
-
- error = VNOP_IOCTL(vp, F_TRANSCODEKEY, (caddr_t)&k, 1, &context);
+ k.key = kheap_alloc(KHEAP_TEMP, CP_MAX_WRAPPEDKEYSIZE, Z_WAITOK | Z_ZERO);
+ if (k.key == NULL) {
+ error = ENOMEM;
+ } else {
+ error = VNOP_IOCTL(vp, F_TRANSCODEKEY, (caddr_t)&k, 1, &context);
+ }
vnode_put(vp);
*retval = k.len;
}
- FREE(k.key, M_TEMP);
+ kheap_free(KHEAP_TEMP, k.key, CP_MAX_WRAPPEDKEYSIZE);
break;
}
proc_fdunlock(p);
pathlen = MAXPATHLEN;
- MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
- if (pathbufp == NULL) {
- error = ENOMEM;
- goto outdrop;
- }
+ pathbufp = zalloc(ZV_NAMEI);
+
if ((error = vnode_getwithref(vp)) == 0) {
int backingstore = 0;
(void)vnode_put(vp);
}
}
- FREE(pathbufp, M_TEMP);
+
+ zfree(ZV_NAMEI, pathbufp);
goto outdrop;
}
numfiles = (int)lim;
}
proc_fdunlock(p);
- MALLOC(newofiles, struct fileproc **,
- numfiles * OFILESIZE, M_OFILETABL, M_WAITOK);
+ newofiles = kheap_alloc(KM_OFILETABL, numfiles * OFILESIZE,
+ Z_WAITOK);
proc_fdlock(p);
if (newofiles == NULL) {
return ENOMEM;
}
if (fdp->fd_nfiles >= numfiles) {
- FREE(newofiles, M_OFILETABL);
+ kheap_free(KM_OFILETABL, newofiles, numfiles * OFILESIZE);
continue;
}
newofileflags = (char *) &newofiles[numfiles];
fdp->fd_ofiles = newofiles;
fdp->fd_ofileflags = newofileflags;
fdp->fd_nfiles = numfiles;
- FREE(ofiles, M_OFILETABL);
+ kheap_free(KM_OFILETABL, ofiles, oldnfiles * OFILESIZE);
fdexpand++;
}
}
return ENOMEM;
}
fg = zalloc_flags(fg_zone, Z_WAITOK | Z_ZERO);
- lck_mtx_init(&fg->fg_lock, file_lck_grp, file_lck_attr);
+ lck_mtx_init(&fg->fg_lock, &file_lck_grp, LCK_ATTR_NULL);
os_ref_retain_locked(&fp->fp_iocount);
os_ref_init_raw(&fg->fg_count, &f_refgrp);
}
proc_fdunlock(p);
- MALLOC(newfdp->fd_ofiles, struct fileproc **,
- i * OFILESIZE, M_OFILETABL, M_WAITOK);
+ newfdp->fd_ofiles = kheap_alloc(KM_OFILETABL, i * OFILESIZE,
+ Z_WAITOK | Z_ZERO);
if (newfdp->fd_ofiles == NULL) {
if (newfdp->fd_cdir) {
vnode_rele(newfdp->fd_cdir);
zfree(fdp_zone, newfdp);
return NULL;
}
- (void) memset(newfdp->fd_ofiles, 0, i * OFILESIZE);
proc_fdlock(p);
newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
newfdp->fd_kqhash = NULL;
newfdp->fd_kqhashmask = 0;
newfdp->fd_wqkqueue = NULL;
- lck_mtx_init(&newfdp->fd_kqhashlock, proc_kqhashlock_grp, proc_lck_attr);
- lck_mtx_init(&newfdp->fd_knhashlock, proc_knhashlock_grp, proc_lck_attr);
+ lck_mtx_init(&newfdp->fd_kqhashlock, &proc_kqhashlock_grp, &proc_lck_attr);
+ lck_mtx_init(&newfdp->fd_knhashlock, &proc_knhashlock_grp, &proc_lck_attr);
return newfdp;
}
proc_fdlock(p);
}
}
- FREE(fdp->fd_ofiles, M_OFILETABL);
+ kheap_free(KM_OFILETABL, fdp->fd_ofiles, fdp->fd_nfiles * OFILESIZE);
fdp->fd_ofiles = NULL;
fdp->fd_nfiles = 0;
}
hashdestroy(fdp->fd_kqhash, M_KQUEUE, fdp->fd_kqhashmask);
}
- lck_mtx_destroy(&fdp->fd_kqhashlock, proc_kqhashlock_grp);
- lck_mtx_destroy(&fdp->fd_knhashlock, proc_knhashlock_grp);
+ lck_mtx_destroy(&fdp->fd_kqhashlock, &proc_kqhashlock_grp);
+ lck_mtx_destroy(&fdp->fd_knhashlock, &proc_knhashlock_grp);
zfree(fdp_zone, fdp);
}
int err;
res = ipc_object_copyin(get_task_ipcspace(p->task),
- send, MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND);
+ send, MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND);
if (res == KERN_SUCCESS) {
err = fileport_makefd(p, port, UF_EXCLOSE, retval);
#define KEV_EVTID(code) BSDDBG_CODE(DBG_BSD_KEVENT, (code))
-MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
+/*
+ * If you need accounting for KM_KQUEUE consider using
+ * KALLOC_HEAP_DEFINE to define a zone view.
+ */
+#define KM_KQUEUE KHEAP_DEFAULT
#define KQ_EVENT NO_EVENT64
}
}
/* free the table */
- FREE(fdp->fd_knlist, M_KQUEUE);
- fdp->fd_knlist = NULL;
+ kheap_free(KM_KQUEUE, fdp->fd_knlist,
+ fdp->fd_knlistsize * sizeof(struct klist *));
}
fdp->fd_knlistsize = 0;
goto out_locked;
}
- MALLOC(list, struct klist *,
- size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
+ list = kheap_alloc(KM_KQUEUE, size * sizeof(struct klist *),
+ Z_WAITOK);
if (list == NULL) {
ret = ENOMEM;
goto out_locked;
bzero((caddr_t)list +
fdp->fd_knlistsize * sizeof(struct klist *),
(size - fdp->fd_knlistsize) * sizeof(struct klist *));
- FREE(fdp->fd_knlist, M_KQUEUE);
+ kheap_free(KM_KQUEUE, fdp->fd_knlist,
+ fdp->fd_knlistsize * sizeof(struct klist *));
fdp->fd_knlist = list;
fdp->fd_knlistsize = size;
}
ROUNDUP64(sizeof(struct xsockstat_n));
struct kern_event_pcb *ev_pcb;
- buf = _MALLOC(item_size, M_TEMP, M_WAITOK | M_ZERO);
+ buf = kheap_alloc(KHEAP_TEMP, item_size, Z_WAITOK | Z_ZERO);
if (buf == NULL) {
return ENOMEM;
}
done:
lck_rw_done(&kev_rwlock);
- if (buf != NULL) {
- FREE(buf, M_TEMP);
- }
-
+ kheap_free(KHEAP_TEMP, buf, item_size);
return error;
}
err = copyout(kqext, ubuf, sizeof(struct kevent_extinfo) * MIN(buflen, nknotes));
out:
- if (kqext) {
- kheap_free(KHEAP_TEMP, kqext, buflen * sizeof(struct kevent_extinfo));
- kqext = NULL;
- }
+ kheap_free(KHEAP_TEMP, kqext, buflen * sizeof(struct kevent_extinfo));
if (!err) {
*retval = (int32_t)MIN(nknotes, PROC_PIDFDKQUEUE_KNOTES_MAX);
/*
* Mach things for which prototypes are unavailable from Mach headers
*/
-#define IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
+#define IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
void ipc_task_reset(
task_t task);
void ipc_thread_reset(
vm_map_set_user_wire_limit(map, (vm_size_t)proc_limitgetcur(p, RLIMIT_MEMLOCK, FALSE));
#if XNU_TARGET_OS_OSX
if (p->p_platform == PLATFORM_IOS) {
- vm_map_mark_alien(map);
+ assert(vm_map_is_alien(map));
+ } else {
+ assert(!vm_map_is_alien(map));
}
#endif /* XNU_TARGET_OS_OSX */
proc_unlock(p);
int cputype = cpu_type();
vm_map_exec(map, task, load_result.is_64bit_addr, (void *)p->p_fd->fd_rdir, cputype, cpu_subtype, reslide);
+#if XNU_TARGET_OS_OSX
+#define SINGLE_JIT_ENTITLEMENT "com.apple.security.cs.single-jit"
+
+ if (IOTaskHasEntitlement(task, SINGLE_JIT_ENTITLEMENT)) {
+ vm_map_single_jit(map);
+ }
+#endif /* XNU_TARGET_OS_OSX */
+
/*
* Close file descriptors which specify close-on-exec.
*/
/* Use excpath, which contains the copyin-ed exec path */
DTRACE_PROC1(exec, uintptr_t, excpath);
- MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
+ ndp = kheap_alloc(KHEAP_TEMP, sizeof(*ndp), Z_WAITOK | Z_ZERO);
if (ndp == NULL) {
error = ENOMEM;
goto bad_notrans;
if (imgp->ip_ndp) {
nameidone(imgp->ip_ndp);
}
- if (ndp) {
- FREE(ndp, M_TEMP);
- }
+ kheap_free(KHEAP_TEMP, ndp, sizeof(*ndp));
return error;
}
if (MACH_PORT_VALID(act->new_port)) {
kr = ipc_object_copyin(get_task_ipcspace(current_task()),
act->new_port, MACH_MSG_TYPE_COPY_SEND,
- (ipc_object_t *) &port, 0, NULL, IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND);
+ (ipc_object_t *) &port, 0, NULL, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND);
if (kr != KERN_SUCCESS) {
ret = EINVAL;
int mode = psfa->psfaa_openargs.psfao_mode;
int origfd;
- MALLOC(bufp, char *, sizeof(*vap) + sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
+ bufp = kheap_alloc(KHEAP_TEMP,
+ sizeof(*vap) + sizeof(*ndp), Z_WAITOK | Z_ZERO);
if (bufp == NULL) {
error = ENOMEM;
break;
fileproc_alloc_init, NULL,
&origfd);
- FREE(bufp, M_TEMP);
+ kheap_free(KHEAP_TEMP, bufp, sizeof(*vap) + sizeof(*ndp));
AUDIT_SUBCALL_EXIT(uthread, error);
kr = ipc_object_copyin(get_task_ipcspace(current_task()),
psfa->psfaa_fileport, MACH_MSG_TYPE_COPY_SEND,
- (ipc_object_t *) &port, 0, NULL, IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND);
+ (ipc_object_t *) &port, 0, NULL, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND);
if (kr != KERN_SUCCESS) {
error = EINVAL;
return NULL;
}
+static void
+spawn_free_macpolicyinfo(const struct user__posix_spawn_args_desc *px_args,
+ _posix_spawn_mac_policy_extensions_t psmx, int count)
+{
+ if (psmx == NULL) {
+ return;
+ }
+ for (int i = 0; i < count; i++) {
+ _ps_mac_policy_extension_t *ext = &psmx->psmx_extensions[i];
+ kheap_free(KHEAP_TEMP, ext->datap, (vm_size_t) ext->datalen);
+ }
+ kheap_free(KHEAP_TEMP, psmx, px_args->mac_extensions_size);
+}
+
static int
-spawn_copyin_macpolicyinfo(const struct user__posix_spawn_args_desc *px_args, _posix_spawn_mac_policy_extensions_t *psmxp)
+spawn_copyin_macpolicyinfo(const struct user__posix_spawn_args_desc *px_args,
+ _posix_spawn_mac_policy_extensions_t *psmxp)
{
_posix_spawn_mac_policy_extensions_t psmx = NULL;
int error = 0;
int copycnt = 0;
- int i = 0;
*psmxp = NULL;
goto bad;
}
- MALLOC(psmx, _posix_spawn_mac_policy_extensions_t, px_args->mac_extensions_size, M_TEMP, M_WAITOK);
- if ((error = copyin(px_args->mac_extensions, psmx, px_args->mac_extensions_size)) != 0) {
+ psmx = kheap_alloc(KHEAP_TEMP, px_args->mac_extensions_size, Z_WAITOK);
+ if (psmx == NULL) {
+ error = ENOMEM;
+ goto bad;
+ }
+
+ error = copyin(px_args->mac_extensions, psmx, px_args->mac_extensions_size);
+ if (error) {
goto bad;
}
goto bad;
}
- for (i = 0; i < psmx->psmx_count; i++) {
+ for (int i = 0; i < psmx->psmx_count; i++) {
_ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
if (extension->datalen == 0 || extension->datalen > PAGE_SIZE) {
error = EINVAL;
goto bad;
}
#endif
- MALLOC(data, void *, (size_t)extension->datalen, M_TEMP, M_WAITOK);
- if ((error = copyin((user_addr_t)extension->data, data, (size_t)extension->datalen)) != 0) {
- FREE(data, M_TEMP);
+ data = kheap_alloc(KHEAP_TEMP, (vm_size_t) extension->datalen, Z_WAITOK);
+ if (data == NULL) {
+ error = ENOMEM;
+ goto bad;
+ }
+ error = copyin((user_addr_t)extension->data, data, (size_t)extension->datalen);
+ if (error) {
+ kheap_free(KHEAP_TEMP, data, (vm_size_t) extension->datalen);
+ error = ENOMEM;
goto bad;
}
extension->datap = data;
return 0;
bad:
- if (psmx != NULL) {
- for (i = 0; i < copycnt; i++) {
- FREE(psmx->psmx_extensions[i].datap, M_TEMP);
- }
- FREE(psmx, M_TEMP);
- }
+ spawn_free_macpolicyinfo(px_args, psmx, copycnt);
return error;
}
-
-static void
-spawn_free_macpolicyinfo(_posix_spawn_mac_policy_extensions_t psmx)
-{
- int i;
-
- if (psmx == NULL) {
- return;
- }
- for (i = 0; i < psmx->psmx_count; i++) {
- FREE(psmx->psmx_extensions[i].datap, M_TEMP);
- }
- FREE(psmx, M_TEMP);
-}
#endif /* CONFIG_MACF */
#if CONFIG_COALITIONS
* Allocate a big chunk for locals instead of using stack since these
* structures are pretty big.
*/
- MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO);
+ bufp = kheap_alloc(KHEAP_TEMP,
+ sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap), Z_WAITOK | Z_ZERO);
imgp = (struct image_params *) bufp;
if (bufp == NULL) {
error = ENOMEM;
error = EINVAL;
goto bad;
}
- MALLOC(px_sfap, _posix_spawn_file_actions_t, px_args.file_actions_size, M_TEMP, M_WAITOK);
+
+ px_sfap = kheap_alloc(KHEAP_TEMP,
+ px_args.file_actions_size, Z_WAITOK);
if (px_sfap == NULL) {
error = ENOMEM;
goto bad;
goto bad;
}
- MALLOC(px_spap, _posix_spawn_port_actions_t,
- px_args.port_actions_size, M_TEMP, M_WAITOK);
+ px_spap = kheap_alloc(KHEAP_TEMP,
+ px_args.port_actions_size, Z_WAITOK);
if (px_spap == NULL) {
error = ENOMEM;
goto bad;
goto bad;
}
- MALLOC(px_persona, struct _posix_spawn_persona_info *, px_args.persona_info_size, M_TEMP, M_WAITOK | M_ZERO);
+ px_persona = kheap_alloc(KHEAP_TEMP,
+ px_args.persona_info_size, Z_WAITOK);
if (px_persona == NULL) {
error = ENOMEM;
goto bad;
goto bad;
}
- MALLOC(px_pcred_info, struct _posix_spawn_posix_cred_info *,
- px_args.posix_cred_info_size, M_TEMP, M_WAITOK | M_ZERO);
+ px_pcred_info = kheap_alloc(KHEAP_TEMP,
+ px_args.posix_cred_info_size, Z_WAITOK);
if (px_pcred_info == NULL) {
error = ENOMEM;
goto bad;
* ...AND the parent has the entitlement, copy
* the subsystem root path in.
*/
- MALLOC(subsystem_root_path, char *, px_args.subsystem_root_path_size, M_SBUF, M_WAITOK | M_ZERO | M_NULL);
+ subsystem_root_path = zalloc_flags(ZV_NAMEI, Z_WAITOK | Z_ZERO);
if (subsystem_root_path == NULL) {
error = ENOMEM;
if (imgp->ip_strings) {
execargs_free(imgp);
}
- if (imgp->ip_px_sfa != NULL) {
- FREE(imgp->ip_px_sfa, M_TEMP);
- }
- if (imgp->ip_px_spa != NULL) {
- FREE(imgp->ip_px_spa, M_TEMP);
- }
+ kheap_free(KHEAP_TEMP, imgp->ip_px_sfa,
+ px_args.file_actions_size);
+ kheap_free(KHEAP_TEMP, imgp->ip_px_spa,
+ px_args.port_actions_size);
#if CONFIG_PERSONAS
- if (imgp->ip_px_persona != NULL) {
- FREE(imgp->ip_px_persona, M_TEMP);
- }
+ kheap_free(KHEAP_TEMP, imgp->ip_px_persona,
+ px_args.persona_info_size);
#endif
- if (imgp->ip_px_pcred_info != NULL) {
- FREE(imgp->ip_px_pcred_info, M_TEMP);
- }
+ kheap_free(KHEAP_TEMP, imgp->ip_px_pcred_info,
+ px_args.posix_cred_info_size);
if (subsystem_root_path != NULL) {
- FREE(subsystem_root_path, M_SBUF);
+ zfree(ZV_NAMEI, subsystem_root_path);
}
#if CONFIG_MACF
- if (imgp->ip_px_smpx != NULL) {
- spawn_free_macpolicyinfo(imgp->ip_px_smpx);
+ _posix_spawn_mac_policy_extensions_t psmx = imgp->ip_px_smpx;
+ if (psmx) {
+ spawn_free_macpolicyinfo(&px_args,
+ psmx, psmx->psmx_count);
}
if (imgp->ip_execlabelp) {
mac_cred_label_free(imgp->ip_execlabelp);
proc_rele(p);
}
- if (bufp != NULL) {
- FREE(bufp, M_TEMP);
- }
+ kheap_free(KHEAP_TEMP, bufp,
+ sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap));
if (inherit != NULL) {
ipc_importance_release(inherit);
/* Allocate a big chunk for locals instead of using stack since these
 * structures are pretty big.
*/
- MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO);
+ bufp = kheap_alloc(KHEAP_TEMP,
+ sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap), Z_WAITOK | Z_ZERO);
imgp = (struct image_params *) bufp;
if (bufp == NULL) {
error = ENOMEM;
proc_rele(p);
}
- if (bufp != NULL) {
- FREE(bufp, M_TEMP);
- }
+ kheap_free(KHEAP_TEMP, bufp,
+ sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap));
if (inherit != NULL) {
ipc_importance_release(inherit);
#define PTRAUTH_DISABLED_FLAG "ptrauth_disabled=1"
#define DYLD_ARM64E_ABI_KEY "arm64e_abi="
#endif /* __has_feature(ptrauth_calls) */
+#define MAIN_TH_PORT_KEY "th_port="
#define FSID_MAX_STRING "0x1234567890abcdef,0x1234567890abcdef"
#define HEX_STR_LEN 18 // 64-bit hex value "0x0123456701234567"
+#define HEX_STR_LEN32 10 // 32-bit hex value "0x01234567"
static int
exec_add_entropy_key(struct image_params *imgp,
{
int error;
int img_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT_ADDR) ? 8 : 4;
+ thread_t new_thread;
+ ipc_port_t sright;
/* exec_save_path stored the first string */
imgp->ip_applec = 1;
imgp->ip_applec++;
}
#endif
+ /*
+ * Add the main thread's mach port name.
+ * This takes a +1 uref on the main thread port; the reference is extracted by libpthread in
+ * __pthread_init and consumed in _bsdthread_terminate. The main thread port name is leaked
+ * if the process is not linked against libpthread.
+ */
+ if ((new_thread = imgp->ip_new_thread) != THREAD_NULL) {
+ thread_reference(new_thread);
+ sright = convert_thread_to_port_pinned(new_thread);
+ task_t new_task = get_threadtask(new_thread);
+ mach_port_name_t name = ipc_port_copyout_send(sright, get_task_ipcspace(new_task));
+ char port_name_hex_str[strlen(MAIN_TH_PORT_KEY) + HEX_STR_LEN32 + 1];
+ snprintf(port_name_hex_str, sizeof(port_name_hex_str), MAIN_TH_PORT_KEY "0x%x", name);
+
+ error = exec_add_user_string(imgp, CAST_USER_ADDR_T(port_name_hex_str), UIO_SYSSPACE, FALSE);
+ if (error) {
+ goto bad;
+ }
+ imgp->ip_applec++;
+ }
/* Align the tail of the combined applev area */
while (imgp->ip_strspace % img_ptr_size != 0) {
continue;
}
- MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
+ ndp = kheap_alloc(KHEAP_TEMP,
+ sizeof(*ndp), Z_WAITOK | Z_ZERO);
if (ndp == NULL) {
fp_free(p, indx, fp);
error = ENOMEM;
if ((error = vn_open(ndp, flag, 0)) != 0) {
fp_free(p, indx, fp);
- FREE(ndp, M_TEMP);
+ kheap_free(KHEAP_TEMP, ndp, sizeof(*ndp));
break;
}
fp_drop(p, indx, fp, 1);
proc_fdunlock(p);
- FREE(ndp, M_TEMP);
+ kheap_free(KHEAP_TEMP, ndp, sizeof(*ndp));
}
}
}
static int execargs_waiters = 0;
-lck_mtx_t *execargs_cache_lock;
+static LCK_MTX_DECLARE_ATTR(execargs_cache_lock, &proc_lck_grp, &proc_lck_attr);
static void
execargs_lock_lock(void)
{
- lck_mtx_lock_spin(execargs_cache_lock);
+ lck_mtx_lock_spin(&execargs_cache_lock);
}
static void
execargs_lock_unlock(void)
{
- lck_mtx_unlock(execargs_cache_lock);
+ lck_mtx_unlock(&execargs_cache_lock);
}
static wait_result_t
execargs_lock_sleep(void)
{
- return lck_mtx_sleep(execargs_cache_lock, LCK_SLEEP_DEFAULT, &execargs_free_count, THREAD_INTERRUPTIBLE);
+ return lck_mtx_sleep(&execargs_cache_lock, LCK_SLEEP_DEFAULT, &execargs_free_count, THREAD_INTERRUPTIBLE);
}
static kern_return_t
#include <kern/assert.h>
#include <kern/policy_internal.h>
#include <kern/exc_guard.h>
+#include <kern/backtrace.h>
#include <vm/vm_protos.h>
#include <os/log.h>
os_reason_free(exit_reason);
if (current_proc() == p) {
if (p->exit_thread == self) {
- printf("exit_thread failed to exit, leaving process %s[%d] in unkillable limbo\n",
- p->p_comm, p->p_pid);
+ panic("exit_thread failed to exit");
}
if (thread_can_terminate) {
static void
proc_memorystatus_remove(proc_t p)
{
- LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_OWNED);
+ LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
while (memorystatus_remove(p) == EAGAIN) {
os_log(OS_LOG_DEFAULT, "memorystatus_remove: Process[%d] tried to exit while being frozen. Blocking exit until freeze completes.", p->p_pid);
- msleep(&p->p_memstat_state, proc_list_mlock, PWAIT, "proc_memorystatus_remove", NULL);
+ msleep(&p->p_memstat_state, &proc_list_mlock, PWAIT, "proc_memorystatus_remove", NULL);
}
}
#endif
if (kr != 0) {
create_corpse = TRUE;
}
+
+ /*
+ * Revalidate the code signing of the text pages around current PC.
+ * This is an attempt to detect and repair faults due to memory
+ * corruption of text pages.
+ *
+ * The goal here is to fixup infrequent memory corruptions due to
+ * things like aging RAM bit flips. So the approach is to only expect
+ * to have to fixup one thing per crash. This also limits the amount
+ * of extra work we cause in case this is a development kernel with an
+ * active memory stomp happening.
+ */
+ task_t task = proc_task(p);
+ uintptr_t bt[2];
+ int bt_err;
+ bool user64;
+ bool was_truncated;
+ unsigned int frame_count = backtrace_user(bt, 2, &bt_err, &user64, &was_truncated);
+
+ if (bt_err == 0 && frame_count >= 1) {
+ /*
+ * First check at the page containing the current PC.
+ * This passes if the page code signs -or- if we can't figure out
+ * what is at that address. The latter action is so we continue checking
+ * previous pages which may be corrupt and caused a wild branch.
+ */
+ kr = revalidate_text_page(task, bt[0]);
+
+ /* No corruption found, check the previous sequential page */
+ if (kr == KERN_SUCCESS) {
+ kr = revalidate_text_page(task, bt[0] - get_task_page_size(task));
+ }
+
+ /* Still no corruption found, check the current function's caller */
+ if (kr == KERN_SUCCESS) {
+ if (frame_count > 1 &&
+ atop(bt[0]) != atop(bt[1]) && /* don't recheck PC page */
+ atop(bt[0]) - 1 != atop(bt[1])) { /* don't recheck page before */
+ kr = revalidate_text_page(task, (vm_map_offset_t)bt[1]);
+ }
+ }
+
+ /*
+ * Log that we found a corruption.
+ * TBD: figure out how to bubble this up to the crash reporter too,
+ * instead of just the log message.
+ */
+ if (kr != KERN_SUCCESS) {
+ os_log(OS_LOG_DEFAULT,
+ "Text page corruption detected in dying process %d\n", p->p_pid);
+ }
+ }
}
skipcheck:
}
/* check for sysctl zomb lookup */
while ((q->p_listflag & P_LIST_WAITING) == P_LIST_WAITING) {
- msleep(&q->p_stat, proc_list_mlock, PWAIT, "waitcoll", 0);
+ msleep(&q->p_stat, &proc_list_mlock, PWAIT, "waitcoll", 0);
}
q->p_listflag |= P_LIST_WAITING;
/*
pid, exitval, 0, 0, 0);
/* check for sysctl zomb lookup */
while ((p->p_listflag & P_LIST_WAITING) == P_LIST_WAITING) {
- msleep(&p->p_stat, proc_list_mlock, PWAIT, "waitcoll", 0);
+ msleep(&p->p_stat, &proc_list_mlock, PWAIT, "waitcoll", 0);
}
/* safe to use p as this is a system reap */
p->p_stat = SZOMB;
child->p_ucred = NOCRED;
}
- lck_mtx_destroy(&child->p_mlock, proc_mlock_grp);
- lck_mtx_destroy(&child->p_ucred_mlock, proc_ucred_mlock_grp);
- lck_mtx_destroy(&child->p_fdmlock, proc_fdmlock_grp);
+ lck_mtx_destroy(&child->p_mlock, &proc_mlock_grp);
+ lck_mtx_destroy(&child->p_ucred_mlock, &proc_ucred_mlock_grp);
+ lck_mtx_destroy(&child->p_fdmlock, &proc_fdmlock_grp);
#if CONFIG_DTRACE
- lck_mtx_destroy(&child->p_dtrace_sprlock, proc_lck_grp);
+ lck_mtx_destroy(&child->p_dtrace_sprlock, &proc_lck_grp);
#endif
- lck_spin_destroy(&child->p_slock, proc_slock_grp);
+ lck_spin_destroy(&child->p_slock, &proc_slock_grp);
+ lck_rw_destroy(&child->p_dirs_lock, &proc_dirslock_grp);
zfree(proc_zone, child);
if ((locked == 1) && (droplock == 0)) {
wait4_data->args = uap;
thread_set_pending_block_hint(current_thread(), kThreadWaitOnProcess);
- (void)msleep(&p->p_stat, proc_list_mlock, PWAIT, "waitcoll", 0);
+ (void)msleep(&p->p_stat, &proc_list_mlock, PWAIT, "waitcoll", 0);
goto loop1;
}
p->p_listflag |= P_LIST_WAITING; /* only allow single thread to wait() */
wait4_data->retval = retval;
thread_set_pending_block_hint(current_thread(), kThreadWaitOnProcess);
- if ((error = msleep0((caddr_t)q, proc_list_mlock, PWAIT | PCATCH | PDROP, "wait", 0, wait1continue))) {
+ if ((error = msleep0((caddr_t)q, &proc_list_mlock, PWAIT | PCATCH | PDROP, "wait", 0, wait1continue))) {
return error;
}
* the single return for waited process guarantee.
*/
if (p->p_listflag & P_LIST_WAITING) {
- (void) msleep(&p->p_stat, proc_list_mlock,
+ (void) msleep(&p->p_stat, &proc_list_mlock,
PWAIT, "waitidcoll", 0);
goto loop1;
}
}
goto out;
}
- ASSERT_LCK_MTX_OWNED(proc_list_mlock);
+ ASSERT_LCK_MTX_OWNED(&proc_list_mlock);
/* Not a process we are interested in; go on to next child */
p->p_listflag &= ~P_LIST_WAITING;
wakeup(&p->p_stat);
}
- ASSERT_LCK_MTX_OWNED(proc_list_mlock);
+ ASSERT_LCK_MTX_OWNED(&proc_list_mlock);
/* No child processes that could possibly satisfy the request? */
waitid_data->args = uap;
waitid_data->retval = retval;
- if ((error = msleep0(q, proc_list_mlock,
+ if ((error = msleep0(q, &proc_list_mlock,
PWAIT | PCATCH | PDROP, "waitid", 0, waitidcontinue)) != 0) {
return error;
}
}
/* check for lookups by zomb sysctl */
while ((q->p_listflag & P_LIST_WAITING) == P_LIST_WAITING) {
- msleep(&q->p_stat, proc_list_mlock, PWAIT, "waitcoll", 0);
+ msleep(&q->p_stat, &proc_list_mlock, PWAIT, "waitcoll", 0);
}
q->p_listflag |= P_LIST_WAITING;
/*
zfree(proc_sigacts_zone, p->p_sigacts);
p->p_sigacts = NULL;
- FREE(p->p_subsystem_root_path, M_SBUF);
- p->p_subsystem_root_path = NULL;
+ if (p->p_subsystem_root_path) {
+ zfree(ZV_NAMEI, p->p_subsystem_root_path);
+ }
proc_limitdrop(p);
proc_list_lock();
/* check for lookups by zomb sysctl */
while ((p->p_listflag & P_LIST_WAITING) == P_LIST_WAITING) {
- msleep(&p->p_stat, proc_list_mlock, PWAIT, "waitcoll", 0);
+ msleep(&p->p_stat, &proc_list_mlock, PWAIT, "waitcoll", 0);
}
p->p_stat = SZOMB;
p->p_listflag |= P_LIST_WAITING;
}
/*
- * Create a new thread for the child process
+ * Create a new thread for the child process. Pin it and make it immovable.
* The new thread is waiting on the event triggered by 'task_clear_return_wait'
*/
result = thread_create_waiting(child_task,
(thread_continue_t)task_wait_to_return,
task_get_return_wait_event(child_task),
+ TH_CREATE_WAITING_OPTION_PINNED | TH_CREATE_WAITING_OPTION_IMMOVABLE,
&child_thread);
if (result != KERN_SUCCESS) {
/* Update the audit session proc count */
AUDIT_SESSION_PROCEXIT(p);
- lck_mtx_destroy(&p->p_mlock, proc_mlock_grp);
- lck_mtx_destroy(&p->p_fdmlock, proc_fdmlock_grp);
- lck_mtx_destroy(&p->p_ucred_mlock, proc_ucred_mlock_grp);
+ lck_mtx_destroy(&p->p_mlock, &proc_mlock_grp);
+ lck_mtx_destroy(&p->p_fdmlock, &proc_fdmlock_grp);
+ lck_mtx_destroy(&p->p_ucred_mlock, &proc_ucred_mlock_grp);
#if CONFIG_DTRACE
- lck_mtx_destroy(&p->p_dtrace_sprlock, proc_lck_grp);
+ lck_mtx_destroy(&p->p_dtrace_sprlock, &proc_lck_grp);
#endif
- lck_spin_destroy(&p->p_slock, proc_slock_grp);
+ lck_spin_destroy(&p->p_slock, &proc_slock_grp);
+ lck_rw_destroy(&p->p_dirs_lock, &proc_dirslock_grp);
/* Release the credential reference */
kauth_cred_t tmp_ucred = p->p_ucred;
p->p_sigacts = NULL;
zfree(proc_stats_zone, p->p_stats);
p->p_stats = NULL;
- FREE(p->p_subsystem_root_path, M_SBUF);
- p->p_subsystem_root_path = NULL;
+ if (p->p_subsystem_root_path) {
+ zfree(ZV_NAMEI, p->p_subsystem_root_path);
+ }
proc_checkdeadrefs(p);
zfree(proc_zone, p);
/* update audit session proc count */
AUDIT_SESSION_PROCNEW(child_proc);
- lck_mtx_init(&child_proc->p_mlock, proc_mlock_grp, proc_lck_attr);
- lck_mtx_init(&child_proc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
- lck_mtx_init(&child_proc->p_ucred_mlock, proc_ucred_mlock_grp, proc_lck_attr);
+ lck_mtx_init(&child_proc->p_mlock, &proc_mlock_grp, &proc_lck_attr);
+ lck_mtx_init(&child_proc->p_fdmlock, &proc_fdmlock_grp, &proc_lck_attr);
+ lck_mtx_init(&child_proc->p_ucred_mlock, &proc_ucred_mlock_grp, &proc_lck_attr);
#if CONFIG_DTRACE
- lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
+ lck_mtx_init(&child_proc->p_dtrace_sprlock, &proc_lck_grp, &proc_lck_attr);
#endif
- lck_spin_init(&child_proc->p_slock, proc_slock_grp, proc_lck_attr);
+ lck_spin_init(&child_proc->p_slock, &proc_slock_grp, &proc_lck_attr);
+ lck_rw_init(&child_proc->p_dirs_lock, &proc_dirslock_grp, &proc_lck_attr);
klist_init(&child_proc->p_klist);
*
* XXX may fail to copy descriptors to child
*/
- lck_rw_init(&child_proc->p_dirs_lock, proc_dirslock_grp, proc_lck_attr);
child_proc->p_fd = fdcopy(parent_proc, parent_uthread->uu_cdir);
#if SYSV_SHM
if (parent_proc->p_subsystem_root_path) {
size_t parent_length = strlen(parent_proc->p_subsystem_root_path) + 1;
- MALLOC(child_proc->p_subsystem_root_path, char *, parent_length, M_SBUF, M_WAITOK | M_ZERO);
+ assert(parent_length <= MAXPATHLEN);
+ child_proc->p_subsystem_root_path = zalloc_flags(ZV_NAMEI,
+ Z_WAITOK | Z_ZERO);
memcpy(child_proc->p_subsystem_root_path, parent_proc->p_subsystem_root_path, parent_length);
}
void
proc_lock(proc_t p)
{
- LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
+ LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
lck_mtx_lock(&p->p_mlock);
}
void
proc_spinlock(proc_t p)
{
- lck_spin_lock_grp(&p->p_slock, proc_slock_grp);
+ lck_spin_lock_grp(&p->p_slock, &proc_slock_grp);
}
void
void
proc_list_lock(void)
{
- lck_mtx_lock(proc_list_mlock);
+ lck_mtx_lock(&proc_list_mlock);
}
void
proc_list_unlock(void)
{
- lck_mtx_unlock(proc_list_mlock);
+ lck_mtx_unlock(&proc_list_mlock);
}
void
void
uthread_cleanup(task_t task, void *uthread, void * bsd_info)
{
- struct _select *sel;
uthread_t uth = (uthread_t)uthread;
proc_t p = (proc_t)bsd_info;
kqueue_threadreq_unbind(p, uth->uu_kqr_bound);
}
- sel = &uth->uu_select;
- /* cleanup the select bit space */
- if (sel->nbytes) {
- FREE(sel->ibits, M_TEMP);
- FREE(sel->obits, M_TEMP);
- sel->nbytes = 0;
+ if (uth->uu_select.nbytes) {
+ select_cleanup_uthread(&uth->uu_select);
}
if (uth->uu_cdir) {
if (waitq_set_is_valid(uth->uu_wqset)) {
waitq_set_deinit(uth->uu_wqset);
}
- FREE(uth->uu_wqset, M_SELECT);
+ kheap_free(KHEAP_DEFAULT, uth->uu_wqset, uth->uu_wqstate_sz);
uth->uu_wqset = NULL;
uth->uu_wqstate_sz = 0;
}
}
static int label_slot;
-static lck_rw_t llock;
-static lck_grp_t *llock_grp;
+static LCK_GRP_DECLARE(llock_grp, VNG_POLICY_NAME);
+static LCK_RW_DECLARE(llock, &llock_grp);
static __inline void *
vng_lbl_get(struct label *label)
if (vng_policy_flags & kVNG_POLICY_EXC_CORPSE) {
char *path;
int len = MAXPATHLEN;
- MALLOC(path, char *, len, M_TEMP, M_WAITOK);
+
+ path = zalloc(ZV_NAMEI);
+
os_reason_t r = NULL;
if (NULL != path) {
vn_getpath(vp, path, &len);
if (NULL != r) {
os_reason_free(r);
}
- if (NULL != path) {
- FREE(path, M_TEMP);
- }
+
+ zfree(ZV_NAMEI, path);
} else {
thread_t t = current_thread();
thread_guard_violation(t, code, subcode, TRUE);
* Configuration gorp
*/
-static void
-vng_init(struct mac_policy_conf *mpc)
-{
- llock_grp = lck_grp_alloc_init(mpc->mpc_name, LCK_GRP_ATTR_NULL);
- lck_rw_init(&llock, llock_grp, LCK_ATTR_NULL);
-}
-
SECURITY_READ_ONLY_EARLY(static struct mac_policy_ops) vng_policy_ops = {
.mpo_file_label_destroy = vng_file_label_destroy,
.mpo_vnode_check_open = vng_vnode_check_open,
.mpo_policy_syscall = vng_policy_syscall,
- .mpo_policy_init = vng_init,
};
static const char *vng_labelnames[] = {
static int kpc_initted = 0;
-static lck_grp_attr_t *sysctl_lckgrp_attr = NULL;
-static lck_grp_t *sysctl_lckgrp = NULL;
-static lck_mtx_t sysctl_lock;
+static LCK_GRP_DECLARE(sysctl_lckgrp, "kpc");
+static LCK_MTX_DECLARE(sysctl_lock, &sysctl_lckgrp);
/*
* Another element is needed to hold the CPU number when getting counter values.
void
kpc_init(void)
{
- sysctl_lckgrp_attr = lck_grp_attr_alloc_init();
- sysctl_lckgrp = lck_grp_alloc_init("kpc", sysctl_lckgrp_attr);
- lck_mtx_init(&sysctl_lock, sysctl_lckgrp, LCK_ATTR_NULL);
-
kpc_arch_init();
kpc_initted = 1;
kern_return_t ktrace_background_available_notify_user(void);
-static lck_mtx_t *ktrace_mtx;
+static LCK_GRP_DECLARE(ktrace_grp, "ktrace");
+static LCK_MTX_DECLARE(ktrace_mtx, &ktrace_grp);
/*
* The overall state of ktrace, whether it is unconfigured, in foreground mode,
ktrace_lock(void)
{
if (!ktrace_single_threaded) {
- lck_mtx_lock(ktrace_mtx);
+ lck_mtx_lock(&ktrace_mtx);
}
}
ktrace_unlock(void)
{
if (!ktrace_single_threaded) {
- lck_mtx_unlock(ktrace_mtx);
+ lck_mtx_unlock(&ktrace_mtx);
}
}
ktrace_assert_lock_held(void)
{
if (!ktrace_single_threaded) {
- lck_mtx_assert(ktrace_mtx, LCK_MTX_ASSERT_OWNED);
+ lck_mtx_assert(&ktrace_mtx, LCK_MTX_ASSERT_OWNED);
}
}
ktrace_unlock();
return ret;
}
-
-/* This should only be called from the bootstrap thread. */
-void
-ktrace_init(void)
-{
- static lck_grp_attr_t *lock_grp_attr = NULL;
- static lck_grp_t *lock_grp = NULL;
- static bool initialized = false;
-
- if (initialized) {
- return;
- }
-
- lock_grp_attr = lck_grp_attr_alloc_init();
- lock_grp = lck_grp_alloc_init("ktrace", lock_grp_attr);
- lck_grp_attr_free(lock_grp_attr);
-
- ktrace_mtx = lck_mtx_alloc_init(lock_grp, LCK_ATTR_NULL);
- assert(ktrace_mtx != NULL);;
- initialized = true;
-}
#define LOCKF_DEBUG(mask, ...) /* mask */
#endif /* !LOCKF_DEBUGGING */
-MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");
+/*
+ * If you need accounting for KM_LOCKF consider using
+ * ZONE_VIEW_DEFINE to define a view.
+ */
+#define KM_LOCKF KHEAP_DEFAULT
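The comment above hints at how accounting could be added later. A hedged sketch of what that might look like, assuming the ZONE_VIEW_DEFINE(var, name, heap_id, size) shape used for views such as ZV_NAMEI; this is not part of the change itself:

	/* Hypothetical accounted view; lockf allocations are a fixed size. */
	ZONE_VIEW_DEFINE(ZV_LOCKF, "lockf", KHEAP_ID_DEFAULT, sizeof(struct lockf));
	/* Call sites would then use zalloc_flags(ZV_LOCKF, ...) / zfree(ZV_LOCKF, ...)
	 * instead of kheap_alloc/kheap_free on KM_LOCKF. */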
#define NOLOCKF (struct lockf *)0
#define SELF 0x1
static void lf_adjust_assertion(struct lockf *block);
#endif /* IMPORTANCE_INHERITANCE */
-static lck_mtx_t lf_dead_lock;
-static lck_grp_t *lf_dead_lock_grp;
-
-void
-lf_init(void)
-{
- lf_dead_lock_grp = lck_grp_alloc_init("lf_dead_lock", LCK_GRP_ATTR_NULL);
- lck_mtx_init(&lf_dead_lock, lf_dead_lock_grp, LCK_ATTR_NULL);
-}
+static LCK_GRP_DECLARE(lf_dead_lock_grp, "lf_dead_lock");
+static LCK_MTX_DECLARE(lf_dead_lock, &lf_dead_lock_grp);
/*
* lf_advlock
/*
* Create the lockf structure
*/
- MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
+ lock = kheap_alloc(KM_LOCKF, sizeof(struct lockf), Z_WAITOK);
if (lock == NULL) {
return ENOLCK;
}
case F_UNLCK:
error = lf_clearlock(lock);
- FREE(lock, M_LOCKF);
+ kheap_free(KM_LOCKF, lock, sizeof(struct lockf));
break;
case F_GETLK:
error = lf_getlock(lock, fl, -1);
- FREE(lock, M_LOCKF);
+ kheap_free(KM_LOCKF, lock, sizeof(struct lockf));
break;
case F_GETLKPID:
error = lf_getlock(lock, fl, fl->l_pid);
- FREE(lock, M_LOCKF);
+ kheap_free(KM_LOCKF, lock, sizeof(struct lockf));
break;
default:
- FREE(lock, M_LOCKF);
+ kheap_free(KM_LOCKF, lock, sizeof(struct lockf));
error = EINVAL;
break;
}
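Unlike FREE(ptr, M_LOCKF), kheap_free() needs the allocation size, which is why sizeof(struct lockf) now appears at every free site. A minimal sketch of the paired pattern (illustration only):

	struct lockf *l = kheap_alloc(KM_LOCKF, sizeof(struct lockf), Z_WAITOK);
	if (l == NULL) {
		return ENOLCK;
	}
	/* ... use l ... */
	kheap_free(KM_LOCKF, l, sizeof(struct lockf));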
lf_move_blocked(lock, adjacent);
- FREE(adjacent, M_LOCKF);
+ kheap_free(KM_LOCKF, adjacent, sizeof(struct lockf));
continue;
}
/* If the lock starts adjacent to us, we can coalesce it */
lf_move_blocked(lock, adjacent);
- FREE(adjacent, M_LOCKF);
+ kheap_free(KM_LOCKF, adjacent, sizeof(struct lockf));
continue;
}
*/
if ((lock->lf_flags & F_WAIT) == 0) {
DTRACE_FSINFO(advlock__nowait, vnode_t, vp);
- FREE(lock, M_LOCKF);
+ kheap_free(KM_LOCKF, lock, sizeof(struct lockf));
return EAGAIN;
}
LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is me, so EDEADLK\n", lock);
proc_unlock(wproc);
lck_mtx_unlock(&lf_dead_lock);
- FREE(lock, M_LOCKF);
+ kheap_free(KM_LOCKF, lock, sizeof(struct lockf));
return EDEADLK;
}
}
lock->lf_type == F_WRLCK) {
lock->lf_type = F_UNLCK;
if ((error = lf_clearlock(lock)) != 0) {
- FREE(lock, M_LOCKF);
+ kheap_free(KM_LOCKF, lock, sizeof(struct lockf));
return error;
}
lock->lf_type = F_WRLCK;
if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
lf_wakelock(lock, TRUE);
}
- FREE(lock, M_LOCKF);
+ kheap_free(KM_LOCKF, lock, sizeof(struct lockf));
/* Return ETIMEDOUT if timeout occurred. */
if (error == EWOULDBLOCK) {
error = ETIMEDOUT;
}
overlap->lf_type = lock->lf_type;
lf_move_blocked(overlap, lock);
- FREE(lock, M_LOCKF);
+ kheap_free(KM_LOCKF, lock, sizeof(struct lockf));
lock = overlap; /* for lf_coalesce_adjacent() */
break;
*/
if (overlap->lf_type == lock->lf_type) {
lf_move_blocked(overlap, lock);
- FREE(lock, M_LOCKF);
+ kheap_free(KM_LOCKF, lock, sizeof(struct lockf));
lock = overlap; /* for lf_coalesce_adjacent() */
break;
}
* resource shortage.
*/
if (lf_split(overlap, lock)) {
- FREE(lock, M_LOCKF);
+ kheap_free(KM_LOCKF, lock, sizeof(struct lockf));
return ENOLCK;
}
}
} else {
*prev = overlap->lf_next;
}
- FREE(overlap, M_LOCKF);
+ kheap_free(KM_LOCKF, overlap, sizeof(struct lockf));
continue;
case OVERLAP_STARTS_BEFORE_LOCK:
case OVERLAP_EQUALS_LOCK:
*prev = overlap->lf_next;
- FREE(overlap, M_LOCKF);
+ kheap_free(KM_LOCKF, overlap, sizeof(struct lockf));
break;
case OVERLAP_CONTAINS_LOCK: /* split it */
case OVERLAP_CONTAINED_BY_LOCK:
*prev = overlap->lf_next;
lf = overlap->lf_next;
- FREE(overlap, M_LOCKF);
+ kheap_free(KM_LOCKF, overlap, sizeof(struct lockf));
continue;
case OVERLAP_STARTS_BEFORE_LOCK:
* Make a new lock consisting of the last part of
* the encompassing lock
*/
- MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
+ splitlock = kheap_alloc(KM_LOCKF, sizeof(struct lockf), Z_WAITOK);
if (splitlock == NULL) {
return ENOLCK;
}
lock->lf_type == F_RDLCK ? "shared" :
lock->lf_type == F_WRLCK ? "exclusive" :
lock->lf_type == F_UNLCK ? "unlock" : "unknown",
- (intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
+ (uint64_t)lock->lf_start, (uint64_t)lock->lf_end);
} else {
printf(" %s, start 0x%016llx, end 0x%016llx",
lock->lf_type == F_RDLCK ? "shared" :
lock->lf_type == F_WRLCK ? "exclusive" :
lock->lf_type == F_UNLCK ? "unlock" : "unknown",
- (intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
+ (uint64_t)lock->lf_start, (uint64_t)lock->lf_end);
}
if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
printf(" block %p\n", (void *)TAILQ_FIRST(&lock->lf_blkhd));
lf->lf_type == F_RDLCK ? "shared" :
lf->lf_type == F_WRLCK ? "exclusive" :
lf->lf_type == F_UNLCK ? "unlock" :
- "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end);
+ "unknown", (uint64_t)lf->lf_start, (uint64_t)lf->lf_end);
TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
printf("\n\t\tlock request %p for ", (void *)blk);
if (blk->lf_flags & F_POSIX) {
blk->lf_type == F_RDLCK ? "shared" :
blk->lf_type == F_WRLCK ? "exclusive" :
blk->lf_type == F_UNLCK ? "unlock" :
- "unknown", (intmax_t)blk->lf_start,
- (intmax_t)blk->lf_end);
+ "unknown", (uint64_t)blk->lf_start,
+ (uint64_t)blk->lf_end);
if (!TAILQ_EMPTY(&blk->lf_blkhd)) {
panic("lf_printlist: bad list");
}
#if DEVELOPMENT || DEBUG
/*
* On development and debug kernels, we allow one pid to take ownership
- * of the memorystatus snapshot (via memorystatus_control).
- * If there's an owner, then only they may consume the snapshot.
- * This is used when testing the snapshot interface to avoid racing with other
- * processes on the system that consume snapshots.
+ * of some memorystatus data structures for testing purposes (via memorystatus_control).
+ * If there's an owner, then only they may consume the jetsam snapshot & set freezer probabilities.
+ * This is used when testing these interfaces to avoid racing with other
+ * processes on the system that typically use them (namely OSAnalytics & dasd).
*/
-static pid_t memorystatus_snapshot_owner = 0;
-SYSCTL_INT(_kern, OID_AUTO, memorystatus_snapshot_owner, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_snapshot_owner, 0, "");
+static pid_t memorystatus_testing_pid = 0;
+SYSCTL_INT(_kern, OID_AUTO, memorystatus_testing_pid, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_testing_pid, 0, "");
#endif /* DEVELOPMENT || DEBUG */
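For context, a hypothetical test-side helper showing how the testing pid would be claimed and released through the existing memorystatus_control() wrapper; the helper name and header choice are assumptions, only the command and flag constants come from this change:

	#include <stdbool.h>
	#include <stdint.h>
	#include <sys/kern_memorystatus.h>

	/* DEVELOPMENT/DEBUG kernels only: claim (or release) the testing pid so a
	 * test does not race OSAnalytics/dasd for snapshots or freezer probabilities. */
	static int
	claim_memorystatus_testing_pid(bool claim)
	{
		uint32_t flags = claim ? MEMORYSTATUS_FLAGS_SET_TESTING_PID
		    : MEMORYSTATUS_FLAGS_UNSET_TESTING_PID;
		return memorystatus_control(MEMORYSTATUS_CMD_SET_TESTING_PID, 0, flags,
		    NULL, 0);
	}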
static void memorystatus_init_jetsam_snapshot_header(memorystatus_jetsam_snapshot_t *snapshot);
#endif /* DEVELOPMENT || DEBUG */
#endif /* __arm64__ */
-static lck_grp_attr_t *memorystatus_jetsam_fg_band_lock_grp_attr;
-static lck_grp_t *memorystatus_jetsam_fg_band_lock_grp;
-lck_mtx_t memorystatus_jetsam_fg_band_lock;
+static LCK_GRP_DECLARE(memorystatus_jetsam_fg_band_lock_grp,
+ "memorystatus_jetsam_fg_band");
+LCK_MTX_DECLARE(memorystatus_jetsam_fg_band_lock,
+ &memorystatus_jetsam_fg_band_lock_grp);
/* Idle guard handling */
int memlimit_mb_active = 0, memlimit_mb_inactive = 0;
boolean_t memlimit_active_is_fatal = FALSE, memlimit_inactive_is_fatal = FALSE, use_active_limit = FALSE;
- LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_OWNED);
+ LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
if (p->p_memstat_memlimit_active > 0) {
memlimit_mb_active = p->p_memstat_memlimit_active;
#if DEVELOPMENT || DEBUG
-lck_grp_attr_t *disconnect_page_mappings_lck_grp_attr;
-lck_grp_t *disconnect_page_mappings_lck_grp;
-static lck_mtx_t disconnect_page_mappings_mutex;
+static LCK_GRP_DECLARE(disconnect_page_mappings_lck_grp, "disconnect_page_mappings");
+static LCK_MTX_DECLARE(disconnect_page_mappings_mutex, &disconnect_page_mappings_lck_grp);
extern bool kill_on_no_paging_space;
#endif /* DEVELOPMENT || DEBUG */
static void
memorystatus_sort_bucket_locked(unsigned int bucket_index, int sort_order)
{
- LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_OWNED);
+ LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
if (memstat_bucket[bucket_index].count == 0) {
return;
}
#endif
#if DEVELOPMENT || DEBUG
- disconnect_page_mappings_lck_grp_attr = lck_grp_attr_alloc_init();
- disconnect_page_mappings_lck_grp = lck_grp_alloc_init("disconnect_page_mappings", disconnect_page_mappings_lck_grp_attr);
-
- lck_mtx_init(&disconnect_page_mappings_mutex, disconnect_page_mappings_lck_grp, NULL);
-
if (kill_on_no_paging_space) {
max_kill_priority = JETSAM_PRIORITY_MAX;
}
#endif
- memorystatus_jetsam_fg_band_lock_grp_attr = lck_grp_attr_alloc_init();
- memorystatus_jetsam_fg_band_lock_grp =
- lck_grp_alloc_init("memorystatus_jetsam_fg_band", memorystatus_jetsam_fg_band_lock_grp_attr);
- lck_mtx_init(&memorystatus_jetsam_fg_band_lock, memorystatus_jetsam_fg_band_lock_grp, NULL);
-
/* Init buckets */
for (i = 0; i < MEMSTAT_BUCKET_COUNT; i++) {
TAILQ_INIT(&memstat_bucket[i].list);
/* Centralised for the purposes of allowing panic-on-jetsam */
extern void
vm_run_compactor(void);
+extern void
+vm_wake_compactor_swapper(void);
/*
* The jetsam no frills kill call
KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_COMPACTOR_RUN)) | DBG_FUNC_START,
victim_pid, cause, vm_page_free_count, *footprint_of_killed_proc, 0);
- vm_run_compactor();
+ if (jetsam_reason->osr_code == JETSAM_REASON_VNODE) {
+ /*
+ * vnode jetsams are synchronous and not caused by memory pressure.
+ * Running the compactor on this thread adds significant latency to the filesystem operation
+ * that triggered this jetsam.
+ * Kick off the compactor thread asynchronously instead.
+ */
+ vm_wake_compactor_swapper();
+ } else {
+ vm_run_compactor();
+ }
KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_COMPACTOR_RUN)) | DBG_FUNC_END,
victim_pid, cause, vm_page_free_count, 0, 0);
#endif
#if DEVELOPMENT || DEBUG
- if (p->p_pid == memorystatus_snapshot_owner) {
- memorystatus_snapshot_owner = 0;
+ if (p->p_pid == memorystatus_testing_pid) {
+ memorystatus_testing_pid = 0;
}
#endif /* DEVELOPMENT || DEBUG */
p->p_memstat_state |= P_MEMSTAT_REFREEZE_ELIGIBLE;
memorystatus_refreeze_eligible_count++;
}
+ if (p->p_memstat_thaw_count == 0 || p->p_memstat_last_thaw_interval < memorystatus_freeze_current_interval) {
+ os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_thawed), relaxed);
+ }
+ p->p_memstat_last_thaw_interval = memorystatus_freeze_current_interval;
p->p_memstat_thaw_count++;
memorystatus_thaw_count++;
static bool
memorystatus_jetsam_snapshot_copy_entry_locked(memorystatus_jetsam_snapshot_t *dst_snapshot, unsigned int dst_snapshot_size, const memorystatus_jetsam_snapshot_entry_t *src_entry)
{
- LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_OWNED);
+ LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
assert(dst_snapshot);
if (dst_snapshot->entry_count == dst_snapshot_size) {
static bool
memorystatus_init_jetsam_snapshot_entry_with_kill_locked(memorystatus_jetsam_snapshot_t *snapshot, proc_t p, uint32_t kill_cause, uint64_t killtime, memorystatus_jetsam_snapshot_entry_t **entry)
{
- LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_OWNED);
+ LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
memorystatus_jetsam_snapshot_entry_t *snapshot_list = snapshot->entries;
size_t i = snapshot->entry_count;
bool copied_to_freezer_snapshot = false;
#endif /* CONFIG_FREEZE */
- LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_OWNED);
+ LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
if (memorystatus_jetsam_snapshot_count == 0) {
/*
memorystatus_jetsam_snapshot_entry_t *snapshot_list = NULL;
unsigned int snapshot_max = 0;
- LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_OWNED);
+ LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
if (od_snapshot) {
/*
static int
memorystatus_verify_sort_order(unsigned int bucket_index, pid_t *expected_order, size_t num_pids)
{
- LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_OWNED);
+ LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
int error = 0;
proc_t p = NULL;
*/
proc_list_lock();
#if DEVELOPMENT || DEBUG
- if (memorystatus_snapshot_owner != 0 && memorystatus_snapshot_owner != current_proc()->p_pid) {
+ if (memorystatus_testing_pid != 0 && memorystatus_testing_pid != current_proc()->p_pid) {
/* Snapshot is currently owned by someone else. Don't consume it. */
proc_list_unlock();
goto out;
#if DEVELOPMENT || DEBUG
static int
-memorystatus_cmd_set_jetsam_snapshot_ownership(int32_t flags)
+memorystatus_cmd_set_testing_pid(int32_t flags)
{
int error = EINVAL;
proc_t caller = current_proc();
assert(caller != kernproc);
proc_list_lock();
- if (flags & MEMORYSTATUS_FLAGS_SNAPSHOT_TAKE_OWNERSHIP) {
- if (memorystatus_snapshot_owner == 0) {
- memorystatus_snapshot_owner = caller->p_pid;
+ if (flags & MEMORYSTATUS_FLAGS_SET_TESTING_PID) {
+ if (memorystatus_testing_pid == 0) {
+ memorystatus_testing_pid = caller->p_pid;
error = 0;
- } else if (memorystatus_snapshot_owner == caller->p_pid) {
+ } else if (memorystatus_testing_pid == caller->p_pid) {
error = 0;
} else {
/* We don't allow ownership to be taken from another proc. */
error = EBUSY;
}
- } else if (flags & MEMORYSTATUS_FLAGS_SNAPSHOT_DROP_OWNERSHIP) {
- if (memorystatus_snapshot_owner == caller->p_pid) {
- memorystatus_snapshot_owner = 0;
+ } else if (flags & MEMORYSTATUS_FLAGS_UNSET_TESTING_PID) {
+ if (memorystatus_testing_pid == caller->p_pid) {
+ memorystatus_testing_pid = 0;
error = 0;
- } else if (memorystatus_snapshot_owner != 0) {
+ } else if (memorystatus_testing_pid != 0) {
/* We don't allow ownership to be taken from another proc. */
error = EPERM;
}
size_t entry_count = 0, i = 0;
memorystatus_internal_probabilities_t *tmp_table_new = NULL, *tmp_table_old = NULL;
size_t tmp_table_new_size = 0, tmp_table_old_size = 0;
+#if DEVELOPMENT || DEBUG
+ if (memorystatus_testing_pid != 0 && memorystatus_testing_pid != current_proc()->p_pid) {
+ /* Probabilities are currently owned by someone else. Don't change them. */
+ error = EPERM;
+ goto out;
+ }
+#endif /* DEVELOPMENT || DEBUG */
/* Verify inputs */
if ((buffer == USER_ADDR_NULL) || (buffer_size == 0)) {
{
int error = 0;
- LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_OWNED);
+ LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
/*
* Store the active limit variants in the proc.
error = memorystatus_cmd_get_jetsam_snapshot((int32_t)args->flags, args->buffer, args->buffersize, ret);
break;
#if DEVELOPMENT || DEBUG
- case MEMORYSTATUS_CMD_SET_JETSAM_SNAPSHOT_OWNERSHIP:
- error = memorystatus_cmd_set_jetsam_snapshot_ownership((int32_t) args->flags);
+ case MEMORYSTATUS_CMD_SET_TESTING_PID:
+ error = memorystatus_cmd_set_testing_pid((int32_t) args->flags);
break;
#endif
case MEMORYSTATUS_CMD_GET_PRESSURE_STATUS:
#if CONFIG_FREEZE
-lck_grp_attr_t *freezer_lck_grp_attr;
-lck_grp_t *freezer_lck_grp;
-static lck_mtx_t freezer_mutex;
+static LCK_GRP_DECLARE(freezer_lck_grp, "freezer");
+static LCK_MTX_DECLARE(freezer_mutex, &freezer_lck_grp);
/* Thresholds */
unsigned int memorystatus_freeze_threshold = 0;
uint64_t memorystatus_thaw_count_since_boot = 0; /* The number of thaws since boot */
unsigned int memorystatus_refreeze_eligible_count = 0; /* # of processes currently thawed i.e. have state on disk & in-memory */
-/* Freezer counters collected for telemtry */
-static struct memorystatus_freezer_stats_t {
- /*
- * # of processes that we've considered freezing.
- * Used to normalize the error reasons below.
- */
- uint64_t mfs_process_considered_count;
-
- /*
- * The following counters track how many times we've failed to freeze
- * a process because of a specific FREEZER_ERROR.
- */
- /* EXCESS_SHARED_MEMORY */
- uint64_t mfs_error_excess_shared_memory_count;
- /* LOW_PRIVATE_SHARED_RATIO */
- uint64_t mfs_error_low_private_shared_ratio_count;
- /* NO_COMPRESSOR_SPACE */
- uint64_t mfs_error_no_compressor_space_count;
- /* NO_SWAP_SPACE */
- uint64_t mfs_error_no_swap_space_count;
- /* pages < memorystatus_freeze_pages_min */
- uint64_t mfs_error_below_min_pages_count;
- /* dasd determined it was unlikely to be relaunched. */
- uint64_t mfs_error_low_probability_of_use_count;
- /* transient reasons (like inability to acquire a lock). */
- uint64_t mfs_error_other_count;
-
- /*
- * # of times that we saw memorystatus_available_pages <= memorystatus_freeze_threshold.
- * Used to normalize skipped_full_count and shared_mb_high_count.
- */
- uint64_t mfs_below_threshold_count;
-
- /* Skipped running the freezer because we were out of slots */
- uint64_t mfs_skipped_full_count;
-
- /* Skipped running the freezer because we were over the shared mb limit*/
- uint64_t mfs_skipped_shared_mb_high_count;
-
- /*
- * How many pages have not been sent to swap because they were in a shared object?
- * This is being used to gather telemtry so we can understand the impact we'd have
- * on our NAND budget if we did swap out these pages.
- */
- uint64_t mfs_shared_pages_skipped;
-
- /*
- * A running sum of the total number of bytes sent to NAND during
- * refreeze operations since boot.
- */
- uint64_t mfs_bytes_refrozen;
- /* The number of refreeze operations since boot */
- uint64_t mfs_refreeze_count;
-} memorystatus_freezer_stats = {0};
+struct memorystatus_freezer_stats_t memorystatus_freezer_stats = {0};
#endif /* XNU_KERNEL_PRIVATE */
};
throttle_interval_t *degraded_throttle_window = &throttle_intervals[0];
throttle_interval_t *normal_throttle_window = &throttle_intervals[1];
+uint32_t memorystatus_freeze_current_interval = 0;
extern uint64_t vm_swap_get_free_space(void);
extern boolean_t vm_swap_max_budget(uint64_t *);
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count, 0, "");
SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_thaw_count_since_boot, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count_since_boot, "");
SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_interval, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_current_interval, 0, "");
#if DEVELOPMENT || DEBUG
static int sysctl_memorystatus_freeze_budget_pages_remaining SYSCTL_HANDLER_ARGS
{
static int sysctl_memorystatus_freezer_thaw_percentage SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
- size_t thaw_count = 0, frozen_count = 0;
+ uint64_t thaw_count = 0, frozen_count = 0;
int thaw_percentage = 100;
- unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
- proc_t p = PROC_NULL;
- proc_list_lock();
-
- p = memorystatus_get_first_proc_locked(&band, FALSE);
+ frozen_count = os_atomic_load(&(memorystatus_freezer_stats.mfs_processes_frozen), relaxed);
+ thaw_count = os_atomic_load(&(memorystatus_freezer_stats.mfs_processes_thawed), relaxed);
- while (p) {
- if (p->p_memstat_state & P_MEMSTAT_FROZEN) {
- if (p->p_memstat_thaw_count > 0) {
- thaw_count++;
- }
- frozen_count++;
- }
- p = memorystatus_get_next_proc_locked(&band, p, FALSE);
- }
- proc_list_unlock();
if (frozen_count > 0) {
- assert(thaw_count <= frozen_count);
- thaw_percentage = (int)(100 * thaw_count / frozen_count);
+ if (thaw_count > frozen_count) {
+ /*
+ * Both counts are using relaxed atomics & could be out of sync
+ * causing us to see thaw_percentage > 100.
+ */
+ thaw_percentage = 100;
+ } else {
+ thaw_percentage = (int)(100 * thaw_count / frozen_count);
+ }
}
return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
}
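For example, with mfs_processes_frozen == 12 and mfs_processes_thawed == 5 the handler reports 100 * 5 / 12 = 41; if a racing increment briefly makes the thaw count 13 while the frozen count still reads 12, the clamp reports 100 rather than an impossible 108.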
#define FREEZER_ERROR_STRING_LENGTH 128
+EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_min, &memorystatus_freeze_pages_min, 0, UINT32_MAX, "");
+EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_max, &memorystatus_freeze_pages_max, 0, UINT32_MAX, "");
+EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_processes_max, &memorystatus_frozen_processes_max, 0, UINT32_MAX, "");
+EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_jetsam_band, &memorystatus_freeze_jetsam_band, JETSAM_PRIORITY_IDLE, JETSAM_PRIORITY_MAX - 1, "");
+EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_private_shared_pages_ratio, &memorystatus_freeze_private_shared_pages_ratio, 0, UINT32_MAX, "");
+EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_min_processes, &memorystatus_freeze_suspended_threshold, 0, UINT32_MAX, "");
+/*
+ * max. # of frozen process demotions we will allow in our daily cycle.
+ */
+EXPERIMENT_FACTOR_UINT(_kern, memorystatus_max_freeze_demotions_daily, &memorystatus_max_frozen_demotions_daily, 0, UINT32_MAX, "");
+
+/*
+ * min # of thaws needed by a process to protect it from getting demoted into the IDLE band.
+ */
+EXPERIMENT_FACTOR_UINT(_kern, memorystatus_thaw_count_demotion_threshold, &memorystatus_thaw_count_demotion_threshold, 0, UINT32_MAX, "");
+
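These factors appear to be registered with the CTLFLAG_EXPERIMENT kind (see the sysctl_root() change later in this diff): writes are routed through can_write_experiment_factors(), so they require the com.apple.private.write-kr-experiment-factors entitlement, with root additionally allowed on DEVELOPMENT/DEBUG kernels.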
#if DEVELOPMENT || DEBUG
-SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_jetsam_band, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_jetsam_band, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_daily_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_daily_mb_max, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_degraded_mode, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_degradation, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");
-SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_min, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_pages_min, 0, "");
-SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_pages_max, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_refreeze_eligible_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_refreeze_eligible_count, 0, "");
-SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_processes_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_frozen_processes_max, 0, "");
/*
* Max. shared-anonymous memory in MB that can be held by frozen processes in the high jetsam band.
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_per_process_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_shared_mb_per_process_max, 0, "");
-SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_private_shared_pages_ratio, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_private_shared_pages_ratio, 0, "");
-
-SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_min_processes, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_suspended_threshold, 0, "");
-
-/*
- * max. # of frozen process demotions we will allow in our daily cycle.
- */
-SYSCTL_UINT(_kern, OID_AUTO, memorystatus_max_freeze_demotions_daily, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_max_frozen_demotions_daily, 0, "");
-/*
- * min # of thaws needed by a process to protect it from getting demoted into the IDLE band.
- */
-SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count_demotion_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_thaw_count_demotion_threshold, 0, "");
boolean_t memorystatus_freeze_throttle_enabled = TRUE;
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");
p->p_memstat_state |= P_MEMSTAT_FROZEN;
p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
memorystatus_frozen_count++;
+ os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
memorystatus_freeze_out_of_slots();
}
for (j = 0; j < entry_count; j++) {
if (strncmp(memorystatus_global_probabilities_table[j].proc_name,
p->p_name,
- MAXCOMLEN + 1) == 0) {
+ MAXCOMLEN) == 0) {
probability_of_use = memorystatus_global_probabilities_table[j].use_probability;
break;
}
kern_return_t result;
thread_t thread;
- freezer_lck_grp_attr = lck_grp_attr_alloc_init();
- freezer_lck_grp = lck_grp_alloc_init("freezer", freezer_lck_grp_attr);
-
- lck_mtx_init(&freezer_mutex, freezer_lck_grp, NULL);
-
/*
* This is just the default value if the underlying
* storage device doesn't have any specific budget.
* Called with proc_list_lock held.
*/
- LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_OWNED);
+ LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
boolean_t should_freeze = FALSE;
uint32_t state = 0, pages = 0;
if (entry_count) {
for (i = 0; i < entry_count; i++) {
+ /*
+ * NB: memorystatus_internal_probabilities.proc_name is MAXCOMLEN + 1 bytes
+ * proc_t.p_name is 2*MAXCOMLEN + 1 bytes. So we only compare the first
+ * MAXCOMLEN bytes here since the name in the probabilities table could
+ * be truncated from the proc_t's p_name.
+ */
if (strncmp(memorystatus_global_probabilities_table[i].proc_name,
p->p_name,
- MAXCOMLEN + 1) == 0) {
+ MAXCOMLEN) == 0) {
probability_of_use = memorystatus_global_probabilities_table[i].use_probability;
break;
}
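Concretely (MAXCOMLEN is 16 bytes): an illustrative process named "mediaanalysisd-access" keeps its full name in p_name but would be stored truncated to "mediaanalysisd-a" in the probabilities table; comparing only the first MAXCOMLEN bytes lets the two still match.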
p->p_memstat_state |= P_MEMSTAT_FROZEN;
p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
memorystatus_frozen_count++;
+ os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
memorystatus_freeze_out_of_slots();
}
p->p_memstat_state |= P_MEMSTAT_FROZEN;
p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
memorystatus_frozen_count++;
+ os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
memorystatus_freeze_out_of_slots();
}
} else {
p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE;
}
- memorystatus_freeze_handle_error(p, p->p_memstat_state & P_MEMSTAT_FROZEN, freezer_error_code, aPid, coal, "memorystatus_freeze_top_process");
+ memorystatus_freeze_handle_error(p, freezer_error_code, p->p_memstat_state & P_MEMSTAT_FROZEN, aPid, coal, "memorystatus_freeze_top_process");
proc_rele_locked(p);
return ret;
}
+#if DEVELOPMENT || DEBUG
+/* For testing memorystatus_freeze_top_process */
+static int
+sysctl_memorystatus_freeze_top_process SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+ int error, val;
+ /*
+ * Only freeze on write to prevent freezing during `sysctl -a`.
+ * The actual value written doesn't matter.
+ */
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error || !req->newptr) {
+ return error;
+ }
+ lck_mtx_lock(&freezer_mutex);
+ int ret = memorystatus_freeze_top_process();
+ lck_mtx_unlock(&freezer_mutex);
+ if (ret == -1) {
+ ret = ESRCH;
+ }
+ return ret;
+}
+SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_top_process, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
+ 0, 0, &sysctl_memorystatus_freeze_top_process, "I", "");
+#endif /* DEVELOPMENT || DEBUG */
+
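On DEVELOPMENT/DEBUG kernels this lets a test trigger a single freeze pass by writing any integer to vm.memorystatus_freeze_top_process (for example via sysctlbyname); the handler returns ESRCH when no eligible process could be frozen.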
static inline boolean_t
memorystatus_can_freeze_processes(void)
{
memorystatus_freeze_mark_eligible_processes_with_skip_reason(memorystatus_freeze_skip_reason_t reason, bool locked)
{
LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
- LCK_MTX_ASSERT(proc_list_mlock, locked ? LCK_MTX_ASSERT_OWNED : LCK_MTX_ASSERT_NOTOWNED);
+ LCK_MTX_ASSERT(&proc_list_mlock, locked ? LCK_MTX_ASSERT_OWNED : LCK_MTX_ASSERT_NOTOWNED);
unsigned int band = JETSAM_PRIORITY_IDLE;
proc_t p;
memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts)
{
LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
- LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
+ LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
normal_throttle_window->max_pageouts = new_budget;
normal_throttle_window->ts.tv_sec = normal_throttle_window->mins * 60;
}
/* Ensure the normal window is now active. */
memorystatus_freeze_degradation = FALSE;
+ memorystatus_freezer_stats.mfs_shared_pages_skipped = 0;
+ /*
+ * Reset the thawed percentage to 0 so we re-evaluate in the new interval.
+ */
+ os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed, 0, release);
+ os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen, memorystatus_frozen_count, release);
+ os_atomic_inc(&memorystatus_freeze_current_interval, release);
}
#if DEVELOPMENT || DEBUG
memorystatus_freeze_out_of_budget(const struct throttle_interval_t *interval)
{
LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
- LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
+ LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
mach_timespec_t time_left = {0, 0};
mach_timespec_t now_ts;
memorystatus_freeze_out_of_slots(void)
{
LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
- LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_OWNED);
+ LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
assert(memorystatus_frozen_count == memorystatus_frozen_processes_max);
os_log(OS_LOG_DEFAULT,
clock_nsec_t nsec;
mach_timespec_t now_ts;
LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
- LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
+ LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
unsigned int freeze_daily_pageouts_max = 0;
uint32_t budget_rollover = 0;
interval->mins, budget_rollover),
now_ts);
*budget_pages_allowed = interval->max_pageouts;
- memorystatus_freezer_stats.mfs_shared_pages_skipped = 0;
memorystatus_demote_frozen_processes(FALSE); /* normal mode...don't force a demotion */
} else {
static uint64_t cacheconfig[10], cachesize[10];
static int packages;
-static char * osenvironment;
+static char * osenvironment = NULL;
static uint32_t osenvironment_size = 0;
static int osenvironment_initialized = 0;
uint32_t use_recovery_securityd:1;
} property_existence = {0, 0};
-SYSCTL_NODE(, 0, sysctl, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
+SYSCTL_EXTENSIBLE_NODE(, 0, sysctl, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
"Sysctl internal magic");
-SYSCTL_NODE(, CTL_KERN, kern, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
+SYSCTL_EXTENSIBLE_NODE(, CTL_KERN, kern, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
"High kernel, proc, limits &c");
-SYSCTL_NODE(, CTL_VM, vm, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
+SYSCTL_EXTENSIBLE_NODE(, CTL_VM, vm, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
"Virtual memory");
-SYSCTL_NODE(, CTL_VFS, vfs, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
+SYSCTL_EXTENSIBLE_NODE(, CTL_VFS, vfs, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
"File system");
-SYSCTL_NODE(, CTL_NET, net, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
+SYSCTL_EXTENSIBLE_NODE(, CTL_NET, net, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
"Network, (see socket.h)");
-SYSCTL_NODE(, CTL_DEBUG, debug, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
+SYSCTL_EXTENSIBLE_NODE(, CTL_DEBUG, debug, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
"Debugging");
SYSCTL_NODE(, CTL_HW, hw, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
"hardware");
-SYSCTL_NODE(, CTL_MACHDEP, machdep, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
+SYSCTL_EXTENSIBLE_NODE(, CTL_MACHDEP, machdep, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
"machine dependent");
SYSCTL_NODE(, CTL_USER, user, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
"user-level");
return sysctl_io_number(req, l, sizeof(l), NULL, NULL);
}
+/*
+ * Called by IOKit on Intel, or by sysctl_load_devicetree_entries()
+ */
void
sysctl_set_osenvironment(unsigned int size, const void* value)
{
if (osenvironment_size == 0 && size > 0) {
- MALLOC(osenvironment, char *, size, M_TEMP, M_WAITOK);
+ osenvironment = zalloc_permanent(size, ZALIGN_NONE);
if (osenvironment) {
memcpy(osenvironment, value, size);
osenvironment_size = size;
* PE_init_iokit(). Doing this also avoids the extern-C hackery to access these entries
* from IORegistry (which requires C++).
*/
-void
+__startup_func
+static void
sysctl_load_devicetree_entries(void)
{
DTEntry chosen;
/* load osenvironment */
if (kSuccess == SecureDTGetProperty(chosen, "osenvironment", (void const **) &value, &size)) {
- MALLOC(osenvironment, char *, size, M_TEMP, M_WAITOK);
- if (osenvironment) {
- memcpy(osenvironment, value, size);
- osenvironment_size = size;
- }
+ sysctl_set_osenvironment(size, value);
}
/* load ephemeral_storage */
}
}
}
+STARTUP(SYSCTL, STARTUP_RANK_MIDDLE, sysctl_load_devicetree_entries);
static int
sysctl_osenvironment
/*
* Optional device hardware features can be registered by drivers below hw.features
*/
-SYSCTL_NODE(_hw, OID_AUTO, features, CTLFLAG_RD | CTLFLAG_LOCKED, NULL, "hardware features");
+SYSCTL_EXTENSIBLE_NODE(_hw, OID_AUTO, features, CTLFLAG_RD | CTLFLAG_LOCKED, NULL, "hardware features");
/*
* Deprecated variables. These are supported for backwards compatibility
void
sysctl_mib_init(void)
{
- cputhreadtype = cpu_threadtype();
#if defined(__i386__) || defined (__x86_64__)
cpu64bit = (_get_cpu_capabilities() & k64Bit) == k64Bit;
#elif defined(__arm__) || defined (__arm64__)
#error Unsupported arch
#endif
- /*
- * Populate the optional portion of the hw.* MIB.
- *
- * XXX This could be broken out into parts of the code
- * that actually directly relate to the functions in
- * question.
- */
-
- if (cputhreadtype != CPU_THREADTYPE_NONE) {
- sysctl_register_oid(&sysctl__hw_cputhreadtype);
- }
-
#if defined (__i386__) || defined (__x86_64__)
/* hw.cacheconfig */
cacheconfig[0] = ml_cpu_cache_sharing(0);
cachesize[4] = 0;
packages = 1;
-
#else
#error unknown architecture
#endif /* !__i386__ && !__x86_64 && !__arm__ && !__arm64__ */
}
+
+__startup_func
+static void
+sysctl_mib_startup(void)
+{
+ cputhreadtype = cpu_threadtype();
+
+ /*
+ * Populate the optional portion of the hw.* MIB.
+ *
+ * XXX This could be broken out into parts of the code
+ * that actually directly relate to the functions in
+ * question.
+ */
+
+ if (cputhreadtype != CPU_THREADTYPE_NONE) {
+ sysctl_register_oid_early(&sysctl__hw_cputhreadtype);
+ }
+
+}
+STARTUP(SYSCTL, STARTUP_RANK_MIDDLE, sysctl_mib_startup);
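Both hunks above follow the same startup-subsystem registration pattern. A minimal sketch of that pattern for some other subsystem; the function and OID names here are hypothetical:

	__startup_func
	static void
	example_sysctl_setup(void)
	{
		/* Register a CTLFLAG_PERMANENT OID before STARTUP_SUB_SYSCTL completes. */
		sysctl_register_oid_early(&sysctl__hw_example_feature);
	}
	STARTUP(SYSCTL, STARTUP_RANK_MIDDLE, example_sysctl_setup);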
/* make sure mapping fits into numeric range etc */
- if (os_add3_overflow(file_pos, user_size, PAGE_SIZE_64 - 1, &sum)) {
+ if (os_add3_overflow(file_pos, user_size, vm_map_page_size(user_map) - 1, &sum)) {
return EINVAL;
}
}
KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
-#ifndef CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
(uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
-#endif
+#endif /* XNU_TARGET_OS_OSX */
return error;
}
user_map = current_map();
addr = (mach_vm_offset_t) uap->addr;
size = (mach_vm_size_t) uap->len;
-#ifndef CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
-#endif
+#endif /* XNU_TARGET_OS_OSX */
if (mach_vm_range_overflows(addr, size)) {
return EINVAL;
}
req_vec_size_pages = (end - addr) >> effective_page_shift;
cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));
+ size_t kernel_vec_size = cur_vec_size_pages;
- kernel_vec = (void*) _MALLOC(cur_vec_size_pages * sizeof(char), M_TEMP, M_WAITOK | M_ZERO);
+ kernel_vec = kheap_alloc(KHEAP_TEMP, kernel_vec_size, Z_WAITOK | Z_ZERO);
if (kernel_vec == NULL) {
return ENOMEM;
vec = uap->vec;
pqueryinfo_vec_size = cur_vec_size_pages * sizeof(struct vm_page_info_basic);
- info = (void*) _MALLOC(pqueryinfo_vec_size, M_TEMP, M_WAITOK);
+
+ info = kheap_alloc(KHEAP_TEMP, pqueryinfo_vec_size, Z_WAITOK);
if (info == NULL) {
- FREE(kernel_vec, M_TEMP);
+ kheap_free(KHEAP_TEMP, kernel_vec, kernel_vec_size);
return ENOMEM;
}
first_addr = addr;
}
- FREE(kernel_vec, M_TEMP);
- FREE(info, M_TEMP);
+ kheap_free(KHEAP_TEMP, info, pqueryinfo_vec_size);
+ kheap_free(KHEAP_TEMP, kernel_vec, kernel_vec_size);
if (error) {
return EFAULT;
*/
+#include <kern/counter.h>
#include <sys/param.h>
#include <sys/buf.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
+#include <os/atomic_private.h>
+
#include <security/audit/audit.h>
#include <pexpert/pexpert.h>
+#include <IOKit/IOBSD.h>
+
#if CONFIG_MACF
#include <security/mac_framework.h>
#endif
#include <ptrauth.h>
#endif /* defined(HAS_APPLE_PAC) */
-lck_grp_t * sysctl_lock_group = NULL;
-lck_rw_t * sysctl_geometry_lock = NULL;
-lck_mtx_t * sysctl_unlocked_node_lock = NULL;
+static LCK_GRP_DECLARE(sysctl_lock_group, "sysctl");
+static LCK_RW_DECLARE(sysctl_geometry_lock, &sysctl_lock_group);
+static LCK_MTX_DECLARE(sysctl_unlocked_node_lock, &sysctl_lock_group);
/*
* Conditionally allow dtrace to see these functions for debugging purposes.
int *name, u_int namelen, struct sysctl_req *req,
size_t *retval);
-struct sysctl_oid_list sysctl__children; /* root list */
+SECURITY_READ_ONLY_LATE(struct sysctl_oid_list) sysctl__children; /* root list */
+__SYSCTL_EXTENSION_NODE();
/*
* Initialization of the MIB tree.
* Order by number in each list.
*/
+static void
+sysctl_register_oid_locked(struct sysctl_oid *new_oidp,
+ struct sysctl_oid *oidp)
+{
+ struct sysctl_oid_list *parent = new_oidp->oid_parent;
+ struct sysctl_oid_list *parent_rw = NULL;
+ struct sysctl_oid *p, **prevp;
+
+ p = SLIST_FIRST(parent);
+ if (p && p->oid_number == OID_MUTABLE_ANCHOR) {
+ parent_rw = p->oid_arg1;
+ }
+
+ if (oidp->oid_number == OID_AUTO) {
+ int n = OID_AUTO_START;
+
+ /*
+ * If this oid has a number OID_AUTO, give it a number which
+ * is greater than any current oid. Make sure it is at least
+ * OID_AUTO_START to leave space for pre-assigned oid numbers.
+ */
+
+ SLIST_FOREACH_PREVPTR(p, prevp, parent, oid_link) {
+ if (p->oid_number >= n) {
+ n = p->oid_number + 1;
+ }
+ }
+
+ if (parent_rw) {
+ SLIST_FOREACH_PREVPTR(p, prevp, parent_rw, oid_link) {
+ if (p->oid_number >= n) {
+ n = p->oid_number + 1;
+ }
+ }
+ }
+
+ /*
+ * Reflect the number in an allocated OID into the template
+ * of the caller for sysctl_unregister_oid() compares.
+ */
+ oidp->oid_number = new_oidp->oid_number = n;
+ } else {
+ /*
+ * Insert the oid into the parent's list in order.
+ */
+ SLIST_FOREACH_PREVPTR(p, prevp, parent, oid_link) {
+ if (oidp->oid_number == p->oid_number) {
+ panic("attempting to register a sysctl at previously registered slot : %d",
+ oidp->oid_number);
+ } else if (oidp->oid_number < p->oid_number) {
+ break;
+ }
+ }
+
+ if (parent_rw) {
+ SLIST_FOREACH_PREVPTR(p, prevp, parent_rw, oid_link) {
+ if (oidp->oid_number == p->oid_number) {
+ panic("attempting to register a sysctl at previously registered slot : %d",
+ oidp->oid_number);
+ } else if (oidp->oid_number < p->oid_number) {
+ break;
+ }
+ }
+ }
+ }
+
+#if defined(HAS_APPLE_PAC)
+ if (oidp->oid_handler) {
+ /*
+ * Sign oid_handler address-discriminated upon installation to make it
+ * harder to replace with an arbitrary function pointer. Blend with
+ * a hash of oid_arg1 for robustness against memory corruption.
+ */
+ oidp->oid_handler = ptrauth_auth_and_resign(oidp->oid_handler,
+ ptrauth_key_function_pointer,
+ ptrauth_function_pointer_type_discriminator(typeof(oidp->oid_handler)),
+ ptrauth_key_function_pointer,
+ ptrauth_blend_discriminator(&oidp->oid_handler,
+ os_hash_kernel_pointer(oidp->oid_arg1)));
+ }
+#endif /* defined(HAS_APPLE_PAC) */
+
+ SLIST_NEXT(oidp, oid_link) = *prevp;
+ *prevp = oidp;
+}
+
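Worked example of the OID_AUTO path above: if the highest oid_number on either the immutable sibling list or the mutable one is 137, the new child is assigned 138; if every existing sibling carries a pre-assigned number below OID_AUTO_START, the new child gets OID_AUTO_START itself.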
void
sysctl_register_oid(struct sysctl_oid *new_oidp)
{
- struct sysctl_oid *oidp = NULL;
- struct sysctl_oid_list *parent = new_oidp->oid_parent;
- struct sysctl_oid *p;
- struct sysctl_oid *q;
- int n;
+ struct sysctl_oid *oidp;
+
+ if (new_oidp->oid_number < OID_AUTO) {
+ panic("trying to register a node %p with an invalid oid_number: %d",
+ new_oidp, new_oidp->oid_number);
+ }
+ if (new_oidp->oid_kind & CTLFLAG_PERMANENT) {
+ panic("Use sysctl_register_oid_early to register permanent nodes");
+ }
/*
* The OID can be old-style (needs copy), new style without an earlier
if (!(new_oidp->oid_kind & CTLFLAG_OID2)) {
#if __x86_64__
/*
- * XXX: M_TEMP is perhaps not the most apropriate zone, as it
+ * XXX: KHEAP_DEFAULT is perhaps not the most appropriate zone, as it
* XXX: will subject us to use-after-free by other consumers.
*/
- MALLOC(oidp, struct sysctl_oid *, sizeof(*oidp), M_TEMP, M_WAITOK | M_ZERO);
+ oidp = kheap_alloc(KHEAP_DEFAULT, sizeof(struct sysctl_oid),
+ Z_WAITOK | Z_ZERO);
if (oidp == NULL) {
return; /* reject: no memory */
}
* Note: We may want to set the oid_descr to the
* oid_name (or "") at some future date.
*/
- *oidp = *new_oidp;
+ memcpy(oidp, new_oidp, offsetof(struct sysctl_oid, oid_descr));
#else
panic("Old style sysctl without a version number isn't supported");
#endif
}
}
- /* Get the write lock to modify the geometry */
- lck_rw_lock_exclusive(sysctl_geometry_lock);
-
- /*
- * If this oid has a number OID_AUTO, give it a number which
- * is greater than any current oid. Make sure it is at least
- * OID_AUTO_START to leave space for pre-assigned oid numbers.
- */
- if (oidp->oid_number == OID_AUTO) {
- /* First, find the highest oid in the parent list >OID_AUTO_START-1 */
- n = OID_AUTO_START;
- SLIST_FOREACH(p, parent, oid_link) {
- if (p->oid_number > n) {
- n = p->oid_number;
- }
- }
- oidp->oid_number = n + 1;
- /*
- * Reflect the number in an llocated OID into the template
- * of the caller for sysctl_unregister_oid() compares.
- */
- if (oidp != new_oidp) {
- new_oidp->oid_number = oidp->oid_number;
- }
- }
+ lck_rw_lock_exclusive(&sysctl_geometry_lock);
+ sysctl_register_oid_locked(new_oidp, oidp);
+ lck_rw_unlock_exclusive(&sysctl_geometry_lock);
+}
-#if defined(HAS_APPLE_PAC)
- if (oidp->oid_handler) {
- /*
- * Sign oid_handler address-discriminated upon installation to make it
- * harder to replace with an arbitrary function pointer. Blend with
- * a hash of oid_arg1 for robustness against memory corruption.
- */
- oidp->oid_handler = ptrauth_auth_and_resign(oidp->oid_handler,
- ptrauth_key_function_pointer,
- ptrauth_function_pointer_type_discriminator(typeof(oidp->oid_handler)),
- ptrauth_key_function_pointer,
- ptrauth_blend_discriminator(&oidp->oid_handler,
- os_hash_kernel_pointer(oidp->oid_arg1)));
- }
-#endif /* defined(HAS_APPLE_PAC) */
+__startup_func
+void
+sysctl_register_oid_early(struct sysctl_oid *oidp)
+{
+ assert((oidp->oid_kind & CTLFLAG_OID2) &&
+ (oidp->oid_kind & CTLFLAG_PERMANENT) &&
+ oidp->oid_version == SYSCTL_OID_VERSION);
+ assert(startup_phase < STARTUP_SUB_SYSCTL);
/*
- * Insert the oid into the parent's list in order.
+ * Clear the flag so that callers can use sysctl_register_oid_early
+ * again if they wish to register their node.
*/
- q = NULL;
- SLIST_FOREACH(p, parent, oid_link) {
- if (oidp->oid_number == p->oid_number) {
- panic("attempting to register a sysctl at previously registered slot : %d", oidp->oid_number);
- } else if (oidp->oid_number < p->oid_number) {
- break;
- }
- q = p;
- }
- if (q) {
- SLIST_INSERT_AFTER(q, oidp, oid_link);
- } else {
- SLIST_INSERT_HEAD(parent, oidp, oid_link);
+ if (oidp->oid_kind & CTLFLAG_NOAUTO) {
+ oidp->oid_kind &= ~CTLFLAG_NOAUTO;
+ return;
}
- /* Release the write lock */
- lck_rw_unlock_exclusive(sysctl_geometry_lock);
+ sysctl_register_oid_locked(oidp, oidp);
}
void
struct sysctl_oid *removed_oidp = NULL; /* OID removed from tree */
#if __x86_64__
struct sysctl_oid *old_oidp = NULL; /* OID compatibility copy */
-#else
- struct sysctl_oid *const old_oidp = NULL;
#endif
+ struct sysctl_oid_list *lsp;
/* Get the write lock to modify the geometry */
- lck_rw_lock_exclusive(sysctl_geometry_lock);
+ lck_rw_lock_exclusive(&sysctl_geometry_lock);
+
+ lsp = oidp->oid_parent;
+ if (SLIST_FIRST(lsp) && SLIST_FIRST(lsp)->oid_number == OID_MUTABLE_ANCHOR) {
+ lsp = SLIST_FIRST(lsp)->oid_arg1;
+ }
+
+ if (oidp->oid_kind & CTLFLAG_PERMANENT) {
+ panic("Trying to unregister permanent sysctl %p", oidp);
+ }
if (!(oidp->oid_kind & CTLFLAG_OID2)) {
#if __x86_64__
* partial structure; when we find a match, we remove it
* normally and free the memory.
*/
- SLIST_FOREACH(old_oidp, oidp->oid_parent, oid_link) {
+ SLIST_FOREACH(old_oidp, lsp, oid_link) {
if (!memcmp(&oidp->oid_number, &old_oidp->oid_number, (offsetof(struct sysctl_oid, oid_descr) - offsetof(struct sysctl_oid, oid_number)))) {
break;
}
}
if (old_oidp != NULL) {
- SLIST_REMOVE(old_oidp->oid_parent, old_oidp, sysctl_oid, oid_link);
+ SLIST_REMOVE(lsp, old_oidp, sysctl_oid, oid_link);
removed_oidp = old_oidp;
}
#else
switch (oidp->oid_version) {
case SYSCTL_OID_VERSION:
/* We can just remove the OID directly... */
- SLIST_REMOVE(oidp->oid_parent, oidp, sysctl_oid, oid_link);
+ SLIST_REMOVE(lsp, oidp, sysctl_oid, oid_link);
removed_oidp = oidp;
break;
default:
}
#if defined(HAS_APPLE_PAC)
- if (removed_oidp && removed_oidp->oid_handler && old_oidp == NULL) {
+ if (removed_oidp && removed_oidp->oid_handler) {
/*
* Revert address-discriminated signing performed by
* sysctl_register_oid() (in case this oid is registered again).
* Note: oidp could be NULL if it wasn't found.
*/
while (removed_oidp && removed_oidp->oid_refcnt) {
- lck_rw_sleep(sysctl_geometry_lock, LCK_SLEEP_EXCLUSIVE, &removed_oidp->oid_refcnt, THREAD_UNINT);
+ lck_rw_sleep(&sysctl_geometry_lock, LCK_SLEEP_EXCLUSIVE,
+ &removed_oidp->oid_refcnt, THREAD_UNINT);
}
/* Release the write lock */
- lck_rw_unlock_exclusive(sysctl_geometry_lock);
+ lck_rw_unlock_exclusive(&sysctl_geometry_lock);
- if (old_oidp != NULL) {
#if __x86_64__
- /* If it was allocated, free it after dropping the lock */
- FREE(old_oidp, M_TEMP);
+ /* If it was allocated, free it after dropping the lock */
+ kheap_free(KHEAP_DEFAULT, old_oidp, sizeof(struct sysctl_oid));
#endif
- }
-}
-
-/*
- * Bulk-register all the oids in a linker_set.
- */
-void
-sysctl_register_set(const char *set)
-{
- struct sysctl_oid **oidpp, *oidp;
-
- LINKER_SET_FOREACH(oidpp, struct sysctl_oid **, set) {
- oidp = *oidpp;
- if (!(oidp->oid_kind & CTLFLAG_NOAUTO)) {
- sysctl_register_oid(oidp);
- }
- }
-}
-
-void
-sysctl_unregister_set(const char *set)
-{
- struct sysctl_oid **oidpp, *oidp;
-
- LINKER_SET_FOREACH(oidpp, struct sysctl_oid **, set) {
- oidp = *oidpp;
- if (!(oidp->oid_kind & CTLFLAG_NOAUTO)) {
- sysctl_unregister_oid(oidp);
- }
- }
}
/*
}
#endif
-/*
- * Register the kernel's oids on startup.
- */
-
-void
-sysctl_early_init(void)
-{
- /*
- * Initialize the geometry lock for reading/modifying the
- * sysctl tree. This is done here because IOKit registers
- * some sysctl's before bsd_init() would otherwise perform
- * subsystem initialization.
- */
-
- sysctl_lock_group = lck_grp_alloc_init("sysctl", NULL);
- sysctl_geometry_lock = lck_rw_alloc_init(sysctl_lock_group, NULL);
- sysctl_unlocked_node_lock = lck_mtx_alloc_init(sysctl_lock_group, NULL);
-
- sysctl_register_set("__sysctl_set");
- sysctl_load_devicetree_entries();
-}
-
/*
* New handler interface
* If the sysctl caller (user mode or kernel mode) is interested in the
return error;
}
+/*
+ * SYSCTL_OID enumerators
+ *
+ * Because system OIDs are immutable, they are composed of 2 lists hanging from
+ * a first dummy OID_MUTABLE_ANCHOR node that has an immutable list hanging from
+ * its `oid_parent` field and a mutable list hanging from its oid_arg1 one.
+ *
+ * Those enumerators abstract away the implicit merging of those two lists in
+ * two possible orders:
+ * - oid_number order (which will interleave both sorted lists)
+ * - system order which will list the immutable list first,
+ * and the mutable list second.
+ */
+struct sysctl_oid_iterator {
+ struct sysctl_oid *a;
+ struct sysctl_oid *b;
+};
+
+static struct sysctl_oid_iterator
+sysctl_oid_iterator_begin(struct sysctl_oid_list *l)
+{
+ struct sysctl_oid_iterator it = { };
+ struct sysctl_oid *a = SLIST_FIRST(l);
+
+ if (a == NULL) {
+ return it;
+ }
+
+ if (a->oid_number == OID_MUTABLE_ANCHOR) {
+ it.a = SLIST_NEXT(a, oid_link);
+ it.b = SLIST_FIRST((struct sysctl_oid_list *)a->oid_arg1);
+ } else {
+ it.a = a;
+ }
+ return it;
+}
+
+static struct sysctl_oid *
+sysctl_oid_iterator_next_num_order(struct sysctl_oid_iterator *it)
+{
+ struct sysctl_oid *a = it->a;
+ struct sysctl_oid *b = it->b;
+
+ if (a == NULL && b == NULL) {
+ return NULL;
+ }
+
+ if (a == NULL) {
+ it->b = SLIST_NEXT(b, oid_link);
+ return b;
+ }
+
+ if (b == NULL || a->oid_number <= b->oid_number) {
+ it->a = SLIST_NEXT(a, oid_link);
+ return a;
+ }
+
+ it->b = SLIST_NEXT(b, oid_link);
+ return b;
+}
+
+#define SYSCTL_OID_FOREACH_NUM_ORDER(oidp, l) \
+ for (struct sysctl_oid_iterator it = sysctl_oid_iterator_begin(l); \
+ ((oidp) = sysctl_oid_iterator_next_num_order(&it)); )
+
+static struct sysctl_oid *
+sysctl_oid_iterator_next_system_order(struct sysctl_oid_iterator *it)
+{
+ struct sysctl_oid *a = it->a;
+ struct sysctl_oid *b = it->b;
+
+ if (a) {
+ it->a = SLIST_NEXT(a, oid_link);
+ return a;
+ }
+
+ if (b) {
+ it->b = SLIST_NEXT(b, oid_link);
+ return b;
+ }
+
+ return NULL;
+}
+
+#define SYSCTL_OID_FOREACH_SYS_ORDER(oidp, l) \
+ for (struct sysctl_oid_iterator it = sysctl_oid_iterator_begin(l); \
+ ((oidp) = sysctl_oid_iterator_next_system_order(&it)); )
+
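As a worked example, take an immutable sibling list holding oid numbers {4, 9} and a mutable list holding {7, 200}: SYSCTL_OID_FOREACH_NUM_ORDER visits 4, 7, 9, 200, while SYSCTL_OID_FOREACH_SYS_ORDER visits 4, 9, 7, 200.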
/*
* "Staff-functions"
*
STATIC void
sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
{
- int k;
struct sysctl_oid *oidp;
+ struct sysctl_oid_list *lp;
+ const char *what;
- SLIST_FOREACH(oidp, l, oid_link) {
- for (k = 0; k < i; k++) {
- printf(" ");
+ SYSCTL_OID_FOREACH_SYS_ORDER(oidp, l) {
+ switch (oidp->oid_kind & CTLTYPE) {
+ case CTLTYPE_NODE:
+ lp = oidp->oid_arg1;
+ what = "Node ";
+ if (lp && SLIST_FIRST(lp) &&
+ SLIST_FIRST(lp)->oid_number == OID_MUTABLE_ANCHOR) {
+ what = "NodeExt";
+ } else {
+ }
+ break;
+ case CTLTYPE_INT:
+ what = "Int ";
+ break;
+ case CTLTYPE_STRING:
+ what = "String ";
+ break;
+ case CTLTYPE_QUAD:
+ what = "Quad ";
+ break;
+ case CTLTYPE_OPAQUE:
+ what = "Opaque ";
+ break;
+ default:
+ what = "Unknown";
+ break;
}
- printf("%d %s ", oidp->oid_number, oidp->oid_name);
-
- printf("%c%c%c",
+ printf("%*s%-3d[%c%c%c%c%c] %s %s\n", i, "", oidp->oid_number,
oidp->oid_kind & CTLFLAG_LOCKED ? 'L':' ',
oidp->oid_kind & CTLFLAG_RD ? 'R':' ',
- oidp->oid_kind & CTLFLAG_WR ? 'W':' ');
-
- if (oidp->oid_handler) {
- printf(" *Handler");
- }
+ oidp->oid_kind & CTLFLAG_WR ? 'W':' ',
+ oidp->oid_kind & CTLFLAG_PERMANENT ? ' ':'*',
+ oidp->oid_handler ? 'h' : ' ',
+ what, oidp->oid_name);
- switch (oidp->oid_kind & CTLTYPE) {
- case CTLTYPE_NODE:
- printf(" Node\n");
+ if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
if (!oidp->oid_handler) {
- sysctl_sysctl_debug_dump_node(
- oidp->oid_arg1, i + 2);
+ sysctl_sysctl_debug_dump_node(lp, i + 2);
}
- break;
- case CTLTYPE_INT: printf(" Int\n"); break;
- case CTLTYPE_STRING: printf(" String\n"); break;
- case CTLTYPE_QUAD: printf(" Quad\n"); break;
- case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break;
- default: printf("\n");
}
}
}
sysctl_sysctl_debug(__unused struct sysctl_oid *oidp, __unused void *arg1,
__unused int arg2, __unused struct sysctl_req *req)
{
- lck_rw_lock_shared(sysctl_geometry_lock);
+ lck_rw_lock_shared(&sysctl_geometry_lock);
sysctl_sysctl_debug_dump_node(&sysctl__children, 0);
- lck_rw_done(sysctl_geometry_lock);
+ lck_rw_done(&sysctl_geometry_lock);
return ENOENT;
}
struct sysctl_oid_list *lsp = &sysctl__children, *lsp2;
char tempbuf[10] = {};
- lck_rw_lock_shared(sysctl_geometry_lock);
+ lck_rw_lock_shared(&sysctl_geometry_lock);
while (namelen) {
if (!lsp) {
snprintf(tempbuf, sizeof(tempbuf), "%d", *name);
error = SYSCTL_OUT(req, tempbuf, strlen(tempbuf));
}
if (error) {
- lck_rw_done(sysctl_geometry_lock);
+ lck_rw_done(&sysctl_geometry_lock);
return error;
}
namelen--;
continue;
}
lsp2 = 0;
- SLIST_FOREACH(oid, lsp, oid_link) {
+ SYSCTL_OID_FOREACH_NUM_ORDER(oid, lsp) {
if (oid->oid_number != *name) {
continue;
}
strlen(oid->oid_name));
}
if (error) {
- lck_rw_done(sysctl_geometry_lock);
+ lck_rw_done(&sysctl_geometry_lock);
return error;
}
}
lsp = lsp2;
}
- lck_rw_done(sysctl_geometry_lock);
+ lck_rw_done(&sysctl_geometry_lock);
return SYSCTL_OUT(req, "", 1);
}
struct sysctl_oid *oidp;
*len = level;
- SLIST_FOREACH(oidp, lsp, oid_link) {
+ SYSCTL_OID_FOREACH_NUM_ORDER(oidp, lsp) {
*next = oidp->oid_number;
*oidpp = oidp;
struct sysctl_oid_list *lsp = &sysctl__children;
int newoid[CTL_MAXNAME] = {};
- lck_rw_lock_shared(sysctl_geometry_lock);
+ lck_rw_lock_shared(&sysctl_geometry_lock);
i = sysctl_sysctl_next_ls(lsp, name, namelen, newoid, &j, 1, &oid);
- lck_rw_done(sysctl_geometry_lock);
+ lck_rw_done(&sysctl_geometry_lock);
if (i) {
return ENOENT;
}
STATIC int
name2oid(char *name, int *oid, size_t *len)
{
- char i;
+ struct sysctl_oid_iterator it;
struct sysctl_oid *oidp;
- struct sysctl_oid_list *lsp = &sysctl__children;
char *p;
+ char i;
if (!*name) {
return ENOENT;
*p = '\0';
}
- oidp = SLIST_FIRST(lsp);
+ it = sysctl_oid_iterator_begin(&sysctl__children);
+ oidp = sysctl_oid_iterator_next_system_order(&it);
while (oidp && *len < CTL_MAXNAME) {
if (strcmp(name, oidp->oid_name)) {
- oidp = SLIST_NEXT(oidp, oid_link);
+ oidp = sysctl_oid_iterator_next_system_order(&it);
continue;
}
*oid++ = oidp->oid_number;
break;
}
- lsp = (struct sysctl_oid_list *)oidp->oid_arg1;
- oidp = SLIST_FIRST(lsp);
+ it = sysctl_oid_iterator_begin(oidp->oid_arg1);
+ oidp = sysctl_oid_iterator_next_system_order(&it);
+
*p = i; /* restore */
name = p + 1;
for (p = name; *p && *p != '.'; p++) {
return ENAMETOOLONG;
}
- MALLOC(p, char *, req->newlen + 1, M_TEMP, M_WAITOK);
+ p = kheap_alloc(KHEAP_TEMP, req->newlen + 1, Z_WAITOK);
if (!p) {
return ENOMEM;
}
error = SYSCTL_IN(req, p, req->newlen);
if (error) {
- FREE(p, M_TEMP);
+ kheap_free(KHEAP_TEMP, p, req->newlen + 1);
return error;
}
* Note: We acquire and release the geometry lock here to
* avoid making name2oid needlessly complex.
*/
- lck_rw_lock_shared(sysctl_geometry_lock);
+ lck_rw_lock_shared(&sysctl_geometry_lock);
error = name2oid(p, oid, &len);
- lck_rw_done(sysctl_geometry_lock);
+ lck_rw_done(&sysctl_geometry_lock);
- FREE(p, M_TEMP);
+ kheap_free(KHEAP_TEMP, p, req->newlen + 1);
if (error) {
return error;
int error = ENOENT; /* default error: not found */
u_int namelen = arg2;
u_int indx;
+ struct sysctl_oid_iterator it;
struct sysctl_oid *oid;
- struct sysctl_oid_list *lsp = &sysctl__children;
- lck_rw_lock_shared(sysctl_geometry_lock);
- oid = SLIST_FIRST(lsp);
+ lck_rw_lock_shared(&sysctl_geometry_lock);
+
+ it = sysctl_oid_iterator_begin(&sysctl__children);
+ oid = sysctl_oid_iterator_next_system_order(&it);
indx = 0;
while (oid && indx < CTL_MAXNAME) {
if (indx == namelen) {
goto found;
}
- lsp = (struct sysctl_oid_list *)oid->oid_arg1;
- oid = SLIST_FIRST(lsp);
+ it = sysctl_oid_iterator_begin(oid->oid_arg1);
+ oid = sysctl_oid_iterator_next_system_order(&it);
} else {
if (indx != namelen) {
error = EISDIR;
goto found;
}
} else {
- oid = SLIST_NEXT(oid, oid_link);
+ oid = sysctl_oid_iterator_next_system_order(&it);
}
}
/* Not found */
strlen(oid->oid_fmt) + 1);
}
err:
- lck_rw_done(sysctl_geometry_lock);
+ lck_rw_done(&sysctl_geometry_lock);
return error;
}
return error;
}
+#define WRITE_EXPERIMENT_FACTORS_ENTITLEMENT "com.apple.private.write-kr-experiment-factors"
+/*
+ * Is the current task allowed to write to experiment factors?
+ * tasks with the WRITE_EXPERIMENT_FACTORS_ENTITLEMENT are always allowed to write these.
+ * In the development / debug kernel we also allow root to write them.
+ */
+STATIC bool
+can_write_experiment_factors(__unused struct sysctl_req *req)
+{
+ if (IOTaskHasEntitlement(current_task(), WRITE_EXPERIMENT_FACTORS_ENTITLEMENT)) {
+ return true;
+ }
+#if DEBUG || DEVELOPMENT
+ return !proc_suser(req->p);
+#else
+ return false;
+#endif /* DEBUG || DEVELOPMENT */
+}
+
/*
* Traverse our tree, and find the right node, execute whatever it points
* at, and return the resulting error code.
*/
int
-sysctl_root(boolean_t from_kernel, boolean_t string_is_canonical, char *namestring, size_t namestringlen, int *name, size_t namelen, struct sysctl_req *req)
+sysctl_root(boolean_t from_kernel, boolean_t string_is_canonical,
+ char *namestring, size_t namestringlen,
+ int *name, size_t namelen, struct sysctl_req *req)
{
u_int indx;
int i;
+ struct sysctl_oid_iterator it;
struct sysctl_oid *oid;
- struct sysctl_oid_list *lsp = &sysctl__children;
sysctl_handler_t oid_handler = NULL;
int error;
boolean_t unlocked_node_found = FALSE;
boolean_t namestring_started = FALSE;
/* Get the read lock on the geometry */
- lck_rw_lock_shared(sysctl_geometry_lock);
+ lck_rw_lock_shared(&sysctl_geometry_lock);
if (string_is_canonical) {
/* namestring is actually canonical, name/namelen needs to be populated */
}
}
- oid = SLIST_FIRST(lsp);
+ it = sysctl_oid_iterator_begin(&sysctl__children);
+ oid = sysctl_oid_iterator_next_system_order(&it);
indx = 0;
while (oid && indx < CTL_MAXNAME) {
goto err;
}
- lsp = (struct sysctl_oid_list *)oid->oid_arg1;
- oid = SLIST_FIRST(lsp);
+ it = sysctl_oid_iterator_begin(oid->oid_arg1);
+ oid = sysctl_oid_iterator_next_system_order(&it);
} else {
if (indx != namelen) {
error = EISDIR;
goto found;
}
} else {
- oid = SLIST_NEXT(oid, oid_link);
+ oid = sysctl_oid_iterator_next_system_order(&it);
}
}
error = ENOENT;
goto err;
}
- /*
- * This is where legacy enforcement of permissions occurs. If the
- * flag does not say CTLFLAG_ANYBODY, then we prohibit anyone but
- * root from writing new values down. If local enforcement happens
- * at the leaf node, then it needs to be set as CTLFLAG_ANYBODY. In
- * addition, if the leaf node is set this way, then in order to do
- * specific enforcement, it has to be of type SYSCTL_PROC.
- */
- if (!(oid->oid_kind & CTLFLAG_ANYBODY) &&
- req->newptr && req->p &&
- (error = proc_suser(req->p))) {
- goto err;
+ if (req->newptr && req->p) {
+ if (oid->oid_kind & CTLFLAG_EXPERIMENT) {
+ /*
+ * Experiment factors have different permissions since they need to be
+ * writable by procs with WRITE_EXPERIMENT_FACTORS_ENTITLEMENT.
+ */
+ if (!can_write_experiment_factors(req)) {
+ error = (EPERM);
+ goto err;
+ }
+ } else {
+ /*
+ * This is where legacy enforcement of permissions occurs. If the
+ * flag does not say CTLFLAG_ANYBODY, then we prohibit anyone but
+ * root from writing new values down. If local enforcement happens
+ * at the leaf node, then it needs to be set as CTLFLAG_ANYBODY. In
+ * addition, if the leaf node is set this way, then in order to do
+ * specific enforcement, it has to be of type SYSCTL_PROC.
+ */
+ if (!(oid->oid_kind & CTLFLAG_ANYBODY) &&
+ (error = proc_suser(req->p))) {
+ goto err;
+ }
+ }
}
/*
* not prevent other calls into handlers or calls to manage the
* geometry elsewhere from blocking...
*/
- OSAddAtomic(1, &oid->oid_refcnt);
+ if ((oid->oid_kind & CTLFLAG_PERMANENT) == 0) {
+ OSAddAtomic(1, &oid->oid_refcnt);
+ }
- lck_rw_done(sysctl_geometry_lock);
+ lck_rw_done(&sysctl_geometry_lock);
#if CONFIG_MACF
if (!from_kernel) {
* may be into code whose reentrancy is protected by it.
*/
if (unlocked_node_found) {
- lck_mtx_lock(sysctl_unlocked_node_lock);
+ lck_mtx_lock(&sysctl_unlocked_node_lock);
}
#if defined(HAS_APPLE_PAC)
error = i;
if (unlocked_node_found) {
- lck_mtx_unlock(sysctl_unlocked_node_lock);
+ lck_mtx_unlock(&sysctl_unlocked_node_lock);
}
#if CONFIG_MACF
* barrier to avoid waking every time through on "hot"
* OIDs.
*/
- lck_rw_lock_shared(sysctl_geometry_lock);
- if (OSAddAtomic(-1, &oid->oid_refcnt) == 1) {
- wakeup(&oid->oid_refcnt);
+ lck_rw_lock_shared(&sysctl_geometry_lock);
+
+ if ((oid->oid_kind & CTLFLAG_PERMANENT) == 0) {
+ if (OSAddAtomic(-1, &oid->oid_refcnt) == 1) {
+ wakeup(&oid->oid_refcnt);
+ }
}
err:
- lck_rw_done(sysctl_geometry_lock);
+ lck_rw_done(&sysctl_geometry_lock);
return error;
}
}
}
- MALLOC(namestring, char *, namestringlen, M_TEMP, M_WAITOK);
+ namestring = kheap_alloc(KHEAP_TEMP, namestringlen, Z_WAITOK);
if (!namestring) {
oldlen = 0;
goto err;
error = userland_sysctl(FALSE, namestring, namestringlen, name, uap->namelen, &req, &oldlen);
- FREE(namestring, M_TEMP);
+ kheap_free(KHEAP_TEMP, namestring, namestringlen);
if ((error) && (error != ENOMEM)) {
return error;
}
namelen = (size_t)uap->namelen;
- MALLOC(name, char *, namelen + 1, M_TEMP, M_WAITOK);
+ name = kheap_alloc(KHEAP_TEMP, namelen + 1, Z_WAITOK);
if (!name) {
return ENOMEM;
}
error = copyin(uap->name, name, namelen);
if (error) {
- FREE(name, M_TEMP);
+ kheap_free(KHEAP_TEMP, name, namelen + 1);
return error;
}
name[namelen] = '\0';
*/
if (uap->newlen > SIZE_T_MAX) {
- FREE(name, M_TEMP);
+ kheap_free(KHEAP_TEMP, name, namelen + 1);
return EINVAL;
}
newlen = (size_t)uap->newlen;
error = userland_sysctl(TRUE, name, namelen + 1, oid, CTL_MAXNAME, &req, &oldlen);
- FREE(name, M_TEMP);
+ kheap_free(KHEAP_TEMP, name, namelen + 1);
if ((error) && (error != ENOMEM)) {
return error;
}
return error;
}
+
+int
+scalable_counter_sysctl_handler SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg2, oidp)
+ scalable_counter_t counter = *(scalable_counter_t*) arg1;
+ uint64_t value = counter_load(&counter);
+ return SYSCTL_OUT(req, &value, sizeof(value));
+}
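For context, a minimal sketch of how a counter would be exposed through this handler, using the SCALABLE_COUNTER_DECLARE and SYSCTL_SCALABLE_COUNTER macros that appear later in this change (the counter name my_subsystem_events is hypothetical, and SYSCTL_SCALABLE_COUNTER is assumed to expand to a read-only SYSCTL_PROC that passes the counter as arg1):

/* Hypothetical example: publish an existing per-CPU counter as debug.my_subsystem_events.
 * The handler above snapshots it with counter_load() and copies out a single uint64_t. */
SCALABLE_COUNTER_DECLARE(my_subsystem_events);      /* defined in the owning subsystem */
SYSCTL_SCALABLE_COUNTER(_debug, my_subsystem_events,
    my_subsystem_events, "example event count");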
+
+#define X(name, T) \
+int \
+experiment_factor_##name##_handler SYSCTL_HANDLER_ARGS \
+{ \
+ int error, changed = 0; \
+ T *ptr; \
+ T new_value, current_value; \
+ struct experiment_spec *spec = (struct experiment_spec *) arg1; \
+ if (!arg1) { \
+ return EINVAL; \
+ } \
+ ptr = (T *)(spec->ptr); \
+ current_value = *ptr; \
+ error = sysctl_io_number(req, current_value, sizeof(T), &new_value, &changed); \
+ if (error != 0) { \
+ return error; \
+ } \
+ if (changed) { \
+ if (new_value < (T) spec->min_value || new_value > (T) spec->max_value) { \
+ return EINVAL; \
+ } \
+ if (os_atomic_cmpxchg(&spec->modified, false, true, acq_rel)) { \
+ spec->original_value = current_value; \
+ } \
+ os_atomic_store_wide(ptr, new_value, relaxed); \
+ } \
+ return 0; \
+}
+
+experiment_factor_numeric_types
+#undef X
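To make the X-macro above concrete, here is roughly what a single instantiation expands to, assuming experiment_factor_numeric_types contains an entry such as X(uint64, uint64_t) (the actual type list lives in a header not shown in this hunk):

int
experiment_factor_uint64_handler SYSCTL_HANDLER_ARGS
{
	int error, changed = 0;
	uint64_t *ptr;
	uint64_t new_value, current_value;
	struct experiment_spec *spec = (struct experiment_spec *) arg1;

	if (!arg1) {
		return EINVAL;
	}
	ptr = (uint64_t *)(spec->ptr);
	current_value = *ptr;
	/* Report the current value and/or accept a new one from userspace. */
	error = sysctl_io_number(req, current_value, sizeof(uint64_t), &new_value, &changed);
	if (error != 0) {
		return error;
	}
	if (changed) {
		/* Reject values outside the spec's [min_value, max_value] range. */
		if (new_value < (uint64_t) spec->min_value || new_value > (uint64_t) spec->max_value) {
			return EINVAL;
		}
		/* Record the pre-experiment value the first time the factor is modified. */
		if (os_atomic_cmpxchg(&spec->modified, false, true, acq_rel)) {
			spec->original_value = current_value;
		}
		os_atomic_store_wide(ptr, new_value, relaxed);
	}
	return 0;
}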
static int64_t time_adjtime;
static int updated;
-static lck_spin_t * ntp_lock;
-static lck_grp_t * ntp_lock_grp;
-static lck_attr_t * ntp_lock_attr;
-static lck_grp_attr_t *ntp_lock_grp_attr;
+static LCK_GRP_DECLARE(ntp_lock_grp, "ntp_lock");
+static LCK_SPIN_DECLARE(ntp_lock, &ntp_lock_grp);
#define NTP_LOCK(enable) \
enable = ml_set_interrupts_enabled(FALSE); \
- lck_spin_lock(ntp_lock);
+ lck_spin_lock(&ntp_lock);
#define NTP_UNLOCK(enable) \
- lck_spin_unlock(ntp_lock);\
+ lck_spin_unlock(&ntp_lock);\
ml_set_interrupts_enabled(enable);
-#define NTP_ASSERT_LOCKED() LCK_SPIN_ASSERT(ntp_lock, LCK_ASSERT_OWNED)
+#define NTP_ASSERT_LOCKED() LCK_SPIN_ASSERT(&ntp_lock, LCK_ASSERT_OWNED)
static timer_call_data_t ntp_loop_update;
static uint64_t ntp_loop_deadline;
void
ntp_init(void)
{
- L_CLR(time_offset);
- L_CLR(time_freq);
-
- ntp_lock_grp_attr = lck_grp_attr_alloc_init();
- ntp_lock_grp = lck_grp_alloc_init("ntp_lock", ntp_lock_grp_attr);
- ntp_lock_attr = lck_attr_alloc_init();
- ntp_lock = lck_spin_alloc_init(ntp_lock_grp, ntp_lock_attr);
-
- updated = 0;
-
init_ntp_loop();
}
-
-SYSINIT(ntpclocks, SI_SUB_CLOCKS, SI_ORDER_MIDDLE, ntp_init, NULL);
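The NTP hunk above illustrates a conversion repeated throughout this diff: lock groups, attributes, and locks that used to be heap-allocated in an init routine (lck_grp_alloc_init / lck_spin_alloc_init, hooked up via SYSINIT) become statically declared objects initialized before any code runs, and callers take them by address. A minimal sketch of the new idiom, with hypothetical names:

/* Before: pointers filled in by an init function that can now be deleted.
 *   static lck_grp_t  *foo_lock_grp;
 *   static lck_spin_t *foo_lock;
 *   foo_lock = lck_spin_alloc_init(foo_lock_grp, foo_lock_attr);
 *
 * After: the group and lock are statically declared and ready at boot. */
static LCK_GRP_DECLARE(foo_lock_grp, "foo_lock");
static LCK_SPIN_DECLARE(foo_lock, &foo_lock_grp);

static void
foo_critical_section(void)
{
	lck_spin_lock(&foo_lock);       /* locks are now passed by address */
	/* ... protected work ... */
	lck_spin_unlock(&foo_lock);
}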
#include <sys/kern_memorystatus.h>
/* Mutex for global system override state */
-static lck_mtx_t sys_override_lock;
-static lck_grp_t *sys_override_mtx_grp;
-static lck_attr_t *sys_override_mtx_attr;
-static lck_grp_attr_t *sys_override_mtx_grp_attr;
+static LCK_GRP_DECLARE(sys_override_mtx_grp, "system_override");
+static LCK_MTX_DECLARE(sys_override_lock, &sys_override_mtx_grp);
/*
* Assertion counts for system properties (add new ones for each new mechanism)
/* Wait Channel for system override */
static uint64_t sys_override_wait;
-/* Global variable to indicate if system_override is enabled */
-int sys_override_enabled;
-
/* Helper routines */
static void system_override_begin(uint64_t flags);
static void system_override_end(uint64_t flags);
static void system_override_callouts(uint64_t flags, boolean_t enable_override);
static __attribute__((noinline)) int PROCESS_OVERRIDING_SYSTEM_DEFAULTS(uint64_t timeout);
-void
-init_system_override()
-{
- sys_override_mtx_grp_attr = lck_grp_attr_alloc_init();
- sys_override_mtx_grp = lck_grp_alloc_init("system_override", sys_override_mtx_grp_attr);
- sys_override_mtx_attr = lck_attr_alloc_init();
- lck_mtx_init(&sys_override_lock, sys_override_mtx_grp, sys_override_mtx_attr);
- io_throttle_assert_cnt = cpu_throttle_assert_cnt = fast_jetsam_assert_cnt = 0;
- sys_override_enabled = 1;
-}
-
/* system call implementation */
int
system_override(__unused struct proc *p, struct system_override_args * uap, __unused int32_t *retval)
goto out;
}
- /* Make sure that the system override syscall has been initialized */
- if (!sys_override_enabled) {
- error = EINVAL;
- goto out;
- }
-
lck_mtx_lock(&sys_override_lock);
if (flags & SYS_OVERRIDE_DISABLE) {
/* Only Multiuser Mode needs to update the session login name to the persona name */
#if (TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR)
- volatile uint32_t *multiuser_flag_address = (volatile uint32_t *)(uintptr_t)(_COMM_PAGE_MULTIUSER_CONFIG);
- uint32_t multiuser_flags = *multiuser_flag_address;
+ uint32_t multiuser_flags = COMM_PAGE_READ(uint32_t, MULTIUSER_CONFIG);
/* set the login name of the session */
if (multiuser_flags) {
struct session * sessp = proc_session(p);
#include <kern/backtrace.h>
#endif
+static LCK_MTX_DECLARE_ATTR(proc_klist_mlock, &proc_mlock_grp, &proc_lck_attr);
+
ZONE_DECLARE(pgrp_zone, "pgrp",
sizeof(struct pgrp), ZC_ZFREE_CLEARMEM);
ZONE_DECLARE(session_zone, "session",
sizeof(struct session), ZC_ZFREE_CLEARMEM);
+/*
+ * If you need accounting for KM_PROC consider using
+ * ZONE_VIEW_DEFINE to define a zone view.
+ */
+#define KM_PROC KHEAP_DEFAULT
typedef uint64_t unaligned_u64 __attribute__((aligned(1)));
LIST_REMOVE(uip, ui_hash);
retval = 0;
proc_list_unlock();
- FREE(uip, M_PROC);
+ kheap_free(KM_PROC, uip, sizeof(struct uidinfo));
goto out;
}
if (diff <= 0) {
goto out;
}
proc_list_unlock();
- MALLOC(newuip, struct uidinfo *, sizeof(*uip), M_PROC, M_WAITOK);
+ newuip = kheap_alloc(KM_PROC, sizeof(struct uidinfo), Z_WAITOK);
if (newuip == NULL) {
panic("chgproccnt: M_PROC zone depleted");
}
goto again;
out:
- if (newuip != NULL) {
- FREE(newuip, M_PROC);
- }
+ kheap_free(KM_PROC, newuip, sizeof(struct uidinfo));
return retval;
}
(((p->p_listflag & (P_LIST_DRAIN | P_LIST_DRAINWAIT)) == 0) ||
((p->p_listflag & P_LIST_REFWAIT) != 0))) {
if ((p->p_listflag & P_LIST_REFWAIT) != 0 && uthread_needs_to_wait_in_proc_refwait()) {
- msleep(&p->p_listflag, proc_list_mlock, 0, "proc_refwait", 0);
+ msleep(&p->p_listflag, &proc_list_mlock, 0, "proc_refwait", 0);
/*
* the proc might have been recycled since we dropped
* the proc list lock, get the proc again.
/* If someone else is controlling the (unreaped) zombie - wait */
if ((p->p_listflag & P_LIST_WAITING) != 0) {
- (void)msleep(&p->p_stat, proc_list_mlock, PWAIT, "waitcoll", 0);
+ (void)msleep(&p->p_stat, &proc_list_mlock, PWAIT, "waitcoll", 0);
goto again;
}
p->p_listflag |= P_LIST_WAITING;
/* Do not wait in ref drain for launchd exec */
while (p->p_refcount && !initexec) {
p->p_listflag |= P_LIST_DRAINWAIT;
- msleep(&p->p_refcount, proc_list_mlock, 0, "proc_refdrain", 0);
+ msleep(&p->p_refcount, &proc_list_mlock, 0, "proc_refdrain", 0);
}
p->p_listflag &= ~P_LIST_DRAIN;
if ((pp->p_listflag & (P_LIST_CHILDDRSTART | P_LIST_CHILDDRAINED)) == P_LIST_CHILDDRSTART) {
pp->p_listflag |= P_LIST_CHILDDRWAIT;
- msleep(&pp->p_childrencnt, proc_list_mlock, 0, "proc_parent", 0);
+ msleep(&pp->p_childrencnt, &proc_list_mlock, 0, "proc_parent", 0);
loopcnt++;
if (loopcnt == 5) {
parent = PROC_NULL;
/* wait for all that hold parentrefs to drop */
while (p->p_parentref > 0) {
p->p_listflag |= P_LIST_PARENTREFWAIT;
- msleep(&p->p_parentref, proc_list_mlock, 0, "proc_childdrainstart", 0);
+ msleep(&p->p_parentref, &proc_list_mlock, 0, "proc_childdrainstart", 0);
}
}
proc_pid(proc_t p)
{
if (p != NULL) {
+ proc_require(p, PROC_REQUIRE_ALLOW_KERNPROC);
return p->p_pid;
}
return -1;
proc_ppid(proc_t p)
{
if (p != NULL) {
+ proc_require(p, PROC_REQUIRE_ALLOW_KERNPROC);
return p->p_ppid;
}
return -1;
proc_original_ppid(proc_t p)
{
if (p != NULL) {
+ proc_require(p, PROC_REQUIRE_ALLOW_KERNPROC);
return p->p_original_ppid;
}
return -1;
proc_csflags(proc_t p, uint64_t *flags)
{
if (p && flags) {
+ proc_require(p, PROC_REQUIRE_ALLOW_KERNPROC);
*flags = (uint64_t)p->p_csflags;
return 0;
}
parent = proc_ref_locked(pp);
if ((parent == PROC_NULL) && (pp != PROC_NULL) && (pp->p_stat != SZOMB) && ((pp->p_listflag & P_LIST_EXITED) != 0) && ((pp->p_listflag & P_LIST_CHILDDRAINED) == 0)) {
pp->p_listflag |= P_LIST_CHILDLKWAIT;
- msleep(&pp->p_childrencnt, proc_list_mlock, 0, "proc_parent", 0);
+ msleep(&pp->p_childrencnt, &proc_list_mlock, 0, "proc_parent", 0);
goto loop;
}
proc_list_unlock();
sess->s_count = 1;
sess->s_ttypgrpid = NO_PID;
- lck_mtx_init(&sess->s_mlock, proc_mlock_grp, proc_lck_attr);
+ lck_mtx_init(&sess->s_mlock, &proc_mlock_grp, &proc_lck_attr);
bcopy(procsp->s_login, sess->s_login,
sizeof(sess->s_login));
}
pgrp->pg_id = pgid;
- lck_mtx_init(&pgrp->pg_mlock, proc_mlock_grp, proc_lck_attr);
+ lck_mtx_init(&pgrp->pg_mlock, &proc_mlock_grp, &proc_lck_attr);
LIST_INIT(&pgrp->pg_members);
proc_list_lock();
panic("pg_deleteref: freeing session in use");
}
proc_list_unlock();
- lck_mtx_destroy(&sessp->s_mlock, proc_mlock_grp);
+ lck_mtx_destroy(&sessp->s_mlock, &proc_mlock_grp);
zfree(session_zone, sessp);
} else {
proc_list_unlock();
}
- lck_mtx_destroy(&pgrp->pg_mlock, proc_mlock_grp);
+ lck_mtx_destroy(&pgrp->pg_mlock, &proc_mlock_grp);
zfree(pgrp_zone, pgrp);
}
return os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_IGNORE_CONTENT_PROTECTION;
}
+bool
+proc_ignores_node_permissions(proc_t p)
+{
+ return os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_IGNORE_NODE_PERMISSIONS;
+}
+
+bool
+proc_skip_mtime_update(proc_t p)
+{
+ return os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_SKIP_MTIME_UPDATE;
+}
+
#if CONFIG_COREDUMP
/*
* proc_core_name(name, uid, pid)
proc_list_lock();
pid_count_available = nprocs + 1; /* kernel_task not counted in nprocs */
assert(pid_count_available > 0);
- if (pidlist_nalloc(pl) > pid_count_available) {
+ if (pidlist_nalloc(pl) >= pid_count_available) {
break;
}
proc_list_unlock();
proc_list_unlock();
goto out;
}
- if (pidlist_nalloc(pl) > pid_count_available) {
+ if (pidlist_nalloc(pl) >= pid_count_available) {
break;
}
proc_list_unlock();
}
goto out;
}
- if (pidlist_nalloc(pl) > pid_count_available) {
+ if (pidlist_nalloc(pl) >= pid_count_available) {
break;
}
pgrp_unlock(pgrp);
while ((p->p_listflag & P_LIST_PGRPTRANS) == P_LIST_PGRPTRANS) {
p->p_listflag |= P_LIST_PGRPTRWAIT;
- (void)msleep(&p->p_pgrpid, proc_list_mlock, 0, "proc_pgrp", 0);
+ (void)msleep(&p->p_pgrpid, &proc_list_mlock, 0, "proc_pgrp", 0);
}
p->p_listflag |= P_LIST_PGRPTRANS;
while ((p->p_listflag & P_LIST_PGRPTRANS) == P_LIST_PGRPTRANS) {
p->p_listflag |= P_LIST_PGRPTRWAIT;
- (void)msleep(&p->p_pgrpid, proc_list_mlock, 0, "proc_pgrp", 0);
+ (void)msleep(&p->p_pgrpid, &proc_list_mlock, 0, "proc_pgrp", 0);
}
pgrp = p->p_pgrp;
/* wait during transitions */
while ((p->p_listflag & P_LIST_PGRPTRANS) == P_LIST_PGRPTRANS) {
p->p_listflag |= P_LIST_PGRPTRWAIT;
- (void)msleep(&p->p_pgrpid, proc_list_mlock, 0, "proc_pgrp", 0);
+ (void)msleep(&p->p_pgrpid, &proc_list_mlock, 0, "proc_pgrp", 0);
}
if ((p->p_pgrp != PGRP_NULL) && ((sess = p->p_pgrp->pg_session) != SESSION_NULL)) {
panic("session_rele: freeing session in use");
}
proc_list_unlock();
- lck_mtx_destroy(&sess->s_mlock, proc_mlock_grp);
+ lck_mtx_destroy(&sess->s_mlock, &proc_mlock_grp);
zfree(session_zone, sess);
} else {
proc_list_unlock();
void
proc_klist_lock(void)
{
- lck_mtx_lock(proc_klist_mlock);
+ lck_mtx_lock(&proc_klist_mlock);
}
void
proc_klist_unlock(void)
{
- lck_mtx_unlock(proc_klist_mlock);
+ lck_mtx_unlock(&proc_klist_mlock);
}
void
* result.
*
* Note: Does *NOT* currently include per-thread credential changes
- *
- * We don't use kauth_cred_print() in current debugging, but it
- * can be used if needed when debugging is active.
*/
#if DEBUG_CRED
#define DEBUG_CRED_ENTER printf
#define DEBUG_CRED_CHANGE printf
-extern void kauth_cred_print(kauth_cred_t cred);
#else /* !DEBUG_CRED */
#define DEBUG_CRED_ENTER(fmt, ...) do {} while (0)
#define DEBUG_CRED_CHANGE(fmt, ...) do {} while (0)
iopolicysys_vfs_trigger_resolve(struct proc *p, int cmd, int scope, int policy, struct _iopol_param_t *iop_param);
static int
iopolicysys_vfs_ignore_content_protection(struct proc *p, int cmd, int scope, int policy, struct _iopol_param_t *iop_param);
+static int
+iopolicysys_vfs_ignore_node_permissions(struct proc *p, int cmd, int scope, int policy, struct _iopol_param_t *ipo_param);
+static int
+iopolicysys_vfs_skip_mtime_update(struct proc *p, int cmd, int scope, int policy, struct _iopol_param_t *iop_param);
/*
* iopolicysys
goto out;
}
break;
+ case IOPOL_TYPE_VFS_IGNORE_PERMISSIONS:
+ error = iopolicysys_vfs_ignore_node_permissions(p, uap->cmd, iop_param.iop_scope, iop_param.iop_policy, &iop_param);
+ if (error) {
+ goto out;
+ }
+ break;
+ case IOPOL_TYPE_VFS_SKIP_MTIME_UPDATE:
+ error = iopolicysys_vfs_skip_mtime_update(p, uap->cmd, iop_param.iop_scope, iop_param.iop_policy, &iop_param);
+ if (error) {
+ goto out;
+ }
+ break;
default:
error = EINVAL;
goto out;
return error;
}
+#define AUTHORIZED_ACCESS_ENTITLEMENT \
+ "com.apple.private.vfs.authorized-access"
+int
+iopolicysys_vfs_ignore_node_permissions(struct proc *p, int cmd, int scope,
+ int policy, __unused struct _iopol_param_t *iop_param)
+{
+ int error = EINVAL;
+
+ switch (scope) {
+ case IOPOL_SCOPE_PROCESS:
+ break;
+ default:
+ goto out;
+ }
+
+ switch (cmd) {
+ case IOPOL_CMD_GET:
+ policy = os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_IGNORE_NODE_PERMISSIONS ?
+ IOPOL_VFS_IGNORE_PERMISSIONS_ON : IOPOL_VFS_IGNORE_PERMISSIONS_OFF;
+ iop_param->iop_policy = policy;
+ goto out_ok;
+ case IOPOL_CMD_SET:
+ /* SET is handled after the switch */
+ break;
+ default:
+ goto out;
+ }
+
+ if (!IOTaskHasEntitlement(current_task(), AUTHORIZED_ACCESS_ENTITLEMENT)) {
+ error = EPERM;
+ goto out;
+ }
+
+ switch (policy) {
+ case IOPOL_VFS_IGNORE_PERMISSIONS_OFF:
+ os_atomic_andnot(&p->p_vfs_iopolicy, P_VFS_IOPOLICY_IGNORE_NODE_PERMISSIONS, relaxed);
+ break;
+ case IOPOL_VFS_IGNORE_PERMISSIONS_ON:
+ os_atomic_or(&p->p_vfs_iopolicy, P_VFS_IOPOLICY_IGNORE_NODE_PERMISSIONS, relaxed);
+ break;
+ default:
+ break;
+ }
+
+out_ok:
+ error = 0;
+out:
+ return error;
+}
+
+#define SKIP_MTIME_UPDATE_ENTITLEMENT \
+ "com.apple.private.vfs.skip-mtime-updates"
+int
+iopolicysys_vfs_skip_mtime_update(struct proc *p, int cmd, int scope,
+ int policy, __unused struct _iopol_param_t *iop_param)
+{
+ int error = EINVAL;
+
+ switch (scope) {
+ case IOPOL_SCOPE_PROCESS:
+ break;
+ default:
+ goto out;
+ }
+
+ switch (cmd) {
+ case IOPOL_CMD_GET:
+ policy = os_atomic_load(&p->p_vfs_iopolicy, relaxed) & P_VFS_IOPOLICY_SKIP_MTIME_UPDATE ?
+ IOPOL_VFS_SKIP_MTIME_UPDATE_ON : IOPOL_VFS_SKIP_MTIME_UPDATE_OFF;
+ iop_param->iop_policy = policy;
+ goto out_ok;
+ case IOPOL_CMD_SET:
+ break;
+ default:
+ break;
+ }
+
+ if (!IOTaskHasEntitlement(current_task(), SKIP_MTIME_UPDATE_ENTITLEMENT)) {
+ error = EPERM;
+ goto out;
+ }
+
+ switch (policy) {
+ case IOPOL_VFS_SKIP_MTIME_UPDATE_OFF:
+ os_atomic_andnot(&p->p_vfs_iopolicy, P_VFS_IOPOLICY_SKIP_MTIME_UPDATE, relaxed);
+ break;
+ case IOPOL_VFS_SKIP_MTIME_UPDATE_ON:
+ os_atomic_or(&p->p_vfs_iopolicy, P_VFS_IOPOLICY_SKIP_MTIME_UPDATE, relaxed);
+ break;
+ default:
+ break;
+ }
+
+out_ok:
+ error = 0;
+out:
+ return error;
+}
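As a rough usage sketch for the two new per-process policies (assuming the standard setiopolicy_np(3) interface; the IOPOL_* constants come from the corresponding header change, and the entitlements are the ones #defined above):

#include <sys/resource.h>
#include <stdio.h>

/* Hypothetical caller: requires com.apple.private.vfs.skip-mtime-updates
 * (or com.apple.private.vfs.authorized-access for IOPOL_TYPE_VFS_IGNORE_PERMISSIONS);
 * without the entitlement the handlers above fail with EPERM. */
static void
enable_skip_mtime_updates(void)
{
	if (setiopolicy_np(IOPOL_TYPE_VFS_SKIP_MTIME_UPDATE,
	    IOPOL_SCOPE_PROCESS, IOPOL_VFS_SKIP_MTIME_UPDATE_ON) == -1) {
		perror("setiopolicy_np");
	}
}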
/* BSD call back function for task_policy networking changes */
void
proc_apply_task_networkbg(void * bsd_info, thread_t thread)
#include <kern/clock.h> /* for delay_for_interval() */
#include <libkern/OSAtomic.h>
#include <IOKit/IOPlatformExpert.h>
+#include <IOKit/IOMessage.h>
#include <sys/kdebug.h>
static int sd_openlog(vfs_context_t);
static int sd_closelog(vfs_context_t);
static void sd_log(vfs_context_t, const char *, ...);
-static void proc_shutdown(void);
+static void proc_shutdown(int only_non_dext);
static void zprint_panic_info(void);
extern void halt_log_enter(const char * what, const void * pc, uint64_t time);
struct sd_filterargs {
int delayterm;
int shutdownstate;
+ int only_non_dext;
};
static int sd_callback2(proc_t p, void * arg);
static int sd_callback3(proc_t p, void * arg);
-extern boolean_t panic_include_zprint;
+extern bool panic_include_zprint;
extern mach_memory_info_t *panic_kext_memory_info;
extern vm_size_t panic_kext_memory_size;
/* handle live procs (deallocate their root and current directories), suspend initproc */
startTime = mach_absolute_time();
- proc_shutdown();
+ proc_shutdown(TRUE);
halt_log_enter("proc_shutdown", 0, mach_absolute_time() - startTime);
#if CONFIG_AUDIT
#endif /* DEVELOPMENT || DEBUG */
{
startTime = mach_absolute_time();
- vfs_unmountall();
+ vfs_unmountall(TRUE);
halt_log_enter("vfs_unmountall", 0, mach_absolute_time() - startTime);
}
+ IOSystemShutdownNotification(kIOSystemShutdownNotificationTerminateDEXTs);
+
+ startTime = mach_absolute_time();
+ proc_shutdown(FALSE);
+ halt_log_enter("proc_shutdown", 0, mach_absolute_time() - startTime);
+
+#if DEVELOPMENT || DEBUG
+ if (!(howto & RB_PANIC) || !kdp_has_polled_corefile())
+#endif /* DEVELOPMENT || DEBUG */
+ {
+ startTime = mach_absolute_time();
+ vfs_unmountall(FALSE);
+ halt_log_enter("vfs_unmountall", 0, mach_absolute_time() - startTime);
+ }
+
+
+
/* Wait for the buffer cache to clean remaining dirty buffers */
startTime = mach_absolute_time();
for (iter = 0; iter < 100; iter++) {
if (sd_logvp != NULLVP) {
VNOP_FSYNC(sd_logvp, MNT_WAIT, ctx);
error = vnode_close(sd_logvp, FWRITE, ctx);
+ sd_logvp = NULLVP;
}
return error;
va_end(arglist);
}
+#define proc_is_driver(p) (task_is_driver((p)->task))
+
static int
sd_filt1(proc_t p, void * args)
{
int delayterm = sf->delayterm;
int shutdownstate = sf->shutdownstate;
+ if (sf->only_non_dext && proc_is_driver(p)) {
+ return 0;
+ }
+
if (((p->p_flag & P_SYSTEM) != 0) || (p->p_ppid == 0)
|| (p == self) || (p->p_stat == SZOMB)
|| (p->p_shutdownstate != shutdownstate)
proc_shutdown_exitcount++;
proc_list_unlock();
}
-
+ if (proc_is_driver(p)) {
+ printf("lingering dext %s signal(%d)\n", p->p_name, signo);
+ }
psignal(p, signo);
if (countproc != 0) {
sd->activecount++;
int delayterm = sf->delayterm;
int shutdownstate = sf->shutdownstate;
+ if (sf->only_non_dext && proc_is_driver(p)) {
+ return 0;
+ }
+
if (((p->p_flag & P_SYSTEM) != 0) || (p->p_ppid == 0)
|| (p == self) || (p->p_stat == SZOMB)
|| (p->p_shutdownstate == shutdownstate)
proc_shutdown_exitcount++;
proc_list_unlock();
}
+ if (proc_is_driver(p)) {
+ printf("lingering dext %s signal(%d)\n", p->p_name, signo);
+ }
psignal(p, signo);
if (countproc != 0) {
sd->activecount++;
*/
static void
-proc_shutdown(void)
+proc_shutdown(int only_non_dext)
{
vfs_context_t ctx = vfs_context_current();
struct proc *p, *self;
*/
sfargs.delayterm = delayterm;
sfargs.shutdownstate = 0;
+ sfargs.only_non_dext = only_non_dext;
sdargs.signo = SIGTERM;
sdargs.setsdstate = 1;
sdargs.countproc = 1;
*/
ts.tv_sec = 3;
ts.tv_nsec = 0;
- error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
+ error = msleep(&proc_shutdown_exitcount, &proc_list_mlock, PWAIT, "shutdownwait", &ts);
if (error != 0) {
for (p = allproc.lh_first; p; p = p->p_list.le_next) {
if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT) {
*/
ts.tv_sec = 10;
ts.tv_nsec = 0;
- error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
+ error = msleep(&proc_shutdown_exitcount, &proc_list_mlock, PWAIT, "shutdownwait", &ts);
if (error != 0) {
for (p = allproc.lh_first; p; p = p->p_list.le_next) {
if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT) {
sd_closelog(ctx);
+ if (only_non_dext) {
+ return;
+ }
+
/*
* Now that all other processes have been terminated, suspend init
*/
* Default catcher, where the default is to kill
* the process. (Other cases were ignored above.)
*/
- sig_lock_to_exit(p);
/*
* exit_with_reason() below will consume a reference to the thread's exit reason, so we take another
#include <sys/memory_maintenance.h>
#include <sys/priv.h>
#include <stdatomic.h>
+#include <uuid/uuid.h>
#include <security/audit/audit.h>
#include <kern/kalloc.h>
extern unsigned int preheat_max_bytes;
extern unsigned int preheat_min_bytes;
extern long numvnodes;
+extern long freevnodes;
extern long num_recycledvnodes;
extern uuid_string_t bootsessionuuid_string;
host_basic_info_data_t hinfo;
kern_return_t kret;
uint32_t size;
+ uint32_t buf_size = 0;
int changed;
mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
struct _processor_statistics_np *buf;
return EINVAL;
}
- MALLOC(buf, struct _processor_statistics_np*, size, M_TEMP, M_ZERO | M_WAITOK);
+ buf_size = size;
+ buf = kheap_alloc(KHEAP_TEMP, buf_size, Z_ZERO | Z_WAITOK);
kret = get_sched_statistics(buf, &size);
if (kret != KERN_SUCCESS) {
panic("Sched info changed?!");
}
out:
- FREE(buf, M_TEMP);
+ kheap_free(KHEAP_TEMP, buf, buf_size);
return error;
}
__unused int cmd = oidp->oid_arg2; /* subcommand*/
__unused int *name = arg1; /* oid element argument vector */
__unused int namelen = arg2; /* number of oid element arguments */
- user_addr_t oldp = req->oldptr; /* user buffer copy out address */
- size_t *oldlenp = &req->oldlen; /* user buffer copy out size */
- user_addr_t newp = req->newptr; /* user buffer copy in address */
- size_t newlen = req->newlen; /* user buffer copy in size */
- int error;
+ int error, changed;
int tmp;
* for example, to dump current counts:
* sysctl -w kern.count_calls=2
*/
- error = sysctl_int(oldp, oldlenp, newp, newlen, &tmp);
- if (error != 0) {
+ error = sysctl_io_number(req, do_count_syscalls,
+ sizeof(do_count_syscalls), &tmp, &changed);
+
+ if (error != 0 || !changed) {
return error;
}
if (tmp == 1) {
do_count_syscalls = 1;
} else if (tmp == 0 || tmp == 2 || tmp == 3) {
- int i;
- for (i = 0; i < nsysent; i++) {
+ for (int i = 0; i < nsysent; i++) {
if (syscalls_log[i] != 0) {
if (tmp == 2) {
printf("%d calls - name %s \n", syscalls_log[i], syscallnames[i]);
}
}
}
- if (tmp != 0) {
- do_count_syscalls = 1;
- }
- }
-
- /* adjust index so we return the right required/consumed amount */
- if (!error) {
- req->oldidx += req->oldlen;
+ do_count_syscalls = (tmp != 0);
}
return error;
* instead.
*/
-/*
- * Validate parameters and get old / set new parameters
- * for an integer-valued sysctl function.
- */
-int
-sysctl_int(user_addr_t oldp, size_t *oldlenp,
- user_addr_t newp, size_t newlen, int *valp)
-{
- int error = 0;
-
- if (oldp != USER_ADDR_NULL && oldlenp == NULL) {
- return EFAULT;
- }
- if (oldp && *oldlenp < sizeof(int)) {
- return ENOMEM;
- }
- if (newp && newlen != sizeof(int)) {
- return EINVAL;
- }
- *oldlenp = sizeof(int);
- if (oldp) {
- error = copyout(valp, oldp, sizeof(int));
- }
- if (error == 0 && newp) {
- error = copyin(newp, valp, sizeof(int));
- AUDIT_ARG(value32, *valp);
- }
- return error;
-}
-
-/*
- * Validate parameters and get old / set new parameters
- * for an quad(64bit)-valued sysctl function.
- */
-int
-sysctl_quad(user_addr_t oldp, size_t *oldlenp,
- user_addr_t newp, size_t newlen, quad_t *valp)
-{
- int error = 0;
-
- if (oldp != USER_ADDR_NULL && oldlenp == NULL) {
- return EFAULT;
- }
- if (oldp && *oldlenp < sizeof(quad_t)) {
- return ENOMEM;
- }
- if (newp && newlen != sizeof(quad_t)) {
- return EINVAL;
- }
- *oldlenp = sizeof(quad_t);
- if (oldp) {
- error = copyout(valp, oldp, sizeof(quad_t));
- }
- if (error == 0 && newp) {
- error = copyin(newp, valp, sizeof(quad_t));
- }
- return error;
-}
-
STATIC int
sysdoproc_filt_KERN_PROC_PID(proc_t p, void * arg)
{
SYSCTL_LONG(_kern, OID_AUTO, num_recycledvnodes,
CTLFLAG_RD | CTLFLAG_LOCKED,
&num_recycledvnodes, "");
+SYSCTL_COMPAT_INT(_kern, OID_AUTO, free_vnodes,
+ CTLFLAG_RD | CTLFLAG_LOCKED,
+ &freevnodes, 0, "");
STATIC int
sysctl_maxvnodes(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
#include <mach/task.h>
#include <mach/semaphore.h>
-extern lck_grp_t * sysctl_debug_test_stackshot_owner_grp; /* used for both mutexes and rwlocks */
-extern lck_mtx_t * sysctl_debug_test_stackshot_owner_init_mtx; /* used to protect lck_*_init */
+static LCK_GRP_DECLARE(sysctl_debug_test_stackshot_owner_grp, "test-stackshot-owner-grp");
+static LCK_MTX_DECLARE(sysctl_debug_test_stackshot_owner_init_mtx,
+ &sysctl_debug_test_stackshot_owner_grp);
/* This is a sysctl for testing collection of owner info on a lock in kernel space. A multi-threaded
* test from userland sets this sysctl in such a way that a thread blocks in kernel mode, and a
long long mtx_unslid_addr = (long long)VM_KERNEL_UNSLIDE_OR_PERM(&sysctl_debug_test_stackshot_owner_lck);
int error = sysctl_io_number(req, mtx_unslid_addr, sizeof(long long), (void*)&option, NULL);
- lck_mtx_lock(sysctl_debug_test_stackshot_owner_init_mtx);
+ lck_mtx_lock(&sysctl_debug_test_stackshot_owner_init_mtx);
if (!sysctl_debug_test_stackshot_mtx_inited) {
lck_mtx_init(&sysctl_debug_test_stackshot_owner_lck,
- sysctl_debug_test_stackshot_owner_grp,
+ &sysctl_debug_test_stackshot_owner_grp,
LCK_ATTR_NULL);
semaphore_create(kernel_task,
&sysctl_debug_test_stackshot_mutex_sem,
SYNC_POLICY_FIFO, 0);
sysctl_debug_test_stackshot_mtx_inited = 1;
}
- lck_mtx_unlock(sysctl_debug_test_stackshot_owner_init_mtx);
+ lck_mtx_unlock(&sysctl_debug_test_stackshot_owner_init_mtx);
if (!error) {
switch (option) {
semaphore_signal(sysctl_debug_test_stackshot_mutex_sem);
break;
case SYSCTL_DEBUG_MTX_TEARDOWN:
- lck_mtx_lock(sysctl_debug_test_stackshot_owner_init_mtx);
+ lck_mtx_lock(&sysctl_debug_test_stackshot_owner_init_mtx);
lck_mtx_destroy(&sysctl_debug_test_stackshot_owner_lck,
- sysctl_debug_test_stackshot_owner_grp);
+ &sysctl_debug_test_stackshot_owner_grp);
semaphore_destroy(kernel_task,
sysctl_debug_test_stackshot_mutex_sem);
sysctl_debug_test_stackshot_mtx_inited = 0;
- lck_mtx_unlock(sysctl_debug_test_stackshot_owner_init_mtx);
+ lck_mtx_unlock(&sysctl_debug_test_stackshot_owner_init_mtx);
break;
case -1: /* user just wanted to read the value, so do nothing */
break;
long long rwlck_unslid_addr = (long long)VM_KERNEL_UNSLIDE_OR_PERM(&sysctl_debug_test_stackshot_owner_rwlck);
int error = sysctl_io_number(req, rwlck_unslid_addr, sizeof(long long), (void*)&option, NULL);
- lck_mtx_lock(sysctl_debug_test_stackshot_owner_init_mtx);
+ lck_mtx_lock(&sysctl_debug_test_stackshot_owner_init_mtx);
if (!sysctl_debug_test_stackshot_rwlck_inited) {
lck_rw_init(&sysctl_debug_test_stackshot_owner_rwlck,
- sysctl_debug_test_stackshot_owner_grp,
+ &sysctl_debug_test_stackshot_owner_grp,
LCK_ATTR_NULL);
semaphore_create(kernel_task,
&sysctl_debug_test_stackshot_rwlck_sem,
0);
sysctl_debug_test_stackshot_rwlck_inited = 1;
}
- lck_mtx_unlock(sysctl_debug_test_stackshot_owner_init_mtx);
+ lck_mtx_unlock(&sysctl_debug_test_stackshot_owner_init_mtx);
if (!error) {
switch (option) {
semaphore_signal(sysctl_debug_test_stackshot_rwlck_sem);
break;
case SYSCTL_DEBUG_KRWLCK_TEARDOWN:
- lck_mtx_lock(sysctl_debug_test_stackshot_owner_init_mtx);
+ lck_mtx_lock(&sysctl_debug_test_stackshot_owner_init_mtx);
lck_rw_destroy(&sysctl_debug_test_stackshot_owner_rwlck,
- sysctl_debug_test_stackshot_owner_grp);
+ &sysctl_debug_test_stackshot_owner_grp);
semaphore_destroy(kernel_task,
sysctl_debug_test_stackshot_rwlck_sem);
sysctl_debug_test_stackshot_rwlck_inited = 0;
- lck_mtx_unlock(sysctl_debug_test_stackshot_owner_init_mtx);
+ lck_mtx_unlock(&sysctl_debug_test_stackshot_owner_init_mtx);
break;
case -1: /* user just wanted to read the value, so do nothing */
break;
SYSCTL_INT(_vm, OID_AUTO, vm_page_needed_delayed_work_ctx, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_delayed_work_ctx_needed, 0, "");
/* log message counters for persistence mode */
-extern uint32_t oslog_p_total_msgcount;
-extern uint32_t oslog_p_metadata_saved_msgcount;
-extern uint32_t oslog_p_metadata_dropped_msgcount;
-extern uint32_t oslog_p_error_count;
-extern uint32_t oslog_p_saved_msgcount;
-extern uint32_t oslog_p_dropped_msgcount;
-extern uint32_t oslog_p_boot_dropped_msgcount;
-extern uint32_t oslog_p_coprocessor_total_msgcount;
-extern uint32_t oslog_p_coprocessor_dropped_msgcount;
+SCALABLE_COUNTER_DECLARE(oslog_p_total_msgcount);
+SCALABLE_COUNTER_DECLARE(oslog_p_metadata_saved_msgcount);
+SCALABLE_COUNTER_DECLARE(oslog_p_metadata_dropped_msgcount);
+SCALABLE_COUNTER_DECLARE(oslog_p_error_count);
+SCALABLE_COUNTER_DECLARE(oslog_p_saved_msgcount);
+SCALABLE_COUNTER_DECLARE(oslog_p_dropped_msgcount);
+SCALABLE_COUNTER_DECLARE(oslog_p_boot_dropped_msgcount);
+SCALABLE_COUNTER_DECLARE(oslog_p_coprocessor_total_msgcount);
+SCALABLE_COUNTER_DECLARE(oslog_p_coprocessor_dropped_msgcount);
+SCALABLE_COUNTER_DECLARE(oslog_p_unresolved_kc_msgcount);
+SCALABLE_COUNTER_DECLARE(oslog_p_fmt_invalid_msgcount);
+SCALABLE_COUNTER_DECLARE(oslog_p_fmt_max_args_msgcount);
+SCALABLE_COUNTER_DECLARE(oslog_p_truncated_msgcount);
/* log message counters for streaming mode */
extern uint32_t oslog_s_total_msgcount;
extern uint32_t oslog_s_metadata_msgcount;
-extern uint32_t oslog_s_error_count;
+SCALABLE_COUNTER_DECLARE(oslog_s_error_count);
extern uint32_t oslog_s_streamed_msgcount;
extern uint32_t oslog_s_dropped_msgcount;
extern uint32_t vaddlog_msgcount;
extern uint32_t vaddlog_msgcount_dropped;
-SYSCTL_UINT(_debug, OID_AUTO, oslog_p_total_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_p_total_msgcount, 0, "");
-SYSCTL_UINT(_debug, OID_AUTO, oslog_p_metadata_saved_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_p_metadata_saved_msgcount, 0, "");
-SYSCTL_UINT(_debug, OID_AUTO, oslog_p_metadata_dropped_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_p_metadata_dropped_msgcount, 0, "");
-SYSCTL_UINT(_debug, OID_AUTO, oslog_p_error_count, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_p_error_count, 0, "");
-SYSCTL_UINT(_debug, OID_AUTO, oslog_p_saved_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_p_saved_msgcount, 0, "");
-SYSCTL_UINT(_debug, OID_AUTO, oslog_p_dropped_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_p_dropped_msgcount, 0, "");
-SYSCTL_UINT(_debug, OID_AUTO, oslog_p_boot_dropped_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_p_boot_dropped_msgcount, 0, "");
-SYSCTL_UINT(_debug, OID_AUTO, oslog_p_coprocessor_total_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_p_coprocessor_total_msgcount, 0, "");
-SYSCTL_UINT(_debug, OID_AUTO, oslog_p_coprocessor_dropped_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_p_coprocessor_dropped_msgcount, 0, "");
+SYSCTL_SCALABLE_COUNTER(_debug, oslog_p_total_msgcount, oslog_p_total_msgcount, "");
+SYSCTL_SCALABLE_COUNTER(_debug, oslog_p_metadata_saved_msgcount, oslog_p_metadata_saved_msgcount, "");
+SYSCTL_SCALABLE_COUNTER(_debug, oslog_p_metadata_dropped_msgcount, oslog_p_metadata_dropped_msgcount, "");
+SYSCTL_SCALABLE_COUNTER(_debug, oslog_p_error_count, oslog_p_error_count, "");
+SYSCTL_SCALABLE_COUNTER(_debug, oslog_p_saved_msgcount, oslog_p_saved_msgcount, "");
+SYSCTL_SCALABLE_COUNTER(_debug, oslog_p_dropped_msgcount, oslog_p_dropped_msgcount, "");
+SYSCTL_SCALABLE_COUNTER(_debug, oslog_p_boot_dropped_msgcount, oslog_p_boot_dropped_msgcount, "");
+SYSCTL_SCALABLE_COUNTER(_debug, oslog_p_coprocessor_total_msgcount, oslog_p_coprocessor_total_msgcount, "");
+SYSCTL_SCALABLE_COUNTER(_debug, oslog_p_coprocessor_dropped_msgcount, oslog_p_coprocessor_dropped_msgcount, "");
+SYSCTL_SCALABLE_COUNTER(_debug, oslog_p_unresolved_kc_msgcount, oslog_p_unresolved_kc_msgcount, "");
+
+SYSCTL_SCALABLE_COUNTER(_debug, oslog_p_fmt_invalid_msgcount, oslog_p_fmt_invalid_msgcount, "");
+SYSCTL_SCALABLE_COUNTER(_debug, oslog_p_fmt_max_args_msgcount, oslog_p_fmt_max_args_msgcount, "");
+SYSCTL_SCALABLE_COUNTER(_debug, oslog_p_truncated_msgcount, oslog_p_truncated_msgcount, "");
SYSCTL_UINT(_debug, OID_AUTO, oslog_s_total_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_s_total_msgcount, 0, "");
SYSCTL_UINT(_debug, OID_AUTO, oslog_s_metadata_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_s_metadata_msgcount, 0, "");
-SYSCTL_UINT(_debug, OID_AUTO, oslog_s_error_count, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_s_error_count, 0, "");
+SYSCTL_SCALABLE_COUNTER(_debug, oslog_s_error_count, oslog_s_error_count, "");
SYSCTL_UINT(_debug, OID_AUTO, oslog_s_streamed_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_s_streamed_msgcount, 0, "");
SYSCTL_UINT(_debug, OID_AUTO, oslog_s_dropped_msgcount, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, &oslog_s_dropped_msgcount, 0, "");
&driverkit_checkin_timed_out, "timestamp of dext checkin timeout");
#endif
+extern int IOGetVMMPresent(void);
+
static int
hv_vmm_present SYSCTL_HANDLER_ARGS
{
int hv_vmm_present = 0;
-#if defined (__arm64__)
- /* <rdar://problem/59966231> Need a way to determine if ARM xnu is running as a guest */
-#elif defined (__x86_64__)
- hv_vmm_present = cpuid_vmm_present();
-#endif
+ hv_vmm_present = IOGetVMMPresent();
return SYSCTL_OUT(req, &hv_vmm_present, sizeof(hv_vmm_present));
}
#endif
#if DEVELOPMENT || DEBUG
-SYSCTL_COMPAT_INT(_kern, OID_AUTO, development, CTLFLAG_RD | CTLFLAG_MASKED, NULL, 1, "");
+SYSCTL_COMPAT_INT(_kern, OID_AUTO, development, CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_KERN, NULL, 1, "");
#else
SYSCTL_COMPAT_INT(_kern, OID_AUTO, development, CTLFLAG_RD | CTLFLAG_MASKED, NULL, 0, "");
#endif
mach_port_name_t task_port_name;
task_t task;
size_t buffer_size = (req->oldptr != USER_ADDR_NULL) ? req->oldlen : 0;
- vmobject_list_output_t buffer;
+ vmobject_list_output_t buffer = NULL;
size_t output_size;
size_t entries;
+ /* from the "newptr" (write side) we get a task port name from the caller. */
+ error = SYSCTL_IN(req, &task_port_name, sizeof(mach_port_name_t));
+
+ if (error != 0) {
+ goto sysctl_get_vmobject_list_exit;
+ }
+
+ task = port_name_to_task_read(task_port_name);
+ if (task == TASK_NULL) {
+ error = ESRCH;
+ goto sysctl_get_vmobject_list_exit;
+ }
+
+ /* get the current size */
+ task_copy_vmobjects(task, NULL, 0, &entries);
+ size_t max_size = (entries > 0) ? entries * sizeof(vm_object_query_data_t) + sizeof(*buffer) : 0;
+
+ /* if buffer_size is specified, clamp it to the current size, then allocate the kernel buffer */
if (buffer_size) {
if (buffer_size < sizeof(*buffer) + sizeof(vm_object_query_data_t)) {
- return ENOMEM;
+ error = ENOMEM;
+ goto sysctl_get_vmobject_list_deallocate_and_exit;
}
+ buffer_size = (buffer_size > max_size) ? max_size : buffer_size;
buffer = kheap_alloc(KHEAP_TEMP, buffer_size, Z_WAITOK);
if (!buffer) {
error = ENOMEM;
- goto sysctl_get_vmobject_list_exit;
+ goto sysctl_get_vmobject_list_deallocate_and_exit;
}
} else {
buffer = NULL;
}
- /* we have a "newptr" (for write) we get a task port name from the caller. */
- error = SYSCTL_IN(req, &task_port_name, sizeof(mach_port_name_t));
-
- if (error != 0) {
- goto sysctl_get_vmobject_list_exit;
- }
-
- task = port_name_to_task(task_port_name);
- if (task == TASK_NULL) {
- error = ESRCH;
- goto sysctl_get_vmobject_list_exit;
- }
-
/* copy the vmobjects and vmobject data out of the task */
if (buffer_size == 0) {
- task_copy_vmobjects(task, NULL, 0, &entries);
- output_size = (entries > 0) ? entries * sizeof(vm_object_query_data_t) + sizeof(*buffer) : 0;
+ output_size = max_size;
} else {
task_copy_vmobjects(task, &buffer->data[0], buffer_size - sizeof(*buffer), &entries);
buffer->entries = (uint64_t)entries;
output_size = entries * sizeof(vm_object_query_data_t) + sizeof(*buffer);
}
- task_deallocate(task);
-
error = SYSCTL_OUT(req, (char*) buffer, output_size);
+sysctl_get_vmobject_list_deallocate_and_exit:
+ task_deallocate(task);
+
sysctl_get_vmobject_list_exit:
if (buffer) {
kheap_free(KHEAP_TEMP, buffer, buffer_size);
SYSCTL_PROC(_vm, OID_AUTO, get_owned_vmobjects,
CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_WR | CTLFLAG_MASKED | CTLFLAG_KERN | CTLFLAG_LOCKED | CTLFLAG_ANYBODY,
0, 0, sysctl_get_owned_vmobjects, "A", "get owned vmobjects in task");
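The handler above follows a write-then-read protocol: the new-value ("write") buffer carries a task port name, and the old-value ("read") buffer receives the vmobject list; with no old buffer it only reports the required size. A hedged userspace sketch (the output layout is assumed from the handler's use of a header plus an array of vm_object_query_data_t):

#include <sys/sysctl.h>
#include <mach/mach.h>
#include <stdlib.h>
#include <stdio.h>

/* Hypothetical caller: needs a send right to the target task. */
static void
dump_owned_vmobjects(mach_port_name_t task_port)
{
	size_t len = 0;
	void *buf;

	/* First call: no old buffer, so the kernel just reports the required size. */
	if (sysctlbyname("vm.get_owned_vmobjects", NULL, &len,
	    &task_port, sizeof(task_port)) != 0 || len == 0) {
		return;
	}
	if ((buf = malloc(len)) == NULL) {
		return;
	}
	/* Second call: the (clamped) buffer is filled with the vm object query data. */
	if (sysctlbyname("vm.get_owned_vmobjects", buf, &len,
	    &task_port, sizeof(task_port)) == 0) {
		printf("received %zu bytes of vm object data\n", len);
	}
	free(buf);
}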
+
+extern uint64_t num_static_scalable_counters;
+SYSCTL_QUAD(_kern, OID_AUTO, num_static_scalable_counters, CTLFLAG_RD | CTLFLAG_LOCKED, &num_static_scalable_counters, "");
+
+uuid_string_t trial_treatment_id;
+uuid_string_t trial_experiment_id;
+int trial_deployment_id = -1;
+
+SYSCTL_STRING(_kern, OID_AUTO, trial_treatment_id, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLFLAG_EXPERIMENT, trial_treatment_id, sizeof(trial_treatment_id), "");
+SYSCTL_STRING(_kern, OID_AUTO, trial_experiment_id, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLFLAG_EXPERIMENT, trial_experiment_id, sizeof(trial_experiment_id), "");
+SYSCTL_INT(_kern, OID_AUTO, trial_deployment_id, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLFLAG_EXPERIMENT, &trial_deployment_id, 0, "");
+
+#if DEVELOPMENT || DEBUG
+/* For unit testing setting factors & limits. */
+unsigned int testing_experiment_factor;
+EXPERIMENT_FACTOR_UINT(_kern, testing_experiment_factor, &testing_experiment_factor, 5, 10, "");
+#endif /* DEVELOPMENT || DEBUG */
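Tying the pieces together: these OIDs carry CTLFLAG_EXPERIMENT, so writes are gated by can_write_experiment_factors(), and EXPERIMENT_FACTOR_UINT presumably registers the generated experiment_factor handler with a [5, 10] range for this test factor. A hedged sketch of a write from an entitled tool (or from root on a DEVELOPMENT/DEBUG kernel):

#include <sys/sysctl.h>
#include <stdio.h>

/* Hypothetical caller: on a release kernel this needs the
 * com.apple.private.write-kr-experiment-factors entitlement, otherwise EPERM;
 * out-of-range values are rejected with EINVAL by the generated handler. */
static void
set_testing_factor(unsigned int value)
{
	if (sysctlbyname("kern.testing_experiment_factor",
	    NULL, NULL, &value, sizeof(value)) != 0) {
		perror("kern.testing_experiment_factor");
	}
}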
#define HZ 100 /* XXX */
/* simple lock used to access timezone, tz structure */
-lck_spin_t * tz_slock;
-lck_grp_t * tz_slock_grp;
-lck_attr_t * tz_slock_attr;
-lck_grp_attr_t *tz_slock_grp_attr;
+static LCK_GRP_DECLARE(tz_slock_grp, "tzlock");
+static LCK_SPIN_DECLARE(tz_slock, &tz_slock_grp);
static void setthetime(
struct timeval *tv);
-void time_zone_slock_init(void);
static boolean_t timeval_fixusec(struct timeval *t1);
/*
}
if (uap->tzp) {
- lck_spin_lock(tz_slock);
+ lck_spin_lock(&tz_slock);
ltz = tz;
- lck_spin_unlock(tz_slock);
+ lck_spin_unlock(&tz_slock);
error = copyout((caddr_t)&ltz, CAST_USER_ADDR_T(uap->tzp), sizeof(tz));
}
setthetime(&atv);
}
if (uap->tzp) {
- lck_spin_lock(tz_slock);
+ lck_spin_lock(&tz_slock);
tz = atz;
- lck_spin_unlock(tz_slock);
+ lck_spin_unlock(&tz_slock);
}
return 0;
}
}
#endif /* NETWORKING */
-void
-time_zone_slock_init(void)
-{
- /* allocate lock group attribute and group */
- tz_slock_grp_attr = lck_grp_attr_alloc_init();
-
- tz_slock_grp = lck_grp_alloc_init("tzlock", tz_slock_grp_attr);
-
- /* Allocate lock attribute */
- tz_slock_attr = lck_attr_alloc_init();
-
- /* Allocate the spin lock */
- tz_slock = lck_spin_alloc_init(tz_slock_grp, tz_slock_attr);
-}
-
int
__mach_bridge_remote_time(__unused struct proc *p, struct __mach_bridge_remote_time_args *mbrt_args, uint64_t *retval)
{
}
extern void OSKextResetAfterUserspaceReboot(void);
-extern void zone_gc(boolean_t);
+extern void zone_gc_drain(void);
int
usrctl(struct proc *p, __unused struct usrctl_args *uap, __unused int32_t *retval)
int shm_error = pshm_cache_purge_all(p);
int sem_error = psem_cache_purge_all(p);
- zone_gc(FALSE);
+ zone_gc_drain();
return shm_error != 0 ? shm_error : sem_error;
}
#define MAX_MBUF_TX_COMPL_FUNC 32
mbuf_tx_compl_func
mbuf_tx_compl_table[MAX_MBUF_TX_COMPL_FUNC];
-extern lck_rw_t *mbuf_tx_compl_tbl_lock;
+extern lck_rw_t mbuf_tx_compl_tbl_lock;
u_int32_t mbuf_tx_compl_index = 0;
#if (DEVELOPMENT || DEBUG)
{
u_int32_t i;
- lck_rw_lock_shared(mbuf_tx_compl_tbl_lock);
+ lck_rw_lock_shared(&mbuf_tx_compl_tbl_lock);
i = get_tx_compl_callback_index_locked(callback);
- lck_rw_unlock_shared(mbuf_tx_compl_tbl_lock);
+ lck_rw_unlock_shared(&mbuf_tx_compl_tbl_lock);
return i;
}
ASSERT(0);
return NULL;
}
- lck_rw_lock_shared(mbuf_tx_compl_tbl_lock);
+ lck_rw_lock_shared(&mbuf_tx_compl_tbl_lock);
cb = mbuf_tx_compl_table[idx];
- lck_rw_unlock_shared(mbuf_tx_compl_tbl_lock);
+ lck_rw_unlock_shared(&mbuf_tx_compl_tbl_lock);
return cb;
}
return EINVAL;
}
- lck_rw_lock_exclusive(mbuf_tx_compl_tbl_lock);
+ lck_rw_lock_exclusive(&mbuf_tx_compl_tbl_lock);
i = get_tx_compl_callback_index_locked(callback);
if (i != -1) {
}
}
unlock:
- lck_rw_unlock_exclusive(mbuf_tx_compl_tbl_lock);
+ lck_rw_unlock_exclusive(&mbuf_tx_compl_tbl_lock);
return error;
}
return EINVAL;
}
- lck_rw_lock_exclusive(mbuf_tx_compl_tbl_lock);
+ lck_rw_lock_exclusive(&mbuf_tx_compl_tbl_lock);
/* assume the worst */
error = ENOENT;
}
}
unlock:
- lck_rw_unlock_exclusive(mbuf_tx_compl_tbl_lock);
+ lck_rw_unlock_exclusive(&mbuf_tx_compl_tbl_lock);
return error;
}
continue;
}
- lck_rw_lock_shared(mbuf_tx_compl_tbl_lock);
+ lck_rw_lock_shared(&mbuf_tx_compl_tbl_lock);
callback = mbuf_tx_compl_table[i];
- lck_rw_unlock_shared(mbuf_tx_compl_tbl_lock);
+ lck_rw_unlock_shared(&mbuf_tx_compl_tbl_lock);
if (callback != NULL) {
callback(m->m_pkthdr.pkt_compl_context,
}
if (to->sa_len > sizeof(ss)) {
- MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME, M_WAITOK);
+ sa = kheap_alloc(KHEAP_TEMP, to->sa_len, Z_WAITOK);
if (sa == NULL) {
return ENOBUFS;
}
error = sobindlock(sock, sa, 1); /* will lock socket */
if (sa != NULL && want_free == TRUE) {
- FREE(sa, M_SONAME);
+ kheap_free(KHEAP_TEMP, sa, sa->sa_len);
}
return error;
}
if (to->sa_len > sizeof(ss)) {
- MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME,
- (flags & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK);
+ sa = kheap_alloc(KHEAP_TEMP, to->sa_len,
+ (flags & MSG_DONTWAIT) ? Z_NOWAIT : Z_WAITOK);
if (sa == NULL) {
return ENOBUFS;
}
socket_unlock(sock, 1);
if (sa != NULL && want_free == TRUE) {
- FREE(sa, M_SONAME);
+ kheap_free(KHEAP_TEMP, sa, sa->sa_len);
}
return error;
if (error == 0 && *psa == NULL) {
error = ENOMEM;
- } else if (error != 0 && *psa != NULL) {
+ } else if (error != 0) {
FREE(*psa, M_SONAME);
- *psa = NULL;
}
return error;
}
void
sock_freeaddr(struct sockaddr *sa)
{
- if (sa != NULL) {
- FREE(sa, M_SONAME);
- }
+ FREE(sa, M_SONAME);
}
errno_t
if (control != NULL) {
m_freem(control);
}
- if (fromsa != NULL) {
- FREE(fromsa, M_SONAME);
- }
+ FREE(fromsa, M_SONAME);
return error;
}
#define SFEF_NODETACH 0x2 /* Detach should not be called */
#define SFEF_NOSOCKET 0x4 /* Socket is gone */
+/*
+ * If you need accounting for KM_IFADDR consider using
+ * KALLOC_HEAP_DEFINE to define a view.
+ */
+#define KM_IFADDR KHEAP_DEFAULT
+
struct socket_filter_entry {
struct socket_filter_entry *sfe_next_onsocket;
struct socket_filter_entry *sfe_next_onfilter;
TAILQ_HEAD(socket_filter_list, socket_filter);
-static struct socket_filter_list sock_filter_head;
-static lck_rw_t *sock_filter_lock = NULL;
-static lck_mtx_t *sock_filter_cleanup_lock = NULL;
+static LCK_GRP_DECLARE(sock_filter_lock_grp, "socket filter lock");
+static LCK_RW_DECLARE(sock_filter_lock, &sock_filter_lock_grp);
+static LCK_MTX_DECLARE(sock_filter_cleanup_lock, &sock_filter_lock_grp);
+
+static struct socket_filter_list sock_filter_head =
+ TAILQ_HEAD_INITIALIZER(sock_filter_head);
static struct socket_filter_entry *sock_filter_cleanup_entries = NULL;
static thread_t sock_filter_cleanup_thread = NULL;
return 0;
}
-__private_extern__ void
-sflt_init(void)
-{
- lck_grp_attr_t *grp_attrib = NULL;
- lck_attr_t *lck_attrib = NULL;
- lck_grp_t *lck_group = NULL;
-
- TAILQ_INIT(&sock_filter_head);
-
- /* Allocate a rw lock */
- grp_attrib = lck_grp_attr_alloc_init();
- lck_group = lck_grp_alloc_init("socket filter lock", grp_attrib);
- lck_grp_attr_free(grp_attrib);
- lck_attrib = lck_attr_alloc_init();
- sock_filter_lock = lck_rw_alloc_init(lck_group, lck_attrib);
- sock_filter_cleanup_lock = lck_mtx_alloc_init(lck_group, lck_attrib);
- lck_grp_free(lck_group);
- lck_attr_free(lck_attrib);
-}
-
static void
sflt_retain_locked(struct socket_filter *filter)
{
if (os_ref_release_locked(&filter->sf_refcount) == 0) {
/* Call the unregistered function */
if (filter->sf_filter.sf_unregistered) {
- lck_rw_unlock_exclusive(sock_filter_lock);
+ lck_rw_unlock_exclusive(&sock_filter_lock);
filter->sf_filter.sf_unregistered(
filter->sf_filter.sf_handle);
- lck_rw_lock_exclusive(sock_filter_lock);
+ lck_rw_lock_exclusive(&sock_filter_lock);
}
/* Free the entry */
- FREE(filter, M_IFADDR);
+ kheap_free(KM_IFADDR, filter, sizeof(struct socket_filter));
}
}
/* That was the last reference */
/* Take the cleanup lock */
- lck_mtx_lock(sock_filter_cleanup_lock);
+ lck_mtx_lock(&sock_filter_cleanup_lock);
/* Put this item on the cleanup list */
entry->sfe_next_oncleanup = sock_filter_cleanup_entries;
}
/* Drop the cleanup lock */
- lck_mtx_unlock(sock_filter_cleanup_lock);
+ lck_mtx_unlock(&sock_filter_cleanup_lock);
} else if (old <= 0) {
panic("sflt_entry_release - sfe_refcount (%d) <= 0\n",
(int)old);
{
#pragma unused(blah, blah2)
while (1) {
- lck_mtx_lock(sock_filter_cleanup_lock);
+ lck_mtx_lock(&sock_filter_cleanup_lock);
while (sock_filter_cleanup_entries == NULL) {
/* Sleep until we've got something better to do */
msleep(&sock_filter_cleanup_entries,
- sock_filter_cleanup_lock, PWAIT,
+ &sock_filter_cleanup_lock, PWAIT,
"sflt_cleanup", NULL);
}
sock_filter_cleanup_entries = NULL;
/* Drop the lock */
- lck_mtx_unlock(sock_filter_cleanup_lock);
+ lck_mtx_unlock(&sock_filter_cleanup_lock);
/* Take the socket filter lock */
- lck_rw_lock_exclusive(sock_filter_lock);
+ lck_rw_lock_exclusive(&sock_filter_lock);
/* Cleanup every dead item */
struct socket_filter_entry *entry;
if ((entry->sfe_flags & SFEF_NODETACH) == 0 &&
entry->sfe_filter->sf_filter.sf_detach) {
entry->sfe_flags |= SFEF_NODETACH;
- lck_rw_unlock_exclusive(sock_filter_lock);
+ lck_rw_unlock_exclusive(&sock_filter_lock);
/*
* Warning - passing a potentially
entry->sfe_filter->sf_filter.sf_detach(
entry->sfe_cookie, entry->sfe_socket);
- lck_rw_lock_exclusive(sock_filter_lock);
+ lck_rw_lock_exclusive(&sock_filter_lock);
}
/*
sflt_release_locked(entry->sfe_filter);
entry->sfe_socket = NULL;
entry->sfe_filter = NULL;
- FREE(entry, M_IFADDR);
+ kheap_free(KM_IFADDR, entry, sizeof(struct socket_filter_entry));
}
/* Drop the socket filter lock */
- lck_rw_unlock_exclusive(sock_filter_lock);
+ lck_rw_unlock_exclusive(&sock_filter_lock);
}
/* NOTREACHED */
}
}
}
/* allocate the socket filter entry */
- MALLOC(entry, struct socket_filter_entry *, sizeof(*entry), M_IFADDR,
- M_WAITOK);
+ entry = kheap_alloc(KM_IFADDR, sizeof(struct socket_filter_entry),
+ Z_WAITOK);
if (entry == NULL) {
return ENOMEM;
}
* Release the filter lock --
* callers must be aware we will do this
*/
- lck_rw_unlock_exclusive(sock_filter_lock);
+ lck_rw_unlock_exclusive(&sock_filter_lock);
/* Unlock the socket */
if (socklocked) {
}
/* Lock the filters again */
- lck_rw_lock_exclusive(sock_filter_lock);
+ lck_rw_lock_exclusive(&sock_filter_lock);
/*
* If the attach function returns an error,
int result = EINVAL;
- lck_rw_lock_exclusive(sock_filter_lock);
+ lck_rw_lock_exclusive(&sock_filter_lock);
struct socket_filter *filter = NULL;
TAILQ_FOREACH(filter, &sock_filter_head, sf_global_next) {
result = sflt_attach_locked(socket, filter, 1);
}
- lck_rw_unlock_exclusive(sock_filter_lock);
+ lck_rw_unlock_exclusive(&sock_filter_lock);
return result;
}
*/
struct protosw *proto = so->so_proto->pr_protosw;
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
if (TAILQ_FIRST(&proto->pr_filter_head) != NULL) {
/* Promote lock to exclusive */
- if (!lck_rw_lock_shared_to_exclusive(sock_filter_lock)) {
- lck_rw_lock_exclusive(sock_filter_lock);
+ if (!lck_rw_lock_shared_to_exclusive(&sock_filter_lock)) {
+ lck_rw_lock_exclusive(&sock_filter_lock);
}
/*
filter = filter_next;
}
}
- lck_rw_done(sock_filter_lock);
+ lck_rw_done(&sock_filter_lock);
}
/*
__private_extern__ void
sflt_termsock(struct socket *so)
{
- lck_rw_lock_exclusive(sock_filter_lock);
+ lck_rw_lock_exclusive(&sock_filter_lock);
struct socket_filter_entry *entry;
entry->sfe_flags |= SFEF_NODETACH;
/* Drop the lock before calling the detach function */
- lck_rw_unlock_exclusive(sock_filter_lock);
+ lck_rw_unlock_exclusive(&sock_filter_lock);
sfe_filter->sf_filter.sf_detach(sfe_cookie, so);
- lck_rw_lock_exclusive(sock_filter_lock);
+ lck_rw_lock_exclusive(&sock_filter_lock);
/* Release the filter */
sflt_release_locked(sfe_filter);
}
}
- lck_rw_unlock_exclusive(sock_filter_lock);
+ lck_rw_unlock_exclusive(&sock_filter_lock);
}
struct socket_filter_entry *entry;
int unlocked = 0;
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
for (entry = so->so_filt; entry; entry = entry->sfe_next_onsocket) {
if ((entry->sfe_flags & SFEF_ATTACHED) &&
entry->sfe_filter->sf_filter.sf_notify &&
* the socket filter lock
*/
sflt_entry_retain(entry);
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
/* If the socket isn't already unlocked, unlock it */
if (unlocked == 0) {
* Take the socket filter lock again
* and release the entry
*/
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
sflt_entry_release(entry);
}
}
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
if (unlocked != 0) {
socket_lock(so, 0);
int unlocked = 0;
int error = 0;
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
for (entry = so->so_filt; entry && error == 0;
entry = entry->sfe_next_onsocket) {
if ((entry->sfe_flags & SFEF_ATTACHED) &&
* the socket filter lock
*/
sflt_entry_retain(entry);
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
/* If the socket isn't already unlocked, unlock it */
if (unlocked == 0) {
* Take the socket filter lock again
* and release the entry
*/
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
sflt_entry_release(entry);
}
}
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
if (unlocked) {
socket_lock(so, 0);
int unlocked = 0;
int error = 0;
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
for (entry = so->so_filt; entry && error == 0;
entry = entry->sfe_next_onsocket) {
if ((entry->sfe_flags & SFEF_ATTACHED) &&
* release the socket filter lock
*/
sflt_entry_retain(entry);
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
/* If the socket isn't already unlocked, unlock it */
if (unlocked == 0) {
* Take the socket filter lock again and
* release the entry
*/
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
sflt_entry_release(entry);
}
}
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
if (unlocked) {
socket_lock(so, 0);
int unlocked = 0;
int error = 0;
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
for (entry = so->so_filt; entry && error == 0;
entry = entry->sfe_next_onsocket) {
if ((entry->sfe_flags & SFEF_ATTACHED) &&
* the socket filter lock
*/
sflt_entry_retain(entry);
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
/* If the socket isn't already unlocked, unlock it */
if (unlocked == 0) {
* Take the socket filter lock again
* and release the entry
*/
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
sflt_entry_release(entry);
}
}
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
if (unlocked) {
socket_lock(so, 0);
int unlocked = 0;
int error = 0;
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
for (entry = so->so_filt; entry && error == 0;
entry = entry->sfe_next_onsocket) {
if ((entry->sfe_flags & SFEF_ATTACHED) &&
* release the socket filter lock
*/
sflt_entry_retain(entry);
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
/* If the socket isn't already unlocked, unlock it */
if (unlocked == 0) {
* Take the socket filter lock again
* and release the entry
*/
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
sflt_entry_release(entry);
}
}
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
if (unlocked) {
socket_lock(so, 0);
int unlocked = 0;
int error = 0;
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
for (entry = so->so_filt; entry && error == 0;
entry = entry->sfe_next_onsocket) {
if ((entry->sfe_flags & SFEF_ATTACHED) &&
* release the socket filter lock
*/
sflt_entry_retain(entry);
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
/* If the socket isn't already unlocked, unlock it */
if (unlocked == 0) {
* Take the socket filter lock again
* and release the entry
*/
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
sflt_entry_release(entry);
}
}
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
if (unlocked) {
socket_lock(so, 0);
int unlocked = 0;
int error = 0;
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
for (entry = so->so_filt; entry && error == 0;
entry = entry->sfe_next_onsocket) {
if ((entry->sfe_flags & SFEF_ATTACHED) &&
* the socket filter lock
*/
sflt_entry_retain(entry);
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
/* If the socket isn't already unlocked, unlock it */
if (unlocked == 0) {
* Take the socket filter lock again
* and release the entry
*/
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
sflt_entry_release(entry);
}
}
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
if (unlocked) {
socket_lock(so, 0);
int unlocked = 0;
int error = 0;
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
for (entry = so->so_filt; entry && error == 0;
entry = entry->sfe_next_onsocket) {
if ((entry->sfe_flags & SFEF_ATTACHED) &&
* the socket filter lock
*/
sflt_entry_retain(entry);
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
/* If the socket isn't already unlocked, unlock it */
if (unlocked == 0) {
* Take the socket filter lock again
* and release the entry
*/
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
sflt_entry_release(entry);
}
}
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
if (unlocked) {
socket_lock(so, 0);
int unlocked = 0;
int error = 0;
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
for (entry = so->so_filt; entry && error == 0;
entry = entry->sfe_next_onsocket) {
if ((entry->sfe_flags & SFEF_ATTACHED) &&
* the socket filter lock
*/
sflt_entry_retain(entry);
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
/* If the socket isn't already unlocked, unlock it */
if (unlocked == 0) {
* Take the socket filter lock again
* and release the entry
*/
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
sflt_entry_release(entry);
}
}
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
if (unlocked) {
socket_lock(so, 0);
int unlocked = 0;
int error = 0;
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
for (entry = so->so_filt; entry && error == 0;
entry = entry->sfe_next_onsocket) {
if ((entry->sfe_flags & SFEF_ATTACHED) &&
* the socket filter lock
*/
sflt_entry_retain(entry);
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
/* If the socket isn't already unlocked, unlock it */
if (unlocked == 0) {
* Take the socket filter lock again
* and release the entry
*/
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
sflt_entry_release(entry);
}
}
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
if (unlocked) {
socket_lock(so, 0);
int unlocked = 0;
int error = 0;
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
for (entry = so->so_filt; entry && error == 0;
entry = entry->sfe_next_onsocket) {
if ((entry->sfe_flags & SFEF_ATTACHED) &&
* the socket filter lock
*/
sflt_entry_retain(entry);
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
/* If the socket isn't already unlocked, unlock it */
if (unlocked == 0) {
* Take the socket filter lock again
* and release the entry
*/
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
sflt_entry_release(entry);
}
}
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
if (unlocked) {
socket_lock(so, 0);
int setsendthread = 0;
int error = 0;
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
for (entry = so->so_filt; entry && error == 0;
entry = entry->sfe_next_onsocket) {
/* skip if this is a subflow socket */
* release the socket filter lock
*/
sflt_entry_retain(entry);
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
/* If the socket isn't already unlocked, unlock it */
if (unlocked == 0) {
* Take the socket filter lock again
* and release the entry
*/
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
sflt_entry_release(entry);
}
}
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
if (unlocked) {
socket_lock(so, 0);
int error = 0;
int unlocked = 0;
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
for (entry = so->so_filt; entry && (error == 0);
entry = entry->sfe_next_onsocket) {
* release the socket filter lock
*/
sflt_entry_retain(entry);
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
/* If the socket isn't already unlocked, unlock it */
if (unlocked == 0) {
* Take the socket filter lock again
* and release the entry
*/
- lck_rw_lock_shared(sock_filter_lock);
+ lck_rw_lock_shared(&sock_filter_lock);
sflt_entry_release(entry);
}
}
- lck_rw_unlock_shared(sock_filter_lock);
+ lck_rw_unlock_shared(&sock_filter_lock);
if (unlocked) {
socket_lock(so, 0);
return EINVAL;
}
- lck_rw_lock_exclusive(sock_filter_lock);
+ lck_rw_lock_exclusive(&sock_filter_lock);
for (entry = socket->so_filt; entry; entry = entry->sfe_next_onsocket) {
if (entry->sfe_filter->sf_filter.sf_handle == handle &&
(entry->sfe_flags & SFEF_ATTACHED) != 0) {
if (entry != NULL) {
sflt_detach_locked(entry);
}
- lck_rw_unlock_exclusive(sock_filter_lock);
+ lck_rw_unlock_exclusive(&sock_filter_lock);
return result;
}
}
/* Allocate the socket filter */
- MALLOC(sock_filt, struct socket_filter *, sizeof(*sock_filt),
- M_IFADDR, M_WAITOK);
+ sock_filt = kheap_alloc(KM_IFADDR,
+ sizeof(struct socket_filter), Z_WAITOK | Z_ZERO);
if (sock_filt == NULL) {
return ENOBUFS;
}
- bzero(sock_filt, sizeof(*sock_filt));
-
/* Legacy sflt_filter length; current structure minus extended */
len = sizeof(*filter) - sizeof(struct sflt_filter_ext);
/*
}
bcopy(filter, &sock_filt->sf_filter, len);
- lck_rw_lock_exclusive(sock_filter_lock);
+ lck_rw_lock_exclusive(&sock_filter_lock);
/* Look for an existing entry */
TAILQ_FOREACH(match, &sock_filter_head, sf_global_next) {
if (match->sf_filter.sf_handle ==
INC_ATOMIC_INT64_LIM(net_api_stats.nas_sfltr_register_os_total);
}
}
- lck_rw_unlock_exclusive(sock_filter_lock);
+ lck_rw_unlock_exclusive(&sock_filter_lock);
if (match != NULL) {
- FREE(sock_filt, M_IFADDR);
+ kheap_free(KM_IFADDR, sock_filt, sizeof(struct socket_filter));
return EEXIST;
}
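/*
 * Editor's sketch (not part of the diff): the MALLOC()/FREE() to
 * kheap_alloc()/kheap_free() conversion used throughout this change.
 * Z_ZERO replaces the separate bzero(), and kheap_free() takes the
 * original allocation size; it also tolerates a NULL pointer, which is
 * why several "if (ptr != NULL)" guards are dropped further down.
 * KM_IFADDR is assumed to alias an existing kalloc heap, as in the
 * surrounding code.
 */
#if 0
struct socket_filter *sf;

sf = kheap_alloc(KM_IFADDR, sizeof(struct socket_filter), Z_WAITOK | Z_ZERO);
if (sf == NULL) {
	return ENOBUFS;
}
/* ... use sf ... */
kheap_free(KM_IFADDR, sf, sizeof(struct socket_filter));
#endif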
!SOCK_CHECK_TYPE(so, type)) {
continue;
}
- MALLOC(solist, struct solist *, sizeof(*solist),
- M_IFADDR, M_NOWAIT);
+ solist = kheap_alloc(KHEAP_TEMP, sizeof(struct solist), Z_NOWAIT);
if (!solist) {
continue;
}
!SOCK_CHECK_TYPE(so, type)) {
continue;
}
- MALLOC(solist, struct solist *, sizeof(*solist),
- M_IFADDR, M_NOWAIT);
+ solist = kheap_alloc(KHEAP_TEMP, sizeof(struct solist), Z_NOWAIT);
if (!solist) {
continue;
}
sock_release(so);
solist = solisthead;
solisthead = solisthead->next;
- FREE(solist, M_IFADDR);
+ kheap_free(KHEAP_TEMP, solist, sizeof(struct solist));
}
return error;
sflt_unregister(sflt_handle handle)
{
struct socket_filter *filter;
- lck_rw_lock_exclusive(sock_filter_lock);
+ lck_rw_lock_exclusive(&sock_filter_lock);
/* Find the entry by the handle */
TAILQ_FOREACH(filter, &sock_filter_head, sf_global_next) {
sflt_release_locked(filter);
}
- lck_rw_unlock_exclusive(sock_filter_lock);
+ lck_rw_unlock_exclusive(&sock_filter_lock);
if (filter == NULL) {
return ENOENT;
}
vmc = (struct version_min_command *) lcp;
ret = load_version(vmc, &found_version_cmd, imgp->ip_flags, result);
+#if XNU_TARGET_OS_OSX
+ if (ret == LOAD_SUCCESS) {
+ if (result->ip_platform == PLATFORM_IOS) {
+ vm_map_mark_alien(map);
+ } else {
+ assert(!vm_map_is_alien(map));
+ }
+ }
+#endif /* XNU_TARGET_OS_OSX */
break;
}
case LC_BUILD_VERSION: {
}
result->ip_platform = bvc->platform;
result->lr_sdk = bvc->sdk;
+ result->lr_min_sdk = bvc->minos;
found_version_cmd = TRUE;
+#if XNU_TARGET_OS_OSX
+ if (result->ip_platform == PLATFORM_IOS) {
+ vm_map_mark_alien(map);
+ } else {
+ assert(!vm_map_is_alien(map));
+ }
+#endif /* XNU_TARGET_OS_OSX */
break;
}
default:
{
uint32_t platform = 0;
uint32_t sdk;
+ uint32_t min_sdk;
if (vmc->cmdsize < sizeof(*vmc)) {
return LOAD_BADMACHO;
}
*found_version_cmd = TRUE;
sdk = vmc->sdk;
+ min_sdk = vmc->version;
switch (vmc->cmd) {
case LC_VERSION_MIN_MACOSX:
platform = PLATFORM_MACOS;
/* All LC_VERSION_MIN_* load commands are legacy and we will not be adding any more */
default:
sdk = (uint32_t)-1;
+ min_sdk = (uint32_t)-1;
__builtin_unreachable();
}
result->ip_platform = platform;
- result->lr_min_sdk = sdk;
+ result->lr_min_sdk = min_sdk;
+ result->lr_sdk = sdk;
return LOAD_SUCCESS;
}
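/*
 * Editor's sketch (not part of the diff): lr_min_sdk previously received
 * the SDK version; it now carries the minimum OS version (vmc->version or
 * bvc->minos), while lr_sdk keeps the SDK version. Both fields use the
 * Mach-O packed X.Y.Z encoding, e.g. 0x000c0103 decodes to 12.1.3:
 */
#if 0
static inline void
unpack_macho_version(uint32_t v, uint32_t *major, uint32_t *minor,
    uint32_t *patch)
{
	*major = v >> 16;
	*minor = (v >> 8) & 0xff;
	*patch = v & 0xff;
}
#endif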
/* Allocate wad-of-data from heap to reduce excessively deep stacks */
- MALLOC(dyld_data, void *, sizeof(*dyld_data), M_TEMP, M_WAITOK);
+ dyld_data = kheap_alloc(KHEAP_TEMP, sizeof(*dyld_data), Z_WAITOK);
header = &dyld_data->__header;
myresult = &dyld_data->__myresult;
macho_data = &dyld_data->__macho_data;
vnode_put(vp);
kheap_free(KHEAP_TEMP, va, sizeof(*va));
novp_out:
- FREE(dyld_data, M_TEMP);
+ kheap_free(KHEAP_TEMP, dyld_data, sizeof(*dyld_data));
return ret;
}
* caches when memory runs low.
*/
#define MCACHE_LIST_LOCK() { \
- lck_mtx_lock(mcache_llock); \
+ lck_mtx_lock(&mcache_llock); \
mcache_llock_owner = current_thread(); \
}
#define MCACHE_LIST_UNLOCK() { \
mcache_llock_owner = NULL; \
- lck_mtx_unlock(mcache_llock); \
+ lck_mtx_unlock(&mcache_llock); \
}
#define MCACHE_LOCK(l) lck_mtx_lock(l)
static unsigned int ncpu;
static unsigned int cache_line_size;
-static lck_mtx_t *mcache_llock;
static struct thread *mcache_llock_owner;
-static lck_attr_t *mcache_llock_attr;
-static lck_grp_t *mcache_llock_grp;
-static lck_grp_attr_t *mcache_llock_grp_attr;
+static LCK_GRP_DECLARE(mcache_llock_grp, "mcache.list");
+static LCK_MTX_DECLARE(mcache_llock, &mcache_llock_grp);
static struct zone *mcache_zone;
static const uint32_t mcache_reap_interval = 15;
static const uint32_t mcache_reap_interval_leeway = 2;
int mca_trn_max = MCA_TRN_MAX;
-#define DUMP_MCA_BUF_SIZE 512
-static char *mca_dump_buf;
-
static mcache_bkttype_t mcache_bkttype[] = {
{ 1, 4096, 32768, NULL },
{ 3, 2048, 16384, NULL },
static mcache_t *mcache_create_common(const char *, size_t, size_t,
mcache_allocfn_t, mcache_freefn_t, mcache_auditfn_t, mcache_logfn_t,
- mcache_notifyfn_t, void *, u_int32_t, int, int);
+ mcache_notifyfn_t, void *, u_int32_t, int);
static unsigned int mcache_slab_alloc(void *, mcache_obj_t ***,
unsigned int, int);
static void mcache_slab_free(void *, mcache_obj_t *, boolean_t);
ncpu = ml_wait_max_cpus();
(void) mcache_cache_line_size(); /* prime it */
- mcache_llock_grp_attr = lck_grp_attr_alloc_init();
- mcache_llock_grp = lck_grp_alloc_init("mcache.list",
- mcache_llock_grp_attr);
- mcache_llock_attr = lck_attr_alloc_init();
- mcache_llock = lck_mtx_alloc_init(mcache_llock_grp, mcache_llock_attr);
-
mcache_reap_tcall = thread_call_allocate(mcache_reap_timeout, NULL);
mcache_update_tcall = thread_call_allocate(mcache_update, NULL);
if (mcache_reap_tcall == NULL || mcache_update_tcall == NULL) {
*/
__private_extern__ mcache_t *
mcache_create(const char *name, size_t bufsize, size_t align,
- u_int32_t flags, int wait)
+ u_int32_t flags, int wait __unused)
{
return mcache_create_common(name, bufsize, align, mcache_slab_alloc,
- mcache_slab_free, mcache_slab_audit, NULL, NULL, NULL, flags, 1,
- wait);
+ mcache_slab_free, mcache_slab_audit, NULL, NULL, NULL, flags, 1);
}
/*
mcache_create_ext(const char *name, size_t bufsize,
mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg,
- u_int32_t flags, int wait)
+ u_int32_t flags, int wait __unused)
{
return mcache_create_common(name, bufsize, 0, allocfn,
- freefn, auditfn, logfn, notifyfn, arg, flags, 0, wait);
+ freefn, auditfn, logfn, notifyfn, arg, flags, 0);
}
/*
mcache_create_common(const char *name, size_t bufsize, size_t align,
mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg,
- u_int32_t flags, int need_zone, int wait)
+ u_int32_t flags, int need_zone)
{
mcache_bkttype_t *btp;
mcache_t *cp = NULL;
unsigned int c;
char lck_name[64];
- /* If auditing is on and print buffer is NULL, allocate it now */
- if ((flags & MCF_DEBUG) && mca_dump_buf == NULL) {
- int malloc_wait = (wait & MCR_NOSLEEP) ? M_NOWAIT : M_WAITOK;
- MALLOC(mca_dump_buf, char *, DUMP_MCA_BUF_SIZE, M_TEMP,
- malloc_wait | M_ZERO);
- if (mca_dump_buf == NULL) {
- return NULL;
- }
- }
-
- buf = zalloc(mcache_zone);
+ buf = zalloc_flags(mcache_zone, Z_WAITOK | Z_ZERO);
if (buf == NULL) {
goto fail;
}
- bzero(buf, MCACHE_ALLOC_SIZE);
-
/*
* In case we didn't get a cache-aligned memory, round it up
* accordingly. This is needed in order to get the rest of
(void) snprintf(cp->mc_name, sizeof(cp->mc_name), "mcache.%s", name);
(void) snprintf(lck_name, sizeof(lck_name), "%s.cpu", cp->mc_name);
- cp->mc_cpu_lock_grp_attr = lck_grp_attr_alloc_init();
- cp->mc_cpu_lock_grp = lck_grp_alloc_init(lck_name,
- cp->mc_cpu_lock_grp_attr);
- cp->mc_cpu_lock_attr = lck_attr_alloc_init();
+ cp->mc_cpu_lock_grp = lck_grp_alloc_init(lck_name, LCK_GRP_ATTR_NULL);
/*
* Allocation chunk size is the object's size plus any extra size
* Initialize the bucket layer.
*/
(void) snprintf(lck_name, sizeof(lck_name), "%s.bkt", cp->mc_name);
- cp->mc_bkt_lock_grp_attr = lck_grp_attr_alloc_init();
cp->mc_bkt_lock_grp = lck_grp_alloc_init(lck_name,
- cp->mc_bkt_lock_grp_attr);
- cp->mc_bkt_lock_attr = lck_attr_alloc_init();
- lck_mtx_init(&cp->mc_bkt_lock, cp->mc_bkt_lock_grp,
- cp->mc_bkt_lock_attr);
+ LCK_GRP_ATTR_NULL);
+ lck_mtx_init(&cp->mc_bkt_lock, cp->mc_bkt_lock_grp, LCK_ATTR_NULL);
(void) snprintf(lck_name, sizeof(lck_name), "%s.sync", cp->mc_name);
- cp->mc_sync_lock_grp_attr = lck_grp_attr_alloc_init();
cp->mc_sync_lock_grp = lck_grp_alloc_init(lck_name,
- cp->mc_sync_lock_grp_attr);
- cp->mc_sync_lock_attr = lck_attr_alloc_init();
- lck_mtx_init(&cp->mc_sync_lock, cp->mc_sync_lock_grp,
- cp->mc_sync_lock_attr);
+ LCK_GRP_ATTR_NULL);
+ lck_mtx_init(&cp->mc_sync_lock, cp->mc_sync_lock_grp, LCK_ATTR_NULL);
for (btp = mcache_bkttype; chunksize <= btp->bt_minbuf; btp++) {
continue;
mcache_cpu_t *ccp = &cp->mc_cpu[c];
VERIFY(IS_P2ALIGNED(ccp, CPU_CACHE_LINE_SIZE));
- lck_mtx_init(&ccp->cc_lock, cp->mc_cpu_lock_grp,
- cp->mc_cpu_lock_attr);
+ lck_mtx_init(&ccp->cc_lock, cp->mc_cpu_lock_grp, LCK_ATTR_NULL);
ccp->cc_objs = -1;
ccp->cc_pobjs = -1;
}
cp->mc_slab_free = NULL;
cp->mc_slab_audit = NULL;
- lck_attr_free(cp->mc_bkt_lock_attr);
lck_grp_free(cp->mc_bkt_lock_grp);
- lck_grp_attr_free(cp->mc_bkt_lock_grp_attr);
-
- lck_attr_free(cp->mc_cpu_lock_attr);
lck_grp_free(cp->mc_cpu_lock_grp);
- lck_grp_attr_free(cp->mc_cpu_lock_grp_attr);
-
- lck_attr_free(cp->mc_sync_lock_attr);
lck_grp_free(cp->mc_sync_lock_grp);
- lck_grp_attr_free(cp->mc_sync_lock_grp_attr);
/*
* TODO: We need to destroy the zone here, but cannot do it
int need_bkt_resize = 0;
int need_bkt_reenable = 0;
- lck_mtx_assert(mcache_llock, LCK_MTX_ASSERT_OWNED);
+ lck_mtx_assert(&mcache_llock, LCK_MTX_ASSERT_OWNED);
mcache_bkt_ws_update(cp);
#define MCA_TRN_PREV ((mca->mca_next_trn + mca_trn_max - 1) % mca_trn_max)
__private_extern__ char *
-mcache_dump_mca(mcache_audit_t *mca)
+mcache_dump_mca(char buf[static DUMP_MCA_BUF_SIZE], mcache_audit_t *mca)
{
- if (mca_dump_buf == NULL) {
- return NULL;
- }
-
- snprintf(mca_dump_buf, DUMP_MCA_BUF_SIZE,
+ snprintf(buf, DUMP_MCA_BUF_SIZE,
"mca %p: addr %p, cache %p (%s) nxttrn %d\n"
DUMP_TRN_FMT()
DUMP_TRN_FMT(),
DUMP_TRN_FIELDS("last", MCA_TRN_LAST),
DUMP_TRN_FIELDS("previous", MCA_TRN_PREV));
- return mca_dump_buf;
+ return buf;
}
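/*
 * Editor's note (not part of the diff): mcache_dump_mca() now formats into
 * a caller-provided buffer instead of the removed global mca_dump_buf, so
 * no allocation is needed at MCF_DEBUG setup time. A typical caller, as in
 * mcache_audit_panic() below:
 *
 *	char buf[DUMP_MCA_BUF_SIZE];
 *	printf("%s", mcache_dump_mca(buf, mca));
 */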
__private_extern__ void
mcache_audit_panic(mcache_audit_t *mca, void *addr, size_t offset,
int64_t expected, int64_t got)
{
+ char buf[DUMP_MCA_BUF_SIZE];
+
if (mca == NULL) {
panic("mcache_audit: buffer %p modified after free at "
"offset 0x%lx (0x%llx instead of 0x%llx)\n", addr,
panic("mcache_audit: buffer %p modified after free at offset 0x%lx "
"(0x%llx instead of 0x%llx)\n%s\n",
- addr, offset, got, expected, mcache_dump_mca(mca));
+ addr, offset, got, expected, mcache_dump_mca(buf, mca));
/* NOTREACHED */
__builtin_unreachable();
}
return rv;
}
-#if (MAC_POLICY_OPS_VERSION != 69)
+#if (MAC_POLICY_OPS_VERSION != 74)
# error "struct mac_policy_ops doesn't match definition in mac_policy.h"
#endif
/*
CHECK_SET_HOOK(mount_label_init)
CHECK_SET_HOOK(mount_label_internalize)
- .mpo_reserved38 = (mpo_reserved_hook_t *)common_hook,
- .mpo_reserved39 = (mpo_reserved_hook_t *)common_hook,
- .mpo_reserved40 = (mpo_reserved_hook_t *)common_hook,
+ CHECK_SET_HOOK(proc_check_expose_task_with_flavor)
+ CHECK_SET_HOOK(proc_check_get_task_with_flavor)
+ CHECK_SET_HOOK(proc_check_task_id_token_get_task)
CHECK_SET_HOOK(pipe_check_ioctl)
CHECK_SET_HOOK(pipe_check_kqfilter)
CHECK_SET_HOOK(socket_check_setsockopt)
CHECK_SET_HOOK(socket_check_getsockopt)
- .mpo_reserved50 = (mpo_reserved_hook_t *)common_hook,
- .mpo_reserved51 = (mpo_reserved_hook_t *)common_hook,
+ CHECK_SET_HOOK(proc_check_get_movable_control_port)
+ CHECK_SET_HOOK(proc_check_dyld_process_info_notify_register)
.mpo_reserved52 = (mpo_reserved_hook_t *)common_hook,
.mpo_reserved53 = (mpo_reserved_hook_t *)common_hook,
.mpo_reserved54 = (mpo_reserved_hook_t *)common_hook,
.mpo_reserved59 = (mpo_reserved_hook_t *)common_hook,
.mpo_reserved60 = (mpo_reserved_hook_t *)common_hook,
.mpo_reserved61 = (mpo_reserved_hook_t *)common_hook,
- .mpo_reserved62 = (mpo_reserved_hook_t *)common_hook,
+
+ CHECK_SET_HOOK(iokit_check_open_service)
CHECK_SET_HOOK(system_check_acct)
CHECK_SET_HOOK(system_check_audit)
.fo_kqfilter = fo_no_kqfilter,
};
-static lck_grp_t *psx_sem_subsys_lck_grp;
-static lck_grp_attr_t *psx_sem_subsys_lck_grp_attr;
-static lck_attr_t *psx_sem_subsys_lck_attr;
-static lck_mtx_t psx_sem_subsys_mutex;
+static LCK_GRP_DECLARE(psx_sem_subsys_lck_grp, "posix semaphores");
+static LCK_MTX_DECLARE(psx_sem_subsys_mutex, &psx_sem_subsys_lck_grp);
-#define PSEM_SUBSYS_LOCK() lck_mtx_lock(& psx_sem_subsys_mutex)
-#define PSEM_SUBSYS_UNLOCK() lck_mtx_unlock(& psx_sem_subsys_mutex)
+#define PSEM_SUBSYS_LOCK() lck_mtx_lock(&psx_sem_subsys_mutex)
+#define PSEM_SUBSYS_UNLOCK() lck_mtx_unlock(&psx_sem_subsys_mutex)
#define PSEM_SUBSYS_ASSERT_HELD() LCK_MTX_ASSERT(&psx_sem_subsys_mutex, LCK_MTX_ASSERT_OWNED)
static void psem_cache_delete(struct psemcache *pcp);
int psem_cache_purge_all(proc_t);
-
-/* Initialize the mutex governing access to the posix sem subsystem */
-__private_extern__ void
-psem_lock_init( void )
-{
- psx_sem_subsys_lck_grp_attr = lck_grp_attr_alloc_init();
-
- psx_sem_subsys_lck_grp = lck_grp_alloc_init("posix shared memory", psx_sem_subsys_lck_grp_attr);
-
- psx_sem_subsys_lck_attr = lck_attr_alloc_init();
- lck_mtx_init(&psx_sem_subsys_mutex, psx_sem_subsys_lck_grp, psx_sem_subsys_lck_attr);
-}
-
/*
* Lookup an entry in the cache
*
* allowed and the one at the front of the LRU list is in use.
* Otherwise we use the one at the front of the LRU list.
*/
- MALLOC(pcp, struct psemcache *, sizeof(struct psemcache), M_SHM, M_WAITOK | M_ZERO);
+ pcp = kheap_alloc(KM_SHM, sizeof(struct psemcache), Z_WAITOK | Z_ZERO);
if (pcp == PSEMCACHE_NULL) {
error = ENOMEM;
goto bad;
}
- MALLOC(new_pinfo, struct pseminfo *, sizeof(struct pseminfo), M_SHM, M_WAITOK | M_ZERO);
+ new_pinfo = kheap_alloc(KM_SHM, sizeof(struct pseminfo), Z_WAITOK | Z_ZERO);
if (new_pinfo == NULL) {
error = ENOSPC;
goto bad;
}
}
- MALLOC(new_pnode, struct psemnode *, sizeof(struct psemnode), M_SHM, M_WAITOK | M_ZERO);
+ new_pnode = kheap_alloc(KM_SHM, sizeof(struct psemnode), Z_WAITOK | Z_ZERO);
if (new_pnode == NULL) {
error = ENOSPC;
goto bad;
* new . and we must free them.
*/
if (incache) {
- FREE(pcp, M_SHM);
+ kheap_free(KM_SHM, pcp, sizeof(struct psemcache));
pcp = PSEMCACHE_NULL;
if (new_pinfo != PSEMINFO_NULL) {
/* return value ignored - we can't _not_ do this */
#if CONFIG_MACF
mac_posixsem_label_destroy(new_pinfo);
#endif
- FREE(new_pinfo, M_SHM);
+ kheap_free(KM_SHM, new_pinfo, sizeof(struct pseminfo));
new_pinfo = PSEMINFO_NULL;
}
}
bad_locked:
PSEM_SUBSYS_UNLOCK();
bad:
- if (pcp != PSEMCACHE_NULL) {
- FREE(pcp, M_SHM);
- }
+ kheap_free(KM_SHM, pcp, sizeof(struct psemcache));
- if (new_pnode != PSEMNODE_NULL) {
- FREE(new_pnode, M_SHM);
- }
+ kheap_free(KM_SHM, new_pnode, sizeof(struct psemnode));
if (fp != NULL) {
fp_free(p, indx, fp);
#if CONFIG_MACF
mac_posixsem_label_destroy(new_pinfo);
#endif
- FREE(new_pinfo, M_SHM);
+ kheap_free(KM_SHM, new_pinfo, sizeof(struct pseminfo));
}
if (pnbuf != NULL) {
if (!pinfo->psem_usecount) {
psem_delete(pinfo);
- FREE(pinfo, M_SHM);
+ kheap_free(KM_SHM, pinfo, sizeof(struct pseminfo));
} else {
pinfo->psem_flags |= PSEM_REMOVED;
}
psem_cache_delete(pcache);
- FREE(pcache, M_SHM);
+ kheap_free(KM_SHM, pcache, sizeof(struct psemcache));
return 0;
}
PSEM_SUBSYS_UNLOCK();
/* lock dropped as only semaphore is destroyed here */
error = psem_delete(pinfo);
- FREE(pinfo, M_SHM);
+ kheap_free(KM_SHM, pinfo, sizeof(struct pseminfo));
} else {
PSEM_SUBSYS_UNLOCK();
}
/* subsystem lock is dropped when we get here */
- FREE(pnode, M_SHM);
+ kheap_free(KM_SHM, pnode, sizeof(struct psemnode));
return error;
}
#include <mach/vm_prot.h>
#include <mach/vm_inherit.h>
#include <mach/kern_return.h>
-#include <mach/memory_object_control.h>
#include <vm/vm_map.h>
#include <vm/vm_protos.h>
/*
* Everything here is protected by a single mutex.
*/
-static lck_grp_t *psx_shm_subsys_lck_grp;
-static lck_grp_attr_t *psx_shm_subsys_lck_grp_attr;
-static lck_attr_t *psx_shm_subsys_lck_attr;
-static lck_mtx_t psx_shm_subsys_mutex;
+static LCK_GRP_DECLARE(psx_shm_subsys_lck_grp, "posix shared memory");
+static LCK_MTX_DECLARE(psx_shm_subsys_mutex, &psx_shm_subsys_lck_grp);
#define PSHM_SUBSYS_LOCK() lck_mtx_lock(& psx_shm_subsys_mutex)
#define PSHM_SUBSYS_UNLOCK() lck_mtx_unlock(& psx_shm_subsys_mutex)
#define PSHM_SUBSYS_ASSERT_HELD() LCK_MTX_ASSERT(&psx_shm_subsys_mutex, LCK_MTX_ASSERT_OWNED)
-
-__private_extern__ void
-pshm_lock_init( void )
-{
- psx_shm_subsys_lck_grp_attr = lck_grp_attr_alloc_init();
-
- psx_shm_subsys_lck_grp =
- lck_grp_alloc_init("posix shared memory", psx_shm_subsys_lck_grp_attr);
-
- psx_shm_subsys_lck_attr = lck_attr_alloc_init();
- lck_mtx_init(&psx_shm_subsys_mutex, psx_shm_subsys_lck_grp, psx_shm_subsys_lck_attr);
-}
-
/*
* Lookup an entry in the cache. Only the name is used from "look".
*/
* Allocate data structures we need. We parse the userspace name into
* a pshm_info_t, even when we don't need to O_CREAT.
*/
- MALLOC(new_pinfo, pshm_info_t *, sizeof(pshm_info_t), M_SHM, M_WAITOK | M_ZERO);
+ new_pinfo = kheap_alloc(KM_SHM, sizeof(pshm_info_t), Z_WAITOK | Z_ZERO);
if (new_pinfo == NULL) {
error = ENOSPC;
goto bad;
/*
* Will need a new pnode for the file pointer
*/
- MALLOC(new_pnode, pshmnode_t *, sizeof(pshmnode_t), M_SHM, M_WAITOK | M_ZERO);
+ new_pnode = kheap_alloc(KM_SHM, sizeof(pshmnode_t),
+ Z_WAITOK | Z_ZERO);
if (new_pnode == NULL) {
error = ENOSPC;
goto bad;
/*
* Delete any allocated unused data structures.
*/
- if (new_pnode != NULL) {
- FREE(new_pnode, M_SHM);
- }
+ kheap_free(KM_SHM, new_pnode, sizeof(pshmnode_t));
if (fp != NULL) {
fp_free(p, indx, fp);
mac_posixshm_label_destroy(&new_pinfo->pshm_hdr);
}
#endif
- FREE(new_pinfo, M_SHM);
+ kheap_free(KM_SHM, new_pinfo, sizeof(pshm_info_t));
}
return error;
}
}
/* get a list entry to track the memory object */
- MALLOC(pshmobj, pshm_mobj_t *, sizeof(pshm_mobj_t), M_SHM, M_WAITOK);
+ pshmobj = kheap_alloc(KM_SHM, sizeof(pshm_mobj_t), Z_WAITOK);
if (pshmobj == NULL) {
kret = KERN_NO_SPACE;
mach_memory_entry_port_release(mem_object);
SLIST_REMOVE_HEAD(&pinfo->pshm_mobjs, pshmo_next);
PSHM_SUBSYS_UNLOCK();
mach_memory_entry_port_release(pshmobj->pshmo_memobject);
- FREE(pshmobj, M_SHM);
+ kheap_free(KM_SHM, pshmobj, sizeof(pshm_mobj_t));
PSHM_SUBSYS_LOCK();
}
pinfo->pshm_flags &= ~PSHM_ALLOCATING;
/*
* Get the name from user args.
*/
- MALLOC(name_pinfo, pshm_info_t *, sizeof(pshm_info_t), M_SHM, M_WAITOK | M_ZERO);
+ name_pinfo = kheap_alloc(KHEAP_TEMP, sizeof(pshm_info_t),
+ Z_WAITOK | Z_ZERO);
if (name_pinfo == NULL) {
error = ENOSPC;
goto bad;
bad_unlock:
PSHM_SUBSYS_UNLOCK();
bad:
- if (name_pinfo != NULL) {
- FREE(name_pinfo, M_SHM);
- }
+ kheap_free(KHEAP_TEMP, name_pinfo, sizeof(pshm_info_t));
return error;
}
while ((pshmobj = SLIST_FIRST(&pinfo->pshm_mobjs)) != NULL) {
SLIST_REMOVE_HEAD(&pinfo->pshm_mobjs, pshmo_next);
mach_memory_entry_port_release(pshmobj->pshmo_memobject);
- FREE(pshmobj, M_SHM);
+ kheap_free(KM_SHM, pshmobj, sizeof(pshm_mobj_t));
}
/* free the pinfo itself */
- FREE(pinfo, M_SHM);
+ kheap_free(KM_SHM, pinfo, sizeof(pshm_info_t));
PSHM_SUBSYS_LOCK();
}
}
PSHM_SUBSYS_UNLOCK();
- if (pnode != NULL) {
- FREE(pnode, M_SHM);
- }
+ kheap_free(KM_SHM, pnode, sizeof(pshmnode_t));
return error;
}
#define dprintf(...) do { } while(0)
#endif
-static lck_grp_attr_t *proc_uuid_policy_subsys_lck_grp_attr;
-static lck_grp_t *proc_uuid_policy_subsys_lck_grp;
-static lck_attr_t *proc_uuid_policy_subsys_lck_attr;
-static lck_mtx_t proc_uuid_policy_subsys_mutex;
+static LCK_GRP_DECLARE(proc_uuid_policy_subsys_lck_grp,
+ "proc_uuid_policy_subsys_lock");
+static LCK_MTX_DECLARE(proc_uuid_policy_subsys_mutex,
+ &proc_uuid_policy_subsys_lck_grp);
#define PROC_UUID_POLICY_SUBSYS_LOCK() lck_mtx_lock(&proc_uuid_policy_subsys_mutex)
#define PROC_UUID_POLICY_SUBSYS_UNLOCK() lck_mtx_unlock(&proc_uuid_policy_subsys_mutex)
uint32_t flags; /* policy flag for that UUID */
};
+/*
+ * If you need accounting for KM_PROC_UUID_POLICY consider using
+ * KALLOC_HEAP_DEFINE to define a view.
+ */
+#define KM_PROC_UUID_POLICY KHEAP_DEFAULT
+
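/*
 * Editor's sketch (not part of the diff), illustrating the comment above.
 * The exact KALLOC_HEAP_DEFINE arguments (heap variable, tag string,
 * backing heap id) are an assumption based on kalloc.h; verify the
 * signature before use.
 */
#if 0
KALLOC_HEAP_DEFINE(KHEAP_PROC_UUID_POLICY, "proc_uuid_policy",
    KHEAP_ID_DEFAULT);
#define KM_PROC_UUID_POLICY KHEAP_PROC_UUID_POLICY
#endif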
static int
proc_uuid_policy_insert(uuid_t uuid, uint32_t flags);
void
proc_uuid_policy_init(void)
{
- proc_uuid_policy_subsys_lck_grp_attr = lck_grp_attr_alloc_init();
- proc_uuid_policy_subsys_lck_grp = lck_grp_alloc_init("proc_uuid_policy_subsys_lock", proc_uuid_policy_subsys_lck_grp_attr);
- proc_uuid_policy_subsys_lck_attr = lck_attr_alloc_init();
- lck_mtx_init(&proc_uuid_policy_subsys_mutex, proc_uuid_policy_subsys_lck_grp, proc_uuid_policy_subsys_lck_attr);
-
proc_uuid_policy_hashtbl = hashinit(PROC_UUID_POLICY_HASH_SIZE, M_PROC_UUID_POLICY, &proc_uuid_policy_hash_mask);
proc_uuid_policy_table_gencount = 1;
proc_uuid_policy_count = 0;
return EINVAL;
}
- MALLOC(entry, struct proc_uuid_policy_entry *, sizeof(*entry), M_PROC_UUID_POLICY, M_WAITOK | M_ZERO);
+ entry = kheap_alloc(KM_PROC_UUID_POLICY, sizeof(struct proc_uuid_policy_entry),
+ Z_WAITOK | Z_ZERO);
memcpy(entry->uuid, uuid, sizeof(uuid_t));
entry->flags = flags;
/* The UUID is already in the list. Update the flags. */
foundentry->flags |= flags;
error = 0;
- FREE(entry, M_PROC_UUID_POLICY);
+ kheap_free(KM_PROC_UUID_POLICY, entry, sizeof(struct proc_uuid_policy_entry));
entry = NULL;
BUMP_PROC_UUID_POLICY_GENERATION_COUNT();
} else {
PROC_UUID_POLICY_SUBSYS_UNLOCK();
if (error) {
- FREE(entry, M_PROC_UUID_POLICY);
+ kheap_free(KM_PROC_UUID_POLICY, entry, sizeof(struct proc_uuid_policy_entry));
dprintf("Failed to insert proc uuid policy (%s,0x%08x), table full\n", uuidstr, flags);
} else {
dprintf("Inserted proc uuid policy (%s,0x%08x)\n", uuidstr, flags);
/* If we had found a pre-existing entry, deallocate its memory now */
if (delentry && should_delete) {
- FREE(delentry, M_PROC_UUID_POLICY);
+ kheap_free(KM_PROC_UUID_POLICY, delentry, sizeof(struct proc_uuid_policy_entry));
}
if (error) {
/* Memory deallocation happens after the hash lock is dropped */
LIST_FOREACH_SAFE(searchentry, &deletehead, entries, tmpentry) {
LIST_REMOVE(searchentry, entries);
- FREE(searchentry, M_PROC_UUID_POLICY);
+ kheap_free(KM_PROC_UUID_POLICY, searchentry,
+ sizeof(struct proc_uuid_policy_entry));
}
dprintf("Clearing proc uuid policy table\n");
int evh_debug = 0;
-MALLOC_DEFINE(M_EVENTHANDLER, "eventhandler", "Event handler records");
-
SYSCTL_NODE(_kern, OID_AUTO, eventhandler, CTLFLAG_RW | CTLFLAG_LOCKED,
0, "Eventhandler");
SYSCTL_INT(_kern_eventhandler, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
/* List of 'slow' lists */
static struct eventhandler_lists_ctxt evthdlr_lists_ctxt_glb;
-static lck_grp_attr_t *eventhandler_mutex_grp_attr;
-static lck_grp_t *eventhandler_mutex_grp;
-static lck_attr_t *eventhandler_mutex_attr;
+static LCK_GRP_DECLARE(eventhandler_mutex_grp, "eventhandler");
static unsigned int eg_size; /* size of eventhandler_entry_generic */
static struct mcache *eg_cache; /* mcache for eventhandler_entry_generic */
static unsigned int el_size; /* size of eventhandler_list */
static struct mcache *el_cache; /* mcache for eventhandler_list */
-static lck_grp_attr_t *el_lock_grp_attr;
-lck_grp_t *el_lock_grp;
-lck_attr_t *el_lock_attr;
+LCK_GRP_DECLARE(el_lock_grp, "eventhandler list");
+LCK_ATTR_DECLARE(el_lock_attr, 0, 0);
struct eventhandler_entry_generic {
struct eventhandler_entry ee;
TAILQ_INIT(&evthdlr_lists_ctxt->eventhandler_lists);
evthdlr_lists_ctxt->eventhandler_lists_initted = 1;
lck_mtx_init(&evthdlr_lists_ctxt->eventhandler_mutex,
- eventhandler_mutex_grp, eventhandler_mutex_attr);
+ &eventhandler_mutex_grp, LCK_ATTR_NULL);
}
/*
void
eventhandler_init(void)
{
- eventhandler_mutex_grp_attr = lck_grp_attr_alloc_init();
- eventhandler_mutex_grp = lck_grp_alloc_init("eventhandler",
- eventhandler_mutex_grp_attr);
- eventhandler_mutex_attr = lck_attr_alloc_init();
-
- el_lock_grp_attr = lck_grp_attr_alloc_init();
- el_lock_grp = lck_grp_alloc_init("eventhandler list",
- el_lock_grp_attr);
- el_lock_attr = lck_attr_alloc_init();
-
eventhandler_lists_ctxt_init(&evthdlr_lists_ctxt_glb);
eg_size = sizeof(struct eventhandler_entry_generic);
}
lck_mtx_unlock(&evthdlr_lists_ctxt->eventhandler_mutex);
lck_mtx_destroy(&evthdlr_lists_ctxt->eventhandler_mutex,
- eventhandler_mutex_grp);
+ &eventhandler_mutex_grp);
return;
}
return !!SBUF_ISFINISHED(s);
}
-/*!
- * @function sbuf_uionew
- *
- * @brief
- * Create a new sbuf and initialize its buffer with data from the given uio.
- *
- * @param s
- * An optional existing sbuf to initialize, or NULL to allocate a new one.
- *
- * @param uio
- * The uio describing the data to populate the sbuf with.
- *
- * @param error
- * An output parameter to report any error to.
- *
- * @returns
- * The new and/or initialized sbuf, or NULL on error. The error code is
- * reported back via @a error.
- */
-struct sbuf *
-sbuf_uionew(struct sbuf *s, struct uio *uio, int *error)
-{
- int size;
-
- if ((user_size_t)uio_resid(uio) > INT_MAX - 1) {
- *error = EINVAL;
- return NULL;
- }
-
- size = (int)uio_resid(uio);
- s = sbuf_new(s, NULL, size + 1, 0);
- if (s == NULL) {
- *error = ENOMEM;
- return NULL;
- }
-
- *error = uiomove(s->s_buf, size, uio);
- if (*error != 0) {
- sbuf_delete(s);
- return NULL;
- }
-
- s->s_len = size;
- *error = 0;
-
- return s;
-}
-
-/*!
- * @function sbuf_bcopyin
- *
- * @brief
- * Append userland data to an sbuf.
- *
- * @param s
- * The sbuf.
- *
- * @param uaddr
- * The userland address of data to append to the sbuf.
- *
- * @param len
- * The length of the data to copy from userland.
- *
- * @returns
- * 0 on success or -1 on error. Always returns -1 if the sbuf is marked as
- * overflowed.
- */
-int
-sbuf_bcopyin(struct sbuf *s, const void *uaddr, size_t len)
-{
- if (SBUF_HASOVERFLOWED(s)) {
- return -1;
- }
-
- if (len == 0) {
- return 0;
- }
-
- if (-1 == sbuf_ensure_capacity(s, len)) {
- SBUF_SETFLAG(s, SBUF_OVERFLOWED);
- return -1;
- }
-
- if (copyin(CAST_USER_ADDR_T(uaddr), &s->s_buf[s->s_len], len) != 0) {
- return -1;
- }
-
- s->s_len += (int)len;
- return 0;
-}
-
-/*!
- * @function sbuf_copyin
- *
- * @brief
- * Append a userland string to an sbuf.
- *
- * @param s
- * The sbuf.
- *
- * @param uaddr
- * The userland address of the string to append to the sbuf.
- *
- * @param len
- * The maximum length of the string to copy. If zero, the current capacity of
- * the sbuf is used.
- *
- * @returns
- * The number of bytes copied or -1 if an error occurred. Always returns -1 if
- * the sbuf is marked as overflowed.
- */
-int
-sbuf_copyin(struct sbuf *s, const void *uaddr, size_t len)
-{
- size_t done;
-
- if (SBUF_HASOVERFLOWED(s)) {
- return -1;
- }
-
- if (len == 0) {
- len = sbuf_capacity(s);
- } else if (-1 == sbuf_ensure_capacity(s, len)) {
- return -1;
- }
-
- switch (copyinstr(CAST_USER_ADDR_T(uaddr), &s->s_buf[s->s_len], len + 1, &done)) {
- case ENAMETOOLONG:
- SBUF_SETFLAG(s, SBUF_OVERFLOWED);
- s->s_len += done;
- return -1;
- case 0:
- s->s_len += done - 1;
- break;
- default:
- return -1;
- }
-
- return (int)done;
-}
-
#if DEBUG || DEVELOPMENT
/*
}
}
- SBUF_TESTING("sbuf_uionew")
- {
- SBUF_SHOULD("reject residuals that are too large")
- {
- struct sbuf *s = NULL;
- uio_t auio = NULL;
- char buf[4];
- int error = 0;
-
- buf[0] = 'A';
- buf[1] = 'B';
- buf[2] = 'C';
- buf[3] = 'D';
-
- auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
- uio_addiov(auio, (user_addr_t)buf, INT_MAX);
-
- s = sbuf_uionew(NULL, auio, &error);
- SBUF_ASSERT_EQ(NULL, s);
- SBUF_ASSERT_EQ(EINVAL, error);
-
- uio_free(auio);
- }
-
- SBUF_SHOULD("initialize using data described by the uio")
- {
- struct sbuf *s = NULL;
- uio_t auio = NULL;
- char buf[4];
- int error = 0;
-
- buf[0] = 'A';
- buf[1] = 'B';
- buf[2] = 'C';
- buf[3] = 'D';
-
- auio = uio_create(1, 0, UIO_SYSSPACE, UIO_WRITE);
- uio_addiov(auio, (user_addr_t)buf, sizeof(buf));
-
- s = sbuf_uionew(NULL, auio, &error);
- SBUF_ASSERT_NE(NULL, s);
- SBUF_ASSERT_EQ(0, error);
- SBUF_ASSERT_EQ(4, s->s_len);
- SBUF_ASSERT_EQ('A', s->s_buf[0]);
- SBUF_ASSERT_EQ('B', s->s_buf[1]);
- SBUF_ASSERT_EQ('C', s->s_buf[2]);
- SBUF_ASSERT_EQ('D', s->s_buf[3]);
-
- sbuf_delete(s);
- uio_free(auio);
- }
-
- SBUF_SHOULD("fail gracefully for bad addresses")
- {
- struct sbuf *s = NULL;
- uio_t auio = NULL;
- int error = 0;
-
- auio = uio_create(1, 0, UIO_USERSPACE, UIO_WRITE);
- uio_addiov(auio, (user_addr_t)0xdeadUL, 123);
-
- s = sbuf_uionew(NULL, auio, &error);
- SBUF_ASSERT_EQ(NULL, s);
- SBUF_ASSERT_NE(0, error);
-
- uio_free(auio);
- }
- }
-
- SBUF_TESTING("sbuf_bcopyin")
- {
- SBUF_SHOULD("succeed when len is zero")
- {
- struct sbuf *s = NULL;
- const void *uptr = (const void *)req->newptr;
-
- s = sbuf_new(NULL, NULL, 16, 0);
- SBUF_ASSERT_EQ(0, sbuf_bcopyin(s, uptr, 0));
- SBUF_ASSERT_EQ(0, s->s_len);
-
- sbuf_delete(s);
- }
-
- SBUF_SHOULD("succeed in the simple case")
- {
- struct sbuf *s = NULL;
- const void *uptr = (const void *)req->newptr;
- size_t ulen = req->newlen;
-
- s = sbuf_new(NULL, NULL, 16, 0);
- SBUF_ASSERT_EQ(0, sbuf_bcopyin(s, uptr, ulen));
- SBUF_ASSERT_EQ(ulen, (size_t)s->s_len);
-
- sbuf_delete(s);
- }
-
- SBUF_SHOULD("fail for invalid userland addresses")
- {
- struct sbuf *s = NULL;
- const void *uptr = (const void *)0xdeadUL;
- size_t ulen = req->newlen;
-
- s = sbuf_new(NULL, NULL, 16, 0);
- SBUF_ASSERT_EQ(-1, sbuf_bcopyin(s, uptr, ulen));
- SBUF_ASSERT_EQ(0, s->s_len);
-
- sbuf_delete(s);
- }
-
- SBUF_SHOULD("fail for kernel addresses")
- {
- struct sbuf *s = NULL;
- const void *uptr = "abcd";
- size_t ulen = 4;
-
- s = sbuf_new(NULL, NULL, 16, 0);
- SBUF_ASSERT_EQ(-1, sbuf_bcopyin(s, uptr, ulen));
- SBUF_ASSERT_EQ(0, s->s_len);
-
- sbuf_delete(s);
- }
-
- SBUF_SHOULD("fail if we don't have capacity for a fixed-len sbuf")
- {
- struct sbuf *s = NULL;
- const void *uptr = (const void *)req->newptr;
- size_t ulen = req->newlen;
- int len_before;
-
- s = sbuf_new(NULL, NULL, 16, SBUF_FIXEDLEN);
- SBUF_ASSERT_EQ(0, sbuf_cpy(s, "0123456789abcde"));
- len_before = s->s_len;
- SBUF_ASSERT_EQ(-1, sbuf_bcopyin(s, uptr, ulen));
- SBUF_ASSERT_EQ(len_before, s->s_len);
- SBUF_ASSERT(SBUF_ISSET(s, SBUF_OVERFLOWED));
-
- sbuf_delete(s);
- }
-
- SBUF_SHOULD("auto-extend if we don't have capacity for an auto-extend sbuf")
- {
- struct sbuf *s = NULL;
- const void *uptr = (const void *)req->newptr;
- size_t ulen = req->newlen;
- int len_before;
-
- s = sbuf_new(NULL, NULL, 16, SBUF_AUTOEXTEND);
- SBUF_ASSERT_EQ(0, sbuf_cpy(s, "0123456789abcde"));
- len_before = s->s_len;
- SBUF_ASSERT_EQ(0, sbuf_bcopyin(s, uptr, ulen));
- SBUF_ASSERT_EQ(len_before + (int)ulen, s->s_len);
- SBUF_ASSERT_NOT(SBUF_ISSET(s, SBUF_OVERFLOWED));
-
- sbuf_delete(s);
- }
-
- SBUF_SHOULD("fail if overflowed")
- {
- struct sbuf *s = NULL;
- const void *uptr = (const void *)req->newptr;
- size_t ulen = req->newlen;
-
- s = sbuf_new(NULL, NULL, 16, 0);
- SBUF_SETFLAG(s, SBUF_OVERFLOWED);
- SBUF_ASSERT_EQ(-1, sbuf_bcopyin(s, uptr, ulen));
-
- sbuf_delete(s);
- }
- }
-
- SBUF_TESTING("sbuf_copyin")
- {
- SBUF_SHOULD("succeed in the simple case")
- {
- struct sbuf *s = NULL;
-
- s = sbuf_new(NULL, NULL, 16, SBUF_AUTOEXTEND);
- SBUF_ASSERT_EQ(req->newlen + 1, sbuf_copyin(s, (const void *)req->newptr, req->newlen));
- SBUF_ASSERT_EQ(req->newlen, s->s_len);
-
- sbuf_delete(s);
- }
-
- SBUF_SHOULD("use the sbuf capacity if len is zero")
- {
- struct sbuf *s = NULL;
-
- s = sbuf_new(NULL, NULL, 16, SBUF_AUTOEXTEND);
- SBUF_ASSERT_EQ(req->newlen + 1, sbuf_copyin(s, (const void *)req->newptr, 0));
- SBUF_ASSERT_EQ(req->newlen, s->s_len);
-
- sbuf_delete(s);
- }
-
- SBUF_SHOULD("fail if we can't extend the sbuf to accommodate")
- {
- struct sbuf *s = NULL;
-
- s = sbuf_new(NULL, NULL, 16, SBUF_FIXEDLEN);
- SBUF_ASSERT_EQ(0, sbuf_cpy(s, "0123456789abcde"));
- SBUF_ASSERT_EQ(-1, sbuf_copyin(s, (const void *)req->newptr, req->newlen));
-
- sbuf_delete(s);
- }
-
- SBUF_SHOULD("auto-extend the buffer if necessary")
- {
- struct sbuf *s = NULL;
- int len_before;
-
- s = sbuf_new(NULL, NULL, 16, SBUF_AUTOEXTEND);
- SBUF_ASSERT_EQ(0, sbuf_cpy(s, "0123456789abcde"));
- len_before = s->s_len;
- SBUF_ASSERT_NE(-1, sbuf_copyin(s, (const void *)req->newptr, req->newlen));
- SBUF_ASSERT_GT(len_before, s->s_len);
-
- sbuf_delete(s);
- }
-
- SBUF_SHOULD("fail if the sbuf is overflowed")
- {
- struct sbuf *s = NULL;
-
- s = sbuf_new(NULL, NULL, 16, SBUF_AUTOEXTEND);
- SBUF_SETFLAG(s, SBUF_OVERFLOWED);
- SBUF_ASSERT_EQ(-1, sbuf_copyin(s, (const void *)req->newptr, req->newlen));
-
- sbuf_delete(s);
- }
-
- SBUF_SHOULD("fail gracefully for an invalid address")
- {
- struct sbuf *s = NULL;
-
- s = sbuf_new(NULL, NULL, 16, SBUF_AUTOEXTEND);
- SBUF_ASSERT_EQ(-1, sbuf_copyin(s, (void *)0xdeadUL, req->newlen));
-
- sbuf_delete(s);
- }
-
- SBUF_SHOULD("fail gracefully for a kernel address")
- {
- struct sbuf *s = NULL;
- const char *ptr = "abcd";
-
- s = sbuf_new(NULL, NULL, 16, SBUF_AUTOEXTEND);
- SBUF_ASSERT_EQ(-1, sbuf_copyin(s, ptr, strlen(ptr)));
-
- sbuf_delete(s);
- }
- }
-
SBUF_TEST_END;
}
/* for entitlement check */
#include <IOKit/IOBSD.h>
+/*
+ * If you need accounting for KM_SELECT consider using
+ * KALLOC_HEAP_DEFINE to define a view.
+ */
+#define KM_SELECT KHEAP_DEFAULT
/* XXX should be in a header file somewhere */
extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp);
return err;
}
+void
+select_cleanup_uthread(struct _select *sel)
+{
+ kheap_free(KHEAP_DATA_BUFFERS, sel->ibits, 2 * sel->nbytes);
+ sel->ibits = sel->obits = NULL;
+ sel->nbytes = 0;
+}
+
+static int
+select_grow_uthread_cache(struct _select *sel, uint32_t nbytes)
+{
+ uint32_t *buf;
+
+ buf = kheap_alloc(KHEAP_DATA_BUFFERS, 2 * nbytes, Z_WAITOK | Z_ZERO);
+ if (buf) {
+ select_cleanup_uthread(sel);
+ sel->ibits = buf;
+ sel->obits = buf + nbytes / sizeof(uint32_t);
+ sel->nbytes = nbytes;
+ return true;
+ }
+ return false;
+}
+
+static void
+select_bzero_uthread_cache(struct _select *sel)
+{
+ bzero(sel->ibits, sel->nbytes * 2);
+}
+
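/*
 * Editor's note (not part of the diff): select_grow_uthread_cache() carves
 * a single 2 * nbytes allocation into both bitmaps, ibits at the start of
 * the buffer and obits nbytes further in, so select_cleanup_uthread() can
 * release the pair with one kheap_free() of 2 * sel->nbytes.
 */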
/*
* Generic implementation of {,p}select. Care: we type-pun uap across the two
* syscalls, which differ slightly. The first 4 arguments (nfds and the fd sets)
struct uthread *uth;
struct _select *sel;
struct _select_data *seldata;
- int needzerofill = 1;
int count = 0;
size_t sz = 0;
* it is not a POSIX compliant error code for select().
*/
if (sel->nbytes < (3 * ni)) {
- int nbytes = 3 * ni;
-
- /* Free previous allocation, if any */
- if (sel->ibits != NULL) {
- FREE(sel->ibits, M_TEMP);
- }
- if (sel->obits != NULL) {
- FREE(sel->obits, M_TEMP);
- /* NULL out; subsequent ibits allocation may fail */
- sel->obits = NULL;
- }
-
- MALLOC(sel->ibits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
- if (sel->ibits == NULL) {
- return EAGAIN;
- }
- MALLOC(sel->obits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
- if (sel->obits == NULL) {
- FREE(sel->ibits, M_TEMP);
- sel->ibits = NULL;
+ if (!select_grow_uthread_cache(sel, 3 * ni)) {
return EAGAIN;
}
- sel->nbytes = nbytes;
- needzerofill = 0;
- }
-
- if (needzerofill) {
- bzero((caddr_t)sel->ibits, sel->nbytes);
- bzero((caddr_t)sel->obits, sel->nbytes);
+ } else {
+ select_bzero_uthread_cache(sel);
}
/*
if (waitq_set_is_valid(uth->uu_wqset)) {
waitq_set_deinit(uth->uu_wqset);
}
- FREE(uth->uu_wqset, M_SELECT);
+ kheap_free(KM_SELECT, uth->uu_wqset, uth->uu_wqstate_sz);
} else if (uth->uu_wqstate_sz && !uth->uu_wqset) {
panic("select: thread structure corrupt! "
"uu_wqstate_sz:%ld, wqstate_buf == NULL",
uth->uu_wqstate_sz);
}
uth->uu_wqstate_sz = sz;
- MALLOC(uth->uu_wqset, struct waitq_set *, sz, M_SELECT, M_WAITOK);
+ uth->uu_wqset = kheap_alloc(KM_SELECT, sz, Z_WAITOK);
if (!uth->uu_wqset) {
panic("can't allocate %ld bytes for wqstate buffer",
uth->uu_wqstate_sz);
u_int nfds = uap->nfds;
u_int rfds = 0;
rlim_t nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);
+ size_t ni = nfds * sizeof(struct pollfd);
/*
* This is kinda bogus. We have fd limits, but that is not
}
if (nfds) {
- size_t ni = nfds * sizeof(struct pollfd);
- MALLOC(fds, struct pollfd *, ni, M_TEMP, M_WAITOK);
+ fds = kheap_alloc(KHEAP_TEMP, ni, Z_WAITOK);
if (NULL == fds) {
error = EAGAIN;
goto out;
}
out:
- if (NULL != fds) {
- FREE(fds, M_TEMP);
- }
+ kheap_free(KHEAP_TEMP, fds, ni);
kqueue_dealloc(kq);
return error;
#endif /* CONFIG_MACH_BRIDGE_RECV_TIME */
#if DEVELOPMENT || DEBUG
-#if __AMP__
+
#include <pexpert/pexpert.h>
extern int32_t sysctl_get_bound_cpuid(void);
-extern void sysctl_thread_bind_cpuid(int32_t cpuid);
+extern kern_return_t sysctl_thread_bind_cpuid(int32_t cpuid);
static int
sysctl_kern_sched_thread_bind_cpu SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
+ /*
+ * DO NOT remove this bootarg guard or make this non-development.
+ * This kind of binding should only be used for tests and
+ * experiments in a custom configuration, never shipping code.
+ */
+
if (!PE_parse_boot_argn("enable_skstb", NULL, 0)) {
return ENOENT;
}
}
if (changed) {
- sysctl_thread_bind_cpuid(new_value);
+ kern_return_t kr = sysctl_thread_bind_cpuid(new_value);
+
+ if (kr == KERN_NOT_SUPPORTED) {
+ return ENOTSUP;
+ }
+
+ if (kr == KERN_INVALID_VALUE) {
+ return ERANGE;
+ }
}
return error;
SYSCTL_PROC(_kern, OID_AUTO, sched_thread_bind_cpu, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
0, 0, sysctl_kern_sched_thread_bind_cpu, "I", "");
+#if __AMP__
extern char sysctl_get_bound_cluster_type(void);
extern void sysctl_thread_bind_cluster_type(char cluster_type);
static int
#endif /* CONFIG_SCHED_EDGE */
#endif /* __AMP__ */
+
+/* used for testing by exception_tests */
+extern uint32_t ipc_control_port_options;
+SYSCTL_INT(_kern, OID_AUTO, ipc_control_port_options,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &ipc_control_port_options, 0, "");
+
#endif /* DEVELOPMENT || DEBUG */
extern uint32_t task_exc_guard_default;
login = kinfo.persona_name[0] ? kinfo.persona_name : NULL;
if (u_idlen > 0) {
- MALLOC(persona, struct persona **, sizeof(*persona) * u_idlen,
- M_TEMP, M_WAITOK | M_ZERO);
+ persona = kheap_alloc(KHEAP_TEMP, sizeof(*persona) * u_idlen,
+ Z_WAITOK | Z_ZERO);
if (!persona) {
error = ENOMEM;
goto out;
for (size_t i = 0; i < u_idlen; i++) {
persona_put(persona[i]);
}
- FREE(persona, M_TEMP);
+ kheap_free(KHEAP_TEMP, persona, sizeof(*persona) * u_idlen);
}
(void)copyout(&k_idlen, idlenp, sizeof(u_idlen));
#include <kern/assert.h>
#include <kern/debug.h>
-#if OS_REASON_DEBUG
-#include <pexpert/pexpert.h>
-
-extern int os_reason_debug_disabled;
-#endif
-
extern int maxproc;
/*
os_refgrp_decl(static, os_reason_refgrp, "os_reason", NULL);
-#define OS_REASON_RESERVE_COUNT 100
-
static int os_reason_alloc_buffer_internal(os_reason_t cur_reason, uint32_t osr_bufsize,
zalloc_flags_t flags);
-void
-os_reason_init(void)
-{
- int reasons_allocated = 0;
-
- /*
- * We pre-fill the OS reason zone to reduce the likelihood that
- * the jetsam thread and others block when they create an exit
- * reason.
- */
- reasons_allocated = zfill(os_reason_zone, OS_REASON_RESERVE_COUNT);
- assert(reasons_allocated >= OS_REASON_RESERVE_COUNT);
-}
-
/*
* Creates a new reason and initializes it with the provided reason
* namespace and code. Also sets up the buffer and kcdata_descriptor
536 AUE_NULL ALL { int shared_region_map_and_slide_2_np(uint32_t files_count, const struct shared_file_np *files, uint32_t mappings_count, const struct shared_file_mapping_slide_np *mappings) NO_SYSCALL_STUB; }
537 AUE_NULL ALL { int pivot_root(const char *new_rootfs_path_before, const char *old_rootfs_path_after); }
538 AUE_TASKINSPECTFORPID ALL { int task_inspect_for_pid(mach_port_name_t target_tport, int pid, mach_port_name_t *t); }
-539 AUE_TASKINSPECTFORPID ALL { int task_read_for_pid(mach_port_name_t target_tport, int pid, mach_port_name_t *t); }
+539 AUE_TASKREADFORPID ALL { int task_read_for_pid(mach_port_name_t target_tport, int pid, mach_port_name_t *t); }
540 AUE_PREADV ALL { user_ssize_t sys_preadv(int fd, struct iovec *iovp, int iovcnt, off_t offset); }
541 AUE_PWRITEV ALL { user_ssize_t sys_pwritev(int fd, struct iovec *iovp, int iovcnt, off_t offset); }
542 AUE_PREADV ALL { user_ssize_t sys_preadv_nocancel(int fd, struct iovec *iovp, int iovcnt, off_t offset) NO_SYSCALL_STUB; }
struct msg *msghdrs; /* MSGTQL msg headers */
struct msqid_kernel *msqids; /* MSGMNI msqid_kernel structs (wrapping user_msqid_ds structs) */
-static lck_grp_t *sysv_msg_subsys_lck_grp;
-static lck_grp_attr_t *sysv_msg_subsys_lck_grp_attr;
-static lck_attr_t *sysv_msg_subsys_lck_attr;
-static lck_mtx_t sysv_msg_subsys_mutex;
+static LCK_GRP_DECLARE(sysv_msg_subsys_lck_grp, "sysv_msg_subsys_lock");
+static LCK_MTX_DECLARE(sysv_msg_subsys_mutex, &sysv_msg_subsys_lck_grp);
#define SYSV_MSG_SUBSYS_LOCK() lck_mtx_lock(&sysv_msg_subsys_mutex)
#define SYSV_MSG_SUBSYS_UNLOCK() lck_mtx_unlock(&sysv_msg_subsys_mutex)
-void sysv_msg_lock_init(void);
-
-
#ifdef __APPLE_API_PRIVATE
int msgmax, /* max chars in a message */
msgmni, /* max message queue identifiers */
};
#endif /* __APPLE_API_PRIVATE */
-/* Initialize the mutex governing access to the SysV msg subsystem */
-__private_extern__ void
-sysv_msg_lock_init( void )
-{
- sysv_msg_subsys_lck_grp_attr = lck_grp_attr_alloc_init();
-
- sysv_msg_subsys_lck_grp = lck_grp_alloc_init("sysv_msg_subsys_lock", sysv_msg_subsys_lck_grp_attr);
-
- sysv_msg_subsys_lck_attr = lck_attr_alloc_init();
- lck_mtx_init(&sysv_msg_subsys_mutex, sysv_msg_subsys_lck_grp, sysv_msg_subsys_lck_attr);
-}
-
static __inline__ user_time_t
sysv_msgtime(void)
{
* if this fails, fail safely and leave it uninitialized (related
* system calls will fail).
*/
- msgpool = (char *)_MALLOC(msginfo.msgmax, M_SHM, M_WAITOK);
+ msgpool = kheap_alloc(KHEAP_DATA_BUFFERS, msginfo.msgmax, Z_WAITOK);
if (msgpool == NULL) {
printf("msginit: can't allocate msgpool");
goto bad;
}
- MALLOC(msgmaps, struct msgmap *,
- sizeof(struct msgmap) * msginfo.msgseg,
- M_SHM, M_WAITOK);
+ msgmaps = kheap_alloc(KM_SHM, sizeof(struct msgmap) * msginfo.msgseg,
+ Z_WAITOK);
if (msgmaps == NULL) {
printf("msginit: can't allocate msgmaps");
goto bad;
}
- MALLOC(msghdrs, struct msg *,
- sizeof(struct msg) * msginfo.msgtql,
- M_SHM, M_WAITOK);
+ msghdrs = kheap_alloc(KM_SHM, sizeof(struct msg) * msginfo.msgtql,
+ Z_WAITOK);
if (msghdrs == NULL) {
printf("msginit: can't allocate msghdrs");
goto bad;
}
- MALLOC(msqids, struct msqid_kernel *,
- sizeof(struct msqid_kernel) * msginfo.msgmni,
- M_SHM, M_WAITOK);
+ msqids = kheap_alloc(KM_SHM,
+ sizeof(struct msqid_kernel) * msginfo.msgmni, Z_WAITOK);
if (msqids == NULL) {
printf("msginit: can't allocate msqids");
goto bad;
initted = 1;
bad:
if (!initted) {
- if (msgpool != NULL) {
- _FREE(msgpool, M_SHM);
- }
- if (msgmaps != NULL) {
- FREE(msgmaps, M_SHM);
- }
- if (msghdrs != NULL) {
- FREE(msghdrs, M_SHM);
- }
- if (msqids != NULL) {
- FREE(msqids, M_SHM);
- }
+ kheap_free(KHEAP_DATA_BUFFERS, msgpool, msginfo.msgmax);
+ kheap_free(KM_SHM, msgmaps,
+ sizeof(struct msgmap) * msginfo.msgseg);
+ kheap_free(KM_SHM, msghdrs,
+ sizeof(struct msg) * msginfo.msgtql);
+ kheap_free(KM_SHM, msqids,
+ sizeof(struct msqid_kernel) * msginfo.msgmni);
}
return initted;
}
for (len = 0; len < msgsz; len += msginfo.msgssz) {
size_t tlen;
- /* compare input (size_t) value against restrict (int) value */
- if (msgsz > (size_t)msginfo.msgssz) {
- tlen = msginfo.msgssz;
- } else {
- tlen = msgsz;
- }
+ /*
+ * copy the full segment, or less if we're at the end
+ * of the message
+ */
+ tlen = MIN(msgsz - len, (size_t)msginfo.msgssz);
if (next <= -1) {
panic("next too low #3");
}
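/*
 * Editor's note (not part of the diff): with msgssz == 64, a 100-byte
 * message now copies tlen = MIN(100 - 0, 64) = 64 bytes on the first pass
 * and tlen = MIN(100 - 64, 64) = 36 on the second. The old comparison used
 * the total msgsz rather than the bytes remaining, so the final partial
 * segment of a multi-segment message was handled at the full segment size.
 */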
#define MPRINTF(a)
#endif
-#define M_SYSVSEM M_TEMP
+#define KM_SYSVSEM KHEAP_DEFAULT
/* Hard system limits to avoid resource starvation / DOS attacks.
struct sem_undo *semu = NULL; /* semaphore undo pool */
-void sysv_sem_lock_init(void);
-static lck_grp_t *sysv_sem_subsys_lck_grp;
-static lck_grp_attr_t *sysv_sem_subsys_lck_grp_attr;
-static lck_attr_t *sysv_sem_subsys_lck_attr;
-static lck_mtx_t sysv_sem_subsys_mutex;
+static LCK_GRP_DECLARE(sysv_sem_subsys_lck_grp, "sysv_sem_subsys_lock");
+static LCK_MTX_DECLARE(sysv_sem_subsys_mutex, &sysv_sem_subsys_lck_grp);
#define SYSV_SEM_SUBSYS_LOCK() lck_mtx_lock(&sysv_sem_subsys_mutex)
#define SYSV_SEM_SUBSYS_UNLOCK() lck_mtx_unlock(&sysv_sem_subsys_mutex)
-
-__private_extern__ void
-sysv_sem_lock_init( void )
-{
- sysv_sem_subsys_lck_grp_attr = lck_grp_attr_alloc_init();
-
- sysv_sem_subsys_lck_grp = lck_grp_alloc_init("sysv_sem_subsys_lock", sysv_sem_subsys_lck_grp_attr);
-
- sysv_sem_subsys_lck_attr = lck_attr_alloc_init();
- lck_mtx_init(&sysv_sem_subsys_mutex, sysv_sem_subsys_lck_grp, sysv_sem_subsys_lck_attr);
-}
-
static __inline__ user_time_t
sysv_semtime(void)
{
#ifdef SEM_DEBUG
printf("growing semu[] from %d to %d\n", seminfo.semmnu, newSize);
#endif
- MALLOC(newSemu, struct sem_undo *, sizeof(struct sem_undo) * newSize,
- M_SYSVSEM, M_WAITOK | M_ZERO);
+ newSemu = kheap_alloc(KM_SYSVSEM, sizeof(struct sem_undo) * newSize,
+ Z_WAITOK | Z_ZERO);
if (NULL == newSemu) {
#ifdef SEM_DEBUG
printf("allocation failed. no changes made.\n");
}
/*
* The new elements (from newSemu[i] to newSemu[newSize-1]) have their
- * "un_proc" set to 0 (i.e. NULL) by the M_ZERO flag to MALLOC() above,
- * so they're already marked as "not in use".
+ * "un_proc" set to 0 (i.e. NULL) by the Z_ZERO flag to kheap_alloc
+ * above, so they're already marked as "not in use".
*/
/* Clean up the old array */
- if (semu) {
- FREE(semu, M_SYSVSEM);
- }
+ kheap_free(KM_SYSVSEM, semu, sizeof(struct sem_undo) * seminfo.semmnu);
semu = newSemu;
seminfo.semmnu = newSize;
#ifdef SEM_DEBUG
printf("growing sema[] from %d to %d\n", seminfo.semmni, newSize);
#endif
- MALLOC(newSema, struct semid_kernel *,
- sizeof(struct semid_kernel) * newSize,
- M_SYSVSEM, M_WAITOK | M_ZERO);
+ newSema = kheap_alloc(KM_SYSVSEM, sizeof(struct semid_kernel) * newSize,
+ Z_WAITOK | Z_ZERO);
if (NULL == newSema) {
#ifdef SEM_DEBUG
printf("allocation failed. no changes made.\n");
/*
* The new elements (from newSema[i] to newSema[newSize-1]) have their
- * "sem_base" and "sem_perm.mode" set to 0 (i.e. NULL) by the M_ZERO
- * flag to MALLOC() above, so they're already marked as "not in use".
+ * "sem_base" and "sem_perm.mode" set to 0 (i.e. NULL) by the Z_ZERO
+ * flag to kheap_alloc above, so they're already marked as "not in use".
*/
/* Clean up the old array */
- if (sema) {
- FREE(sema, M_SYSVSEM);
- }
+ kheap_free(KM_SYSVSEM, sema,
+ sizeof(struct semid_kernel) * seminfo.semmni);
sema = newSema;
seminfo.semmni = newSize;
#ifdef SEM_DEBUG
printf("growing sem_pool array from %d to %d\n", seminfo.semmns, new_pool_size);
#endif
- MALLOC(new_sem_pool, struct sem *, sizeof(struct sem) * new_pool_size,
- M_SYSVSEM, M_WAITOK | M_ZERO | M_NULL);
+ new_sem_pool = kheap_alloc(KM_SYSVSEM, sizeof(struct sem) * new_pool_size,
+ Z_WAITOK | Z_ZERO);
if (NULL == new_sem_pool) {
#ifdef SEM_DEBUG
printf("allocation failed. no changes made.\n");
sem_pool = new_sem_pool;
/* clean up the old array */
- if (sem_free != NULL) {
- FREE(sem_free, M_SYSVSEM);
- }
+ kheap_free(KM_SYSVSEM, sem_free, sizeof(struct sem) * seminfo.semmns);
seminfo.semmns = new_pool_size;
#ifdef SEM_DEBUG
if (sueptr->une_adjval == 0) {
suptr->un_cnt--;
*suepptr = sueptr->une_next;
- FREE(sueptr, M_SYSVSEM);
- sueptr = NULL;
+ kheap_free(KM_SYSVSEM, sueptr, sizeof(struct undo));
}
return 0;
}
}
/* allocate a new semaphore undo entry */
- MALLOC(new_sueptr, struct undo *, sizeof(struct undo),
- M_SYSVSEM, M_WAITOK);
+ new_sueptr = kheap_alloc(KM_SYSVSEM, sizeof(struct undo), Z_WAITOK);
if (new_sueptr == NULL) {
return ENOMEM;
}
if (semnum == -1 || sueptr->une_num == semnum) {
suptr->un_cnt--;
*suepptr = sueptr->une_next;
- FREE(sueptr, M_SYSVSEM);
+ kheap_free(KM_SYSVSEM, sueptr, sizeof(struct undo));
sueptr = *suepptr;
continue;
}
#endif
suptr->un_cnt--;
suptr->un_ent = sueptr->une_next;
- FREE(sueptr, M_SYSVSEM);
- sueptr = NULL;
+ kheap_free(KM_SYSVSEM, sueptr, sizeof(struct undo));
}
}
#if SYSV_SHM
static int shminit(void);
-static lck_grp_t *sysv_shm_subsys_lck_grp;
-static lck_grp_attr_t *sysv_shm_subsys_lck_grp_attr;
-static lck_attr_t *sysv_shm_subsys_lck_attr;
-static lck_mtx_t sysv_shm_subsys_mutex;
+static LCK_GRP_DECLARE(sysv_shm_subsys_lck_grp, "sysv_shm_subsys_lock");
+static LCK_MTX_DECLARE(sysv_shm_subsys_mutex, &sysv_shm_subsys_lck_grp);
#define SYSV_SHM_SUBSYS_LOCK() lck_mtx_lock(&sysv_shm_subsys_mutex)
#define SYSV_SHM_SUBSYS_UNLOCK() lck_mtx_unlock(&sysv_shm_subsys_mutex)
#endif /* __APPLE_API_PRIVATE */
-void sysv_shm_lock_init(void);
-
static __inline__ time_t
sysv_shmtime(void)
{
shm_handle = shm_handle_next) {
shm_handle_next = shm_handle->shm_handle_next;
mach_memory_entry_port_release(shm_handle->shm_object);
- FREE(shm_handle, M_SHM);
+ kheap_free(KM_SHM, shm_handle, sizeof(struct shm_handle));
}
shmseg->u.shm_internal = USER_ADDR_NULL; /* tunnel */
size = vm_map_round_page(shmseg->u.shm_segsz,
goto shmat_out;
}
- MALLOC(shmmap_s, struct shmmap_state *, size, M_SHM, M_WAITOK | M_NULL);
+ shmmap_s = kheap_alloc(KM_SHM, size, Z_WAITOK);
if (shmmap_s == NULL) {
shmat_ret = ENOMEM;
goto shmat_out;
goto out;
}
- MALLOC(shm_handle, struct shm_handle *, sizeof(struct shm_handle), M_SHM, M_WAITOK);
+ shm_handle = kheap_alloc(KM_SHM, sizeof(struct shm_handle), Z_WAITOK);
if (shm_handle == NULL) {
kret = KERN_NO_SPACE;
mach_memory_entry_port_release(mem_object);
shm_handle = shm_handle_next) {
shm_handle_next = shm_handle->shm_handle_next;
mach_memory_entry_port_release(shm_handle->shm_object);
- FREE(shm_handle, M_SHM);
+ kheap_free(KM_SHM, shm_handle, sizeof(struct shm_handle));
}
shmseg->u.shm_internal = USER_ADDR_NULL; /* tunnel */
}
ret = 1;
goto shmfork_out;
}
- MALLOC(shmmap_s, struct shmmap_state *, size, M_SHM, M_WAITOK);
+ shmmap_s = kheap_alloc(KM_SHM, size, Z_WAITOK);
if (shmmap_s == NULL) {
ret = 1;
goto shmfork_out;
shmcleanup(struct proc *p, int deallocate)
{
struct shmmap_state *shmmap_s;
+ size_t size = 0;
+ int nsegs = 0;
SYSV_SHM_SUBSYS_LOCK();
shmmap_s = (struct shmmap_state *)p->vm_shm;
for (; shmmap_s->shmid != SHMID_SENTINEL; shmmap_s++) {
+ nsegs++;
if (SHMID_IS_VALID(shmmap_s->shmid)) {
/*
* XXX: Should the MAC framework enforce
}
}
- FREE(p->vm_shm, M_SHM);
- p->vm_shm = NULL;
+ if (os_add_and_mul_overflow(nsegs, 1, sizeof(struct shmmap_state), &size)) {
+ panic("shmcleanup: p->vm_shm buffer was correupted\n");
+ }
+ kheap_free(KM_SHM, p->vm_shm, size);
SYSV_SHM_SUBSYS_UNLOCK();
}
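/*
 * Editor's sketch (not part of the diff): os_add_and_mul_overflow() comes
 * from <os/overflow.h>; the usual semantics (assumed here) are that it
 * stores (nsegs + 1) * sizeof(struct shmmap_state) into size and returns
 * true if the computation overflowed, which is what the panic above guards
 * against. A non-panicking caller might look like:
 */
#if 0
size_t bytes;

if (os_add_and_mul_overflow(nsegs, 1, sizeof(struct shmmap_state), &bytes)) {
	return ENOMEM;	/* (nsegs + 1) * sizeof(...) overflowed */
}
shmmap_s = kheap_alloc(KM_SHM, bytes, Z_WAITOK);
#endif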
return ENOMEM;
}
- MALLOC(shmsegs, struct shmid_kernel *, sz, M_SHM, M_WAITOK | M_ZERO);
+ shmsegs = zalloc_permanent(sz, ZALIGN_PTR);
if (shmsegs == NULL) {
return ENOMEM;
}
return 0;
}
-/* Initialize the mutex governing access to the SysV shm subsystem */
-__private_extern__ void
-sysv_shm_lock_init( void )
-{
- sysv_shm_subsys_lck_grp_attr = lck_grp_attr_alloc_init();
-
- sysv_shm_subsys_lck_grp = lck_grp_alloc_init("sysv_shm_subsys_lock", sysv_shm_subsys_lck_grp_attr);
-
- sysv_shm_subsys_lck_attr = lck_attr_alloc_init();
- lck_mtx_init(&sysv_shm_subsys_mutex, sysv_shm_subsys_lck_grp, sysv_shm_subsys_lck_attr);
-}
-
/* (struct sysctl_oid *oidp, void *arg1, int arg2, \
* struct sysctl_req *req) */
static int
0x14000E8 MACH_AMP_RECOMMENDATION_CHANGE
0x14000EC MACH_AMP_PERFCTL_POLICY_CHANGE
0x1400100 MACH_TURNSTILE_KERNEL_CHANGE
+0x140010C MACH_SET_RT_DEADLINE
+0x1400110 MACH_CANCEL_RT_DEADLINE
0x1400140 MACH_PSET_AVG_EXEC_TIME
0x1500000 MACH_MSGID_INVALID
0x1600000 MTX_SLEEP
0x313016C VFS_label_associate_fdesc
0x3130170 VFS_mount_check_snapshot_mount
0x3130174 VFS_check_supplemental_signature
+0x3134000 VFS_io_compression_stats
0x3CF0000 CP_OFFSET_IO
0x4010004 proc_exit
0x4010008 force_exit
0x01ab000c WORKGROUP_INTERVAL_START
0x01ab0010 WORKGROUP_INTERVAL_UPDATE
0x01ab0014 WORKGROUP_INTERVAL_FINISH
+0x01ac0000 HV_GUEST_ENTER
+0x01ac0004 HV_GUEST_ERROR
0x1e000000 SEC_ENTROPY_READ0
0x1e000004 SEC_ENTROPY_READ1
0x1e000008 SEC_ENTROPY_READ2
#include <kern/waitq.h>
#include <libkern/section_keywords.h>
-static lck_grp_t *tty_lck_grp;
-static lck_grp_attr_t *tty_lck_grp_attr;
-static lck_attr_t *tty_lck_attr;
+static LCK_GRP_DECLARE(tty_lck_grp, "tty");
__private_extern__ int ttnread(struct tty *tp);
static void ttyecho(int c, struct tty *tp);
}
-/*
- * tty_init
- *
- * Initialize the tty line discipline subsystem.
- *
- * Parameters: void
- *
- * Returns: void
- *
- * Locks: No ttys can be allocated and no tty locks can be used
- * until after this function is called
- *
- * Notes: The intent of this is to set up a log group attribute,
- * lock group, and loc atribute for subsequent per-tty locks.
- * This function is called early in bsd_init(), prior to the
- * console device initialization.
- */
-void
-tty_init(void)
-{
- tty_lck_grp_attr = lck_grp_attr_alloc_init();
- tty_lck_grp = lck_grp_alloc_init("tty", tty_lck_grp_attr);
- tty_lck_attr = lck_attr_alloc_init();
-}
-
-
/*
* tty_lock
*
{
struct tty *tp;
- MALLOC(tp, struct tty *, sizeof(struct tty), M_TTYS, M_WAITOK | M_ZERO);
+ tp = kheap_alloc(KM_TTYS, sizeof(struct tty), Z_WAITOK | Z_ZERO);
if (tp != NULL) {
/* XXX: default to TTYCLSIZE(1024) chars for now */
clalloc(&tp->t_rawq, TTYCLSIZE, 1);
clalloc(&tp->t_canq, TTYCLSIZE, 1);
/* output queue doesn't need quoting */
clalloc(&tp->t_outq, TTYCLSIZE, 0);
- lck_mtx_init(&tp->t_lock, tty_lck_grp, tty_lck_attr);
+ lck_mtx_init(&tp->t_lock, &tty_lck_grp, LCK_ATTR_NULL);
klist_init(&tp->t_rsel.si_note);
klist_init(&tp->t_wsel.si_note);
tp->t_refcnt = 1;
clfree(&tp->t_rawq);
clfree(&tp->t_canq);
clfree(&tp->t_outq);
- lck_mtx_destroy(&tp->t_lock, tty_lck_grp);
- FREE(tp, M_TTYS);
+ lck_mtx_destroy(&tp->t_lock, &tty_lck_grp);
+ kheap_free(KM_TTYS, tp, sizeof(struct tty));
}
}
DEVFS_UNLOCK();
- MALLOC(new_ptmx_ioctl, struct ptmx_ioctl *, sizeof(struct ptmx_ioctl), M_TTYS, M_WAITOK | M_ZERO);
+ new_ptmx_ioctl = kheap_alloc(KM_TTYS, sizeof(struct ptmx_ioctl),
+ Z_WAITOK | Z_ZERO);
if (new_ptmx_ioctl == NULL) {
return NULL;
}
if ((new_ptmx_ioctl->pt_tty = ttymalloc()) == NULL) {
- FREE(new_ptmx_ioctl, M_TTYS);
+ kheap_free(KM_TTYS, new_ptmx_ioctl, sizeof(struct ptmx_ioctl));
return NULL;
}
if ((_state.pis_total - _state.pis_free) >= ptmx_max) {
ttyfree(new_ptmx_ioctl->pt_tty);
DEVFS_UNLOCK();
- FREE(new_ptmx_ioctl, M_TTYS);
+ kheap_free(KM_TTYS, new_ptmx_ioctl, sizeof(struct ptmx_ioctl));
return NULL;
}
if (_state.pis_free == 0) {
struct ptmx_ioctl **new_pis_ioctl_list;
struct ptmx_ioctl **old_pis_ioctl_list = NULL;
+ size_t old_pis_total = 0;
/* Yes. */
- MALLOC(new_pis_ioctl_list, struct ptmx_ioctl **, sizeof(struct ptmx_ioctl *) * (_state.pis_total + PTMX_GROW_VECTOR), M_TTYS, M_WAITOK | M_ZERO);
+ new_pis_ioctl_list = kheap_alloc(KM_TTYS,
+ sizeof(struct ptmx_ioctl *) * (_state.pis_total + PTMX_GROW_VECTOR),
+ Z_WAITOK | Z_ZERO);
if (new_pis_ioctl_list == NULL) {
ttyfree(new_ptmx_ioctl->pt_tty);
DEVFS_UNLOCK();
- FREE(new_ptmx_ioctl, M_TTYS);
+ kheap_free(KM_TTYS, new_ptmx_ioctl, sizeof(struct ptmx_ioctl));
return NULL;
}
/* If this is not the first time, copy the old over */
bcopy(_state.pis_ioctl_list, new_pis_ioctl_list, sizeof(struct ptmx_ioctl *) * _state.pis_total);
old_pis_ioctl_list = _state.pis_ioctl_list;
+ old_pis_total = _state.pis_total;
_state.pis_ioctl_list = new_pis_ioctl_list;
_state.pis_free += PTMX_GROW_VECTOR;
_state.pis_total += PTMX_GROW_VECTOR;
- if (old_pis_ioctl_list) {
- FREE(old_pis_ioctl_list, M_TTYS);
- }
+ kheap_free(KM_TTYS, old_pis_ioctl_list,
+ sizeof(struct ptmx_ioctl *) * old_pis_total);
}
/* is minor in range now? */
if (minor < 0 || minor >= _state.pis_total) {
ttyfree(new_ptmx_ioctl->pt_tty);
DEVFS_UNLOCK();
- FREE(new_ptmx_ioctl, M_TTYS);
+ kheap_free(KM_TTYS, new_ptmx_ioctl, sizeof(struct ptmx_ioctl));
return NULL;
}
if (_state.pis_ioctl_list[minor] != NULL) {
ttyfree(new_ptmx_ioctl->pt_tty);
DEVFS_UNLOCK();
- FREE(new_ptmx_ioctl, M_TTYS);
+ kheap_free(KM_TTYS, new_ptmx_ioctl, sizeof(struct ptmx_ioctl));
/* Special error value so we know to redrive the open, we've been raced */
return (struct ptmx_ioctl*)-1;
devfs_remove(old_ptmx_ioctl->pt_devhandle);
}
ttyfree(old_ptmx_ioctl->pt_tty);
- FREE(old_ptmx_ioctl, M_TTYS);
+ kheap_free(KM_TTYS, old_ptmx_ioctl, sizeof(struct ptmx_ioctl));
}
return 0; /* Success */
}
csblob->csb_hashtype->cs_init(&context);
+ ptrauth_utils_auth_blob_generic(entitlements,
+ ntohl(entitlements->length),
+ OS_PTRAUTH_DISCRIMINATOR("cs_blob.csb_entitlements_blob_signature"),
+ PTRAUTH_ADDR_DIVERSIFY,
+ csblob->csb_entitlements_blob_signature);
csblob->csb_hashtype->cs_update(&context, entitlements, ntohl(entitlements->length));
csblob->csb_hashtype->cs_final(computed_hash, &context);
if (blob->csb_entitlements_blob) {
/* We need to add a slot for the entitlements */
+ ptrauth_utils_auth_blob_generic(blob->csb_entitlements_blob,
+ ntohl(blob->csb_entitlements_blob->length),
+ OS_PTRAUTH_DISCRIMINATOR("cs_blob.csb_entitlements_blob_signature"),
+ PTRAUTH_ADDR_DIVERSIFY,
+ blob->csb_entitlements_blob_signature);
+
new_blob_size += sizeof(CS_BlobIndex);
new_blob_size += ntohl(blob->csb_entitlements_blob->length);
}
new_superblob->index[1].type = htonl(CSSLOT_ENTITLEMENTS);
new_superblob->index[1].offset = htonl((uint32_t)ent_offset);
+ ptrauth_utils_auth_blob_generic(blob->csb_entitlements_blob,
+ ntohl(blob->csb_entitlements_blob->length),
+ OS_PTRAUTH_DISCRIMINATOR("cs_blob.csb_entitlements_blob_signature"),
+ PTRAUTH_ADDR_DIVERSIFY,
+ blob->csb_entitlements_blob_signature);
+
memcpy((void *)(new_blob_addr + ent_offset), blob->csb_entitlements_blob, ntohl(blob->csb_entitlements_blob->length));
new_cd = (CS_CodeDirectory *)(new_blob_addr + cd_offset);
}
/* New Code Directory is ready for use, swap it out in the blob structure */
- ubc_cs_blob_deallocate(blob->csb_mem_kaddr, blob->csb_mem_size);
+ ubc_cs_blob_deallocate((vm_offset_t)blob->csb_mem_kaddr, blob->csb_mem_size);
blob->csb_mem_size = new_blob_size;
- blob->csb_mem_kaddr = new_blob_addr;
+ blob->csb_mem_kaddr = (void *)new_blob_addr;
blob->csb_cd = cd;
blob->csb_entitlements_blob = entitlements;
+ if (blob->csb_entitlements_blob != NULL) {
+ blob->csb_entitlements_blob_signature = ptrauth_utils_sign_blob_generic(blob->csb_entitlements_blob,
+ ntohl(blob->csb_entitlements_blob->length),
+ OS_PTRAUTH_DISCRIMINATOR("cs_blob.csb_entitlements_blob_signature"),
+ PTRAUTH_ADDR_DIVERSIFY);
+ }
/* The blob has some cached attributes of the Code Directory, so update those */
/* fill in the new blob */
blob->csb_mem_size = size;
blob->csb_mem_offset = 0;
- blob->csb_mem_kaddr = *addr;
+ blob->csb_mem_kaddr = (void *)*addr;
blob->csb_flags = 0;
blob->csb_signer_type = CS_SIGNER_TYPE_UNKNOWN;
blob->csb_platform_binary = 0;
blob->csb_cd = cd;
blob->csb_entitlements_blob = entitlements; /* may be NULL, not yet validated */
+ if (blob->csb_entitlements_blob != NULL) {
+ blob->csb_entitlements_blob_signature = ptrauth_utils_sign_blob_generic(blob->csb_entitlements_blob,
+ ntohl(blob->csb_entitlements_blob->length),
+ OS_PTRAUTH_DISCRIMINATOR("cs_blob.csb_entitlements_blob_signature"),
+ PTRAUTH_ADDR_DIVERSIFY);
+ }
blob->csb_hashtype = cs_find_md(cd->hashType);
if (blob->csb_hashtype == NULL || blob->csb_hashtype->cs_digest_size > sizeof(hash)) {
panic("validated CodeDirectory but unsupported type");
{
if (blob != NULL) {
if (blob->csb_mem_kaddr) {
- ubc_cs_blob_deallocate(blob->csb_mem_kaddr, blob->csb_mem_size);
- blob->csb_mem_kaddr = 0;
+ ubc_cs_blob_deallocate((vm_offset_t)blob->csb_mem_kaddr, blob->csb_mem_size);
+ blob->csb_mem_kaddr = NULL;
}
if (blob->csb_entitlements != NULL) {
osobject_release(blob->csb_entitlements);
goto out;
}
- ubc_cs_blob_deallocate(blob->csb_mem_kaddr, blob->csb_mem_size);
+ ubc_cs_blob_deallocate((vm_offset_t)blob->csb_mem_kaddr, blob->csb_mem_size);
- blob->csb_mem_kaddr = new_mem_kaddr;
+ blob->csb_mem_kaddr = (void *)new_mem_kaddr;
blob->csb_mem_size = new_mem_size;
blob->csb_cd = new_cd;
blob->csb_entitlements_blob = new_entitlements;
+ if (blob->csb_entitlements_blob != NULL) {
+ blob->csb_entitlements_blob_signature = ptrauth_utils_sign_blob_generic(blob->csb_entitlements_blob,
+ ntohl(blob->csb_entitlements_blob->length),
+ OS_PTRAUTH_DISCRIMINATOR("cs_blob.csb_entitlements_blob_signature"),
+ PTRAUTH_ADDR_DIVERSIFY);
+ }
blob->csb_reconstituted = true;
}
#endif
}
/* blob data has been released */
- kaddr = blob->csb_mem_kaddr;
+ kaddr = (vm_offset_t)blob->csb_mem_kaddr;
if (kaddr == 0) {
continue;
}
static void domain_sched_timeout(void);
static void domain_timeout(void *);
-lck_grp_t *domain_proto_mtx_grp;
-lck_attr_t *domain_proto_mtx_attr;
-static lck_grp_attr_t *domain_proto_mtx_grp_attr;
-decl_lck_mtx_data(static, domain_proto_mtx);
-decl_lck_mtx_data(static, domain_timeout_mtx);
+static LCK_GRP_DECLARE(domain_proto_mtx_grp, "domain");
+static LCK_ATTR_DECLARE(domain_proto_mtx_attr, 0, 0);
+static LCK_MTX_DECLARE_ATTR(domain_proto_mtx,
+ &domain_proto_mtx_grp, &domain_proto_mtx_attr);
+static LCK_MTX_DECLARE_ATTR(domain_timeout_mtx,
+ &domain_proto_mtx_grp, &domain_proto_mtx_attr);
u_int64_t _net_uptime;
u_int64_t _net_uptime_ms;
VERIFY(dp->dom_flags & DOM_ATTACHED);
if (!(dp->dom_flags & DOM_INITIALIZED)) {
- lck_mtx_init(&dp->dom_mtx_s, domain_proto_mtx_grp,
- domain_proto_mtx_attr);
+ lck_mtx_init(&dp->dom_mtx_s, &domain_proto_mtx_grp,
+ &domain_proto_mtx_attr);
dp->dom_mtx = &dp->dom_mtx_s;
TAILQ_INIT(&dp->dom_protosw);
if (dp->dom_init != NULL) {
/* NOTREACHED */
}
- dp = _MALLOC(sizeof(*dp), M_TEMP, M_WAITOK | M_ZERO);
+ dp = kheap_alloc(KHEAP_DEFAULT, sizeof(struct domain), Z_WAITOK | Z_ZERO);
if (dp == NULL) {
/*
* There is really nothing better than to panic here,
TAILQ_FOREACH_SAFE(pp1, &dp1->dom_protosw, pr_entry, pp2) {
detach_proto(pp1, dp1);
if (pp1->pr_usrreqs->pru_flags & PRUF_OLD) {
- FREE(pp1->pr_usrreqs, M_TEMP);
+ kheap_free(KHEAP_DEFAULT, pp1->pr_usrreqs, sizeof(struct pr_usrreqs));
}
if (pp1->pr_flags & PR_OLD) {
- FREE(pp1, M_TEMP);
+ kheap_free(KHEAP_DEFAULT, pp1, sizeof(struct protosw));
}
}
detach_domain(dp1);
- FREE(dp1, M_TEMP);
+ kheap_free(KHEAP_DEFAULT, dp1, sizeof(struct domain));
} else {
error = EPFNOSUPPORT;
}
/* NOTREACHED */
}
- pru = _MALLOC(sizeof(*pru), M_TEMP, M_WAITOK | M_ZERO);
+ pru = kheap_alloc(KHEAP_DEFAULT, sizeof(struct pr_usrreqs),
+ Z_WAITOK | Z_ZERO);
if (pru == NULL) {
error = ENOMEM;
goto done;
pru->pru_soreceive = opru->pru_soreceive;
pru->pru_sopoll = opru->pru_sopoll;
- pp = _MALLOC(sizeof(*pp), M_TEMP, M_WAITOK | M_ZERO);
+ pp = kheap_alloc(KHEAP_DEFAULT, sizeof(struct protosw), Z_WAITOK | Z_ZERO);
if (pp == NULL) {
error = ENOMEM;
goto done;
"error %d\n", __func__, odp->dom_family,
odp->dom_name, opp->pr_protocol, error);
- if (pru != NULL) {
- FREE(pru, M_TEMP);
- }
- if (pp != NULL) {
- FREE(pp, M_TEMP);
- }
+ kheap_free(KHEAP_DEFAULT, pru, sizeof(struct pr_usrreqs));
+ kheap_free(KHEAP_DEFAULT, pp, sizeof(struct protosw));
}
domain_guard_release(guard);
detach_proto(pp, dp);
if (pp->pr_usrreqs->pru_flags & PRUF_OLD) {
- FREE(pp->pr_usrreqs, M_TEMP);
+ kheap_free(KHEAP_DEFAULT, pp->pr_usrreqs, sizeof(struct pr_usrreqs));
}
if (pp->pr_flags & PR_OLD) {
- FREE(pp, M_TEMP);
+ kheap_free(KHEAP_DEFAULT, pp, sizeof(struct protosw));
}
return 0;
}
detach_proto(pp, dp);
if (pp->pr_usrreqs->pru_flags & PRUF_OLD) {
- FREE(pp->pr_usrreqs, M_TEMP);
+ kheap_free(KHEAP_DEFAULT, pp->pr_usrreqs, sizeof(struct pr_usrreqs));
}
if (pp->pr_flags & PR_OLD) {
- FREE(pp, M_TEMP);
+ kheap_free(KHEAP_DEFAULT, pp, sizeof(struct protosw));
}
done:
domain_guard_t guard;
eventhandler_lists_ctxt_init(&protoctl_evhdlr_ctxt);
- /*
- * allocate lock group attribute and group for domain mutexes
- */
- domain_proto_mtx_grp_attr = lck_grp_attr_alloc_init();
-
- domain_proto_mtx_grp = lck_grp_alloc_init("domain",
- domain_proto_mtx_grp_attr);
-
- /*
- * allocate the lock attribute for per domain mutexes
- */
- domain_proto_mtx_attr = lck_attr_alloc_init();
-
- lck_mtx_init(&domain_proto_mtx, domain_proto_mtx_grp,
- domain_proto_mtx_attr);
- lck_mtx_init(&domain_timeout_mtx, domain_proto_mtx_grp,
- domain_proto_mtx_attr);
guard = domain_guard_deploy();
/*
#include <sys/domain.h>
#include <sys/queue.h>
#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/file_internal.h>
#include <dev/random/randomdev.h>
#include <kern/queue.h>
#include <kern/sched_prim.h>
#include <kern/backtrace.h>
-#include <kern/cpu_number.h>
+#include <kern/percpu.h>
#include <kern/zalloc.h>
#include <libkern/OSAtomic.h>
#include <libkern/libkern.h>
#include <os/log.h>
+#include <os/ptrtools.h>
#include <IOKit/IOMapper.h>
/* TODO: should be in header file */
/* kernel translater */
-extern vm_offset_t kmem_mb_alloc(vm_map_t, int, int, kern_return_t *);
extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va);
extern vm_map_t mb_map; /* special map */
"OTHERS" };
/* Global lock */
-decl_lck_mtx_data(static, mbuf_mlock_data);
-static lck_mtx_t *mbuf_mlock = &mbuf_mlock_data;
-static lck_attr_t *mbuf_mlock_attr;
-static lck_grp_t *mbuf_mlock_grp;
-static lck_grp_attr_t *mbuf_mlock_grp_attr;
+static LCK_GRP_DECLARE(mbuf_mlock_grp, "mbuf");
+static LCK_MTX_DECLARE(mbuf_mlock_data, &mbuf_mlock_grp);
+static lck_mtx_t *const mbuf_mlock = &mbuf_mlock_data;
/* Back-end (common) layer */
static uint64_t mb_expand_cnt;
static struct mtrace *mleak_top_trace[MLEAK_NUM_TRACES];
/* Lock to protect mleak tables from concurrent modification */
-decl_lck_mtx_data(static, mleak_lock_data);
-static lck_mtx_t *mleak_lock = &mleak_lock_data;
-static lck_attr_t *mleak_lock_attr;
-static lck_grp_t *mleak_lock_grp;
-static lck_grp_attr_t *mleak_lock_grp_attr;
+static LCK_GRP_DECLARE(mleak_lock_grp, "mleak_lock");
+static LCK_MTX_DECLARE(mleak_lock_data, &mleak_lock_grp);
+static lck_mtx_t *const mleak_lock = &mleak_lock_data;
/* *Failed* large allocations. */
struct mtracelarge {
static void mtracelarge_register(size_t size);
/* Lock to protect the completion callback table */
-static lck_grp_attr_t *mbuf_tx_compl_tbl_lck_grp_attr = NULL;
-static lck_attr_t *mbuf_tx_compl_tbl_lck_attr = NULL;
-static lck_grp_t *mbuf_tx_compl_tbl_lck_grp = NULL;
-decl_lck_rw_data(, mbuf_tx_compl_tbl_lck_rw_data);
-lck_rw_t *mbuf_tx_compl_tbl_lock = &mbuf_tx_compl_tbl_lck_rw_data;
+static LCK_GRP_DECLARE(mbuf_tx_compl_tbl_lck_grp, "mbuf_tx_compl_tbl");
+LCK_RW_DECLARE(mbuf_tx_compl_tbl_lock, &mbuf_tx_compl_tbl_lck_grp);
extern u_int32_t high_sb_max;
* anything beyond that (up to type 255) is considered a corner case.
*/
typedef struct {
- unsigned int cpu_mtypes[MT_MAX];
-} __attribute__((aligned(MAX_CPU_CACHE_LINE_SIZE), packed)) mtypes_cpu_t;
-
-typedef struct {
- mtypes_cpu_t mbs_cpu[1];
+ unsigned int cpu_mtypes[MT_MAX];
} mbuf_mtypes_t;
-static mbuf_mtypes_t *mbuf_mtypes; /* per-CPU statistics */
-
-#define MBUF_MTYPES_SIZE(n) \
- __builtin_offsetof(mbuf_mtypes_t, mbs_cpu[n])
-
-#define MTYPES_CPU(p) \
- ((mtypes_cpu_t *)(void *)((char *)(p) + MBUF_MTYPES_SIZE(cpu_number())))
+static mbuf_mtypes_t PERCPU_DATA(mbuf_mtypes);
#define mtype_stat_add(type, n) { \
if ((unsigned)(type) < MT_MAX) { \
- mtypes_cpu_t *mbs = MTYPES_CPU(mbuf_mtypes); \
+ mbuf_mtypes_t *mbs = PERCPU_GET(mbuf_mtypes); \
atomic_add_32(&mbs->cpu_mtypes[type], n); \
} else if ((unsigned)(type) < (unsigned)MBSTAT_MTYPES_MAX) { \
atomic_add_16((int16_t *)&mbstat.m_mtypes[type], n); \
static void
mbuf_mtypes_sync(boolean_t locked)
{
- int m, n;
- mtypes_cpu_t mtc;
+ mbuf_mtypes_t mtc;
if (locked) {
LCK_MTX_ASSERT(mbuf_mlock, LCK_MTX_ASSERT_OWNED);
}
- bzero(&mtc, sizeof(mtc));
- for (m = 0; m < ncpu; m++) {
- mtypes_cpu_t *scp = &mbuf_mtypes->mbs_cpu[m];
- mtypes_cpu_t temp;
-
- bcopy(&scp->cpu_mtypes, &temp.cpu_mtypes,
- sizeof(temp.cpu_mtypes));
-
- for (n = 0; n < MT_MAX; n++) {
- mtc.cpu_mtypes[n] += temp.cpu_mtypes[n];
+ mtc = *PERCPU_GET_MASTER(mbuf_mtypes);
+ percpu_foreach_secondary(mtype, mbuf_mtypes) {
+ for (int n = 0; n < MT_MAX; n++) {
+ mtc.cpu_mtypes[n] += mtype->cpu_mtypes[n];
}
}
+
if (!locked) {
lck_mtx_lock(mbuf_mlock);
}
- for (n = 0; n < MT_MAX; n++) {
+ for (int n = 0; n < MT_MAX; n++) {
mbstat.m_mtypes[n] = mtc.cpu_mtypes[n];
}
if (!locked) {
unsigned int b, c, s;
int m, config_mbuf_jumbo = 0;
- MALLOC(omb_stat, struct omb_stat *, OMB_STAT_SIZE(NELEM(mbuf_table)),
- M_TEMP, M_WAITOK | M_ZERO);
- VERIFY(omb_stat != NULL);
+ omb_stat = zalloc_permanent(OMB_STAT_SIZE(NELEM(mbuf_table)),
+ ZALIGN(struct omb_stat));
- MALLOC(mb_stat, mb_stat_t *, MB_STAT_SIZE(NELEM(mbuf_table)),
- M_TEMP, M_WAITOK | M_ZERO);
- VERIFY(mb_stat != NULL);
+ mb_stat = zalloc_permanent(MB_STAT_SIZE(NELEM(mbuf_table)),
+ ZALIGN(mb_stat_t));
mb_stat->mbs_cnt = NELEM(mbuf_table);
for (m = 0; m < NELEM(mbuf_table); m++) {
bool
mbuf_class_under_pressure(struct mbuf *m)
{
- int mclass = mbuf_get_class(m); // TODO - how can we get the class easily???
+ int mclass = mbuf_get_class(m);
+
+ if (m_total(mclass) - m_infree(mclass) >= (m_maxlimit(mclass) * mb_memory_pressure_percentage) / 100) {
+ /*
+ * The above computation does not include the per-CPU cached objects.
+ * As a fast-path check this is good enough, but now we do the "slower"
+ * count of the cached objects to know exactly the number of active
+ * mbufs in use.
+ *
+ * We do not take the mbuf_lock here to avoid lock contention. The
+ * numbers might be slightly off, but we don't try to be 100% accurate.
+ * At worst, we drop a packet that we shouldn't have dropped, or we
+ * go slightly above our memory-pressure threshold.
+ */
+ mcache_t *cp = m_cache(mclass);
+ mcache_cpu_t *ccp = &cp->mc_cpu[0];
+
+ int bktsize = os_access_once(ccp->cc_bktsize);
+ uint32_t bl_total = os_access_once(cp->mc_full.bl_total);
+ uint32_t cached = 0;
+ int i;
+
+ for (i = 0; i < ncpu; i++) {
+ ccp = &cp->mc_cpu[i];
+
+ int cc_objs = os_access_once(ccp->cc_objs);
+ if (cc_objs > 0) {
+ cached += cc_objs;
+ }
+
+ int cc_pobjs = os_access_once(ccp->cc_pobjs);
+ if (cc_pobjs > 0) {
+ cached += cc_pobjs;
+ }
+ }
+ cached += (bl_total * bktsize);
- if (m_total(mclass) >= (m_maxlimit(mclass) * mb_memory_pressure_percentage) / 100) {
- os_log(OS_LOG_DEFAULT,
- "%s memory-pressure on mbuf due to class %u, total %u max %u",
- __func__, mclass, m_total(mclass), m_maxlimit(mclass));
- return true;
+ if (m_total(mclass) - m_infree(mclass) - cached >= (m_maxlimit(mclass) * mb_memory_pressure_percentage) / 100) {
+ os_log(OS_LOG_DEFAULT,
+ "%s memory-pressure on mbuf due to class %u, total %u free %u cached %u max %u",
+ __func__, mclass, m_total(mclass), m_infree(mclass), cached, m_maxlimit(mclass));
+ return true;
+ }
}
return false;
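
The pressure check above can be restated in isolation. The sketch below is a user-space approximation with illustrative names (none of these are kernel symbols): a class is considered under pressure once the objects that are neither on the freelist nor parked in per-CPU caches reach the configured percentage of the class limit.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative stand-ins for m_total()/m_infree()/m_maxlimit() and the
 * per-CPU cache tally performed by mbuf_class_under_pressure().
 */
static bool
class_under_pressure(uint32_t total, uint32_t freelist, uint32_t cached,
    uint32_t maxlimit, uint32_t pressure_pct)
{
	/* Objects neither free nor cached are the ones actually in use. */
	uint32_t active = total - freelist - cached;

	return active >= (maxlimit * pressure_pct) / 100;
}

int
main(void)
{
	/* e.g. an 80% threshold, as with mb_memory_pressure_percentage */
	printf("under pressure: %d\n",
	    class_under_pressure(10000, 1500, 700, 10240, 80));
	return 0;
}
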
{
unsigned int m;
unsigned int initmcl = 0;
- void *buf;
thread_t thread = THREAD_NULL;
microuptime(&mb_start);
/* Setup the mbuf table */
mbuf_table_init();
- /* Global lock for common layer */
- mbuf_mlock_grp_attr = lck_grp_attr_alloc_init();
- mbuf_mlock_grp = lck_grp_alloc_init("mbuf", mbuf_mlock_grp_attr);
- mbuf_mlock_attr = lck_attr_alloc_init();
- lck_mtx_init(mbuf_mlock, mbuf_mlock_grp, mbuf_mlock_attr);
-
/*
* Allocate cluster slabs table:
*
*/
maxslabgrp =
(P2ROUNDUP(nmbclusters, (MBSIZE >> MCLSHIFT)) << MCLSHIFT) >> MBSHIFT;
- MALLOC(slabstbl, mcl_slabg_t * *, maxslabgrp * sizeof(mcl_slabg_t *),
- M_TEMP, M_WAITOK | M_ZERO);
- VERIFY(slabstbl != NULL);
+ slabstbl = zalloc_permanent(maxslabgrp * sizeof(mcl_slabg_t *),
+ ZALIGN(mcl_slabg_t));
/*
* Allocate audit structures, if needed:
int l;
mcl_audit_t *mclad;
maxclaudit = ((maxslabgrp << MBSHIFT) >> PAGE_SHIFT);
- MALLOC(mclaudit, mcl_audit_t *, maxclaudit * sizeof(*mclaudit),
- M_TEMP, M_WAITOK | M_ZERO);
- VERIFY(mclaudit != NULL);
+ mclaudit = zalloc_permanent(maxclaudit * sizeof(*mclaudit),
+ ZALIGN(mcl_audit_t));
for (l = 0, mclad = mclaudit; l < maxclaudit; l++) {
- MALLOC(mclad[l].cl_audit, mcache_audit_t * *,
- NMBPG * sizeof(mcache_audit_t *),
- M_TEMP, M_WAITOK | M_ZERO);
- VERIFY(mclad[l].cl_audit != NULL);
+ mclad[l].cl_audit = zalloc_permanent(NMBPG * sizeof(mcache_audit_t *),
+ ZALIGN_PTR);
}
mcl_audit_con_cache = mcache_create("mcl_audit_contents",
/* Enable mbuf leak logging, with a lock to protect the tables */
- mleak_lock_grp_attr = lck_grp_attr_alloc_init();
- mleak_lock_grp = lck_grp_alloc_init("mleak_lock", mleak_lock_grp_attr);
- mleak_lock_attr = lck_attr_alloc_init();
- lck_mtx_init(mleak_lock, mleak_lock_grp, mleak_lock_attr);
-
mleak_activate();
/*
* before alignment is not saved.
*/
ncpu = ml_wait_max_cpus();
- MALLOC(buf, void *, MBUF_MTYPES_SIZE(ncpu) + CPU_CACHE_LINE_SIZE,
- M_TEMP, M_WAITOK);
- VERIFY(buf != NULL);
-
- mbuf_mtypes = (mbuf_mtypes_t *)P2ROUNDUP((intptr_t)buf,
- CPU_CACHE_LINE_SIZE);
- bzero(mbuf_mtypes, MBUF_MTYPES_SIZE(ncpu));
/* Calculate the number of pages assigned to the cluster pool */
mcl_pages = (nmbclusters << MCLSHIFT) / PAGE_SIZE;
- MALLOC(mcl_paddr, ppnum_t *, mcl_pages * sizeof(ppnum_t),
- M_TEMP, M_WAITOK);
- VERIFY(mcl_paddr != NULL);
+ mcl_paddr = zalloc_permanent(mcl_pages * sizeof(ppnum_t),
+ ZALIGN(ppnum_t));
/* Register with the I/O Bus mapper */
mcl_paddr_base = IOMapperIOVMAlloc(mcl_pages);
- bzero((char *)mcl_paddr, mcl_pages * sizeof(ppnum_t));
embutl = (mbutl + (nmbclusters * MCLBYTES));
VERIFY(((embutl - mbutl) % MBIGCLBYTES) == 0);
}
/* allocate space for mbuf_dump_buf */
- MALLOC(mbuf_dump_buf, char *, MBUF_DUMP_BUF_SIZE, M_TEMP, M_WAITOK);
- VERIFY(mbuf_dump_buf != NULL);
+ mbuf_dump_buf = zalloc_permanent(MBUF_DUMP_BUF_SIZE, ZALIGN_NONE);
if (mbuf_debug & MCF_DEBUG) {
printf("%s: MLEN %d, MHLEN %d\n", __func__,
(nmbclusters << MCLSHIFT) >> MBSHIFT,
(nclusters << MCLSHIFT) >> MBSHIFT,
(njcl << MCLSHIFT) >> MBSHIFT);
-
- /* initialize lock form tx completion callback table */
- mbuf_tx_compl_tbl_lck_grp_attr = lck_grp_attr_alloc_init();
- if (mbuf_tx_compl_tbl_lck_grp_attr == NULL) {
- panic("%s: lck_grp_attr_alloc_init failed", __func__);
- /* NOTREACHED */
- }
- mbuf_tx_compl_tbl_lck_grp = lck_grp_alloc_init("mbuf_tx_compl_tbl",
- mbuf_tx_compl_tbl_lck_grp_attr);
- if (mbuf_tx_compl_tbl_lck_grp == NULL) {
- panic("%s: lck_grp_alloc_init failed", __func__);
- /* NOTREACHED */
- }
- mbuf_tx_compl_tbl_lck_attr = lck_attr_alloc_init();
- if (mbuf_tx_compl_tbl_lck_attr == NULL) {
- panic("%s: lck_attr_alloc_init failed", __func__);
- /* NOTREACHED */
- }
- lck_rw_init(mbuf_tx_compl_tbl_lock, mbuf_tx_compl_tbl_lck_grp,
- mbuf_tx_compl_tbl_lck_attr);
}
/*
}
}
+/*
+ * Allocate kernel memory for mbuf clusters from the given submap,
+ * physically contiguous when requested; returns 0 and reports the
+ * failure code through *err.
+ */
+static vm_offset_t
+kmem_mb_alloc(vm_map_t mbmap, int size, int physContig, kern_return_t *err)
+{
+ vm_offset_t addr = 0;
+ kern_return_t kr = KERN_SUCCESS;
+
+ if (!physContig) {
+ kr = kernel_memory_allocate(mbmap, &addr, size, 0,
+ KMA_KOBJECT | KMA_LOMEM, VM_KERN_MEMORY_MBUF);
+ } else {
+ kr = kmem_alloc_contig(mbmap, &addr, size, PAGE_MASK, 0xfffff,
+ 0, KMA_KOBJECT | KMA_LOMEM, VM_KERN_MEMORY_MBUF);
+ }
+
+ if (kr != KERN_SUCCESS) {
+ addr = 0;
+ }
+ if (err) {
+ *err = kr;
+ }
+
+ return addr;
+}
+
/*
* Allocate some number of mbuf clusters and place on cluster freelist.
*/
}
}
+static bool mbuf_watchdog_defunct_active = false;
+
+/* mbuf bytes currently held by a socket's send and receive buffers */
+static uint32_t
+mbuf_watchdog_socket_space(struct socket *so)
+{
+ if (so == NULL) {
+ return 0;
+ }
+
+ return so->so_snd.sb_mbcnt + so->so_rcv.sb_mbcnt;
+}
+
+struct mbuf_watchdog_defunct_args {
+ struct proc *top_app;
+ uint32_t top_app_space_used;
+};
+
+/*
+ * proc_iterate() callback: remember the process with the largest
+ * socket-buffer footprint seen so far.
+ */
+static int
+mbuf_watchdog_defunct_iterate(proc_t p, void *arg)
+{
+ struct fileproc *fp = NULL;
+ struct mbuf_watchdog_defunct_args *args =
+ (struct mbuf_watchdog_defunct_args *)arg;
+ uint32_t space_used = 0;
+
+ proc_fdlock(p);
+ fdt_foreach(fp, p) {
+ struct fileglob *fg = fp->fp_glob;
+ struct socket *so = NULL;
+
+ if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET) {
+ continue;
+ }
+ so = (struct socket *)fp->fp_glob->fg_data;
+ /*
+ * We calculate the space without the socket
+ * lock because we don't want to be blocked
+ * by another process that called send() and
+ * is stuck waiting for mbufs.
+ *
+ * These variables are 32-bit so we don't have
+ * to worry about incomplete reads.
+ */
+ space_used += mbuf_watchdog_socket_space(so);
+ }
+ proc_fdunlock(p);
+ if (space_used > args->top_app_space_used) {
+ if (args->top_app != NULL) {
+ proc_rele(args->top_app);
+ }
+ args->top_app = p;
+ args->top_app_space_used = space_used;
+
+ return PROC_CLAIMED;
+ } else {
+ return PROC_RETURNED;
+ }
+}
+
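
The callback above leans on proc_iterate()'s reference-counting contract: every proc is handed to the callout with a reference held, PROC_RETURNED lets the iterator drop that reference, and PROC_CLAIMED transfers it to the callout, which is why the previously claimed top_app is proc_rele()'d before being replaced. A minimal kernel-context sketch of that shape (is_better_candidate() is a hypothetical predicate, not an XNU function) might look like:

/*
 * Sketch only: keep a reference to the single "best" process seen so
 * far and release whichever process was previously claimed.
 */
static int
keep_best_proc(proc_t p, void *arg)
{
	proc_t *bestp = arg;

	if (is_better_candidate(p, *bestp)) {   /* hypothetical predicate */
		if (*bestp != NULL) {
			proc_rele(*bestp);      /* drop the old claimed reference */
		}
		*bestp = p;
		return PROC_CLAIMED;            /* we now own p's reference */
	}
	return PROC_RETURNED;                   /* iterator releases p */
}

It would be driven the same way as above, via proc_iterate(PROC_ALLPROCLIST, keep_best_proc, &best, NULL, NULL), with the caller responsible for the final proc_rele().
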
+extern char *proc_name_address(void *p);
+
+/*
+ * Thread-call handler: find the process using the most socket-buffer
+ * space and defunct all of its sockets to relieve mbuf pressure.
+ */
+static void
+mbuf_watchdog_defunct(thread_call_param_t arg0, thread_call_param_t arg1)
+{
+#pragma unused(arg0, arg1)
+ struct mbuf_watchdog_defunct_args args = {};
+ struct fileproc *fp = NULL;
+
+ proc_iterate(PROC_ALLPROCLIST,
+ mbuf_watchdog_defunct_iterate, &args, NULL, NULL);
+
+ /*
+ * Defunct all sockets from this app.
+ */
+ if (args.top_app != NULL) {
+ os_log(OS_LOG_DEFAULT, "%s: defuncting all sockets from %s.%d",
+ __func__,
+ proc_name_address(args.top_app),
+ proc_pid(args.top_app));
+ proc_fdlock(args.top_app);
+ fdt_foreach(fp, args.top_app) {
+ struct fileglob *fg = fp->fp_glob;
+ struct socket *so = NULL;
+
+ if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET) {
+ continue;
+ }
+ so = (struct socket *)fp->fp_glob->fg_data;
+ socket_lock(so, 0);
+ if (sosetdefunct(args.top_app, so,
+ SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
+ TRUE) == 0) {
+ sodefunct(args.top_app, so,
+ SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
+ }
+ socket_unlock(so, 0);
+ }
+ proc_fdunlock(args.top_app);
+ proc_rele(args.top_app);
+ mbstat.m_forcedefunct++;
+ }
+ mbuf_watchdog_defunct_active = false;
+}
+
/*
* Called during slab (blocking and non-blocking) allocation. If there
* is at least one waiter, and the time since the first waiter is blocked
{
struct timeval now;
unsigned int since;
+ static thread_call_t defunct_tcall = NULL;
if (mb_waiters == 0 || !mb_watchdog) {
return;
}
+ LCK_MTX_ASSERT(mbuf_mlock, LCK_MTX_ASSERT_OWNED);
+
microuptime(&now);
since = now.tv_sec - mb_wdtstart.tv_sec;
+
+ /*
+ * Check if we are about to panic the system due
+ * to lack of mbufs and start defuncting sockets
+ * from processes that use too many sockets.
+ *
+ * We're always called with the mbuf_mlock held,
+ * so that also protects mbuf_watchdog_defunct_active.
+ */
+ if (since >= MB_WDT_MAXTIME / 2 && !mbuf_watchdog_defunct_active) {
+ /*
+ * Start a thread to defunct sockets
+ * from apps that are over-using their socket
+ * buffers.
+ */
+ if (defunct_tcall == NULL) {
+ defunct_tcall =
+ thread_call_allocate_with_options(mbuf_watchdog_defunct,
+ NULL,
+ THREAD_CALL_PRIORITY_KERNEL,
+ THREAD_CALL_OPTIONS_ONCE);
+ }
+ if (defunct_tcall != NULL) {
+ mbuf_watchdog_defunct_active = true;
+ thread_call_enter(defunct_tcall);
+ }
+ }
if (since >= MB_WDT_MAXTIME) {
panic_plain("%s: %d waiters stuck for %u secs\n%s", __func__,
mb_waiters, since, mbuf_dump());
lck_mtx_unlock(mbuf_mlock);
/* This is a new buffer; create the slabs group for it */
- MALLOC(slg, mcl_slabg_t *, sizeof(*slg), M_TEMP,
- M_WAITOK | M_ZERO);
- MALLOC(slg->slg_slab, mcl_slab_t *, sizeof(mcl_slab_t) * NSLABSPMB,
- M_TEMP, M_WAITOK | M_ZERO);
- VERIFY(slg != NULL && slg->slg_slab != NULL);
+ slg = zalloc_permanent_type(mcl_slabg_t);
+ slg->slg_slab = zalloc_permanent(sizeof(mcl_slab_t) * NSLABSPMB,
+ ZALIGN(mcl_slab_t));
lck_mtx_lock(mbuf_mlock);
/*
static void
mcl_audit_mcheck_panic(struct mbuf *m)
{
+ char buf[DUMP_MCA_BUF_SIZE];
mcache_audit_t *mca;
MRANGE(m);
mca = mcl_audit_buf2mca(MC_MBUF, (mcache_obj_t *)m);
panic("mcl_audit: freed mbuf %p with type 0x%x (instead of 0x%x)\n%s\n",
- m, (u_int16_t)m->m_type, MT_FREE, mcache_dump_mca(mca));
+ m, (u_int16_t)m->m_type, MT_FREE, mcache_dump_mca(buf, mca));
+ /* NOTREACHED */
+}
+
+__abortlike
+static void
+mcl_audit_verify_nextptr_panic(void *next, mcache_audit_t *mca)
+{
+ char buf[DUMP_MCA_BUF_SIZE];
+ panic("mcl_audit: buffer %p modified after free at offset 0: "
+ "%p out of range [%p-%p)\n%s\n",
+ mca->mca_addr, next, mbutl, embutl, mcache_dump_mca(buf, mca));
/* NOTREACHED */
}
{
if (next != NULL && !MBUF_IN_MAP(next) &&
(next != (void *)MCACHE_FREE_PATTERN || !mclverify)) {
- panic("mcl_audit: buffer %p modified after free at offset 0: "
- "%p out of range [%p-%p)\n%s\n",
- mca->mca_addr, next, mbutl, embutl, mcache_dump_mca(mca));
- /* NOTREACHED */
+ mcl_audit_verify_nextptr_panic(next, mca);
}
}
mleak_alloc_buckets * sizeof(struct mallocation);
vm_size_t trace_size = mleak_trace_buckets * sizeof(struct mtrace);
- MALLOC(mleak_allocations, struct mallocation *, alloc_size,
- M_TEMP, M_WAITOK | M_ZERO);
- VERIFY(mleak_allocations != NULL);
+ mleak_allocations = zalloc_permanent(alloc_size, ZALIGN(struct mallocation));
+ mleak_traces = zalloc_permanent(trace_size, ZALIGN(struct mtrace));
+ mleak_stat = zalloc_permanent(MLEAK_STAT_SIZE(MLEAK_NUM_TRACES),
+ ZALIGN(mleak_stat_t));
- MALLOC(mleak_traces, struct mtrace *, trace_size,
- M_TEMP, M_WAITOK | M_ZERO);
- VERIFY(mleak_traces != NULL);
-
- MALLOC(mleak_stat, mleak_stat_t *, MLEAK_STAT_SIZE(MLEAK_NUM_TRACES),
- M_TEMP, M_WAITOK | M_ZERO);
- VERIFY(mleak_stat != NULL);
mleak_stat->ml_cnt = MLEAK_NUM_TRACES;
#ifdef __LP64__
mleak_stat->ml_isaddr64 = 1;
LCK_MTX_ASSERT(mbuf_mlock, LCK_MTX_ASSERT_OWNED);
if (mbwdog_logging == NULL) {
- mbwdog_logging = _MALLOC(mbwdog_logging_size,
- M_TEMP, M_ZERO | M_NOWAIT);
- if (mbwdog_logging == NULL) {
- return;
- }
+ /*
+ * This might block under a mutex, which isn't really great,
+ * but this happens once, so we'll live.
+ */
+ mbwdog_logging = zalloc_permanent(mbwdog_logging_size,
+ ZALIGN_NONE);
}
va_start(ap, fmt);
vsnprintf(p, sizeof(p), fmt, ap);
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED,
0, 0, sysctl_mbwdog_log, "A", "");
-static int mbtest_val;
-static int mbtest_running;
-
-static void
-mbtest_thread(__unused void *arg)
-{
- int i;
- int scale_down = 1;
- int iterations = 250;
- int allocations = nmbclusters;
- iterations = iterations / scale_down;
- allocations = allocations / scale_down;
- printf("%s thread starting\n", __func__);
- for (i = 0; i < iterations; i++) {
- unsigned int needed = allocations;
- struct mbuf *m1, *m2, *m3;
-
- if (njcl > 0) {
- needed = allocations;
- m3 = m_getpackets_internal(&needed, 0, M_DONTWAIT, 0, M16KCLBYTES);
- m_freem_list(m3);
- }
-
- needed = allocations;
- m2 = m_getpackets_internal(&needed, 0, M_DONTWAIT, 0, MBIGCLBYTES);
- m_freem_list(m2);
-
- m1 = m_getpackets_internal(&needed, 0, M_DONTWAIT, 0, MCLBYTES);
- m_freem_list(m1);
- }
-
- printf("%s thread ending\n", __func__);
-
- OSDecrementAtomic(&mbtest_running);
- wakeup_one((caddr_t)&mbtest_running);
-}
-
-static void
-sysctl_mbtest(void)
-{
- /* We launch three threads - wait for all of them */
- OSIncrementAtomic(&mbtest_running);
- OSIncrementAtomic(&mbtest_running);
- OSIncrementAtomic(&mbtest_running);
-
- thread_call_func_delayed((thread_call_func_t)mbtest_thread, NULL, 10);
- thread_call_func_delayed((thread_call_func_t)mbtest_thread, NULL, 10);
- thread_call_func_delayed((thread_call_func_t)mbtest_thread, NULL, 10);
-
- while (mbtest_running) {
- msleep((caddr_t)&mbtest_running, NULL, PUSER, "mbtest_running", NULL);
- }
-}
-
-static int
-mbtest SYSCTL_HANDLER_ARGS
-{
-#pragma unused(arg1, arg2)
- int error = 0, val, oldval = mbtest_val;
-
- val = oldval;
- error = sysctl_handle_int(oidp, &val, 0, req);
- if (error || !req->newptr) {
- return error;
- }
-
- if (val != oldval) {
- sysctl_mbtest();
- }
-
- mbtest_val = val;
-
- return error;
-}
#endif // DEBUG || DEVELOPMENT
static void
SYSCTL_DECL(_kern_ipc);
#if DEBUG || DEVELOPMENT
-SYSCTL_PROC(_kern_ipc, OID_AUTO, mbtest,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &mbtest_val, 0, &mbtest, "I",
- "Toggle to test mbufs");
#endif
SYSCTL_PROC(_kern_ipc, KIPC_MBSTAT, mbstat,
CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
/*
- * Copyright (c) 1998-2020 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2021 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
static int socketinit_done;
static struct zone *so_cache_zone;
-static lck_grp_t *so_cache_mtx_grp;
-static lck_attr_t *so_cache_mtx_attr;
-static lck_grp_attr_t *so_cache_mtx_grp_attr;
-static lck_mtx_t *so_cache_mtx;
+static LCK_GRP_DECLARE(so_cache_mtx_grp, "so_cache");
+static LCK_MTX_DECLARE(so_cache_mtx, &so_cache_mtx_grp);
#include <machine/limits.h>
PE_parse_boot_argn("socket_debug", &socket_debug,
sizeof(socket_debug));
- /*
- * allocate lock group attribute and group for socket cache mutex
- */
- so_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
- so_cache_mtx_grp = lck_grp_alloc_init("so_cache",
- so_cache_mtx_grp_attr);
-
- /*
- * allocate the lock attribute for socket cache mutex
- */
- so_cache_mtx_attr = lck_attr_alloc_init();
-
- /* cached sockets mutex */
- so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr);
- if (so_cache_mtx == NULL) {
- panic("%s: unable to allocate so_cache_mtx\n", __func__);
- /* NOTREACHED */
- }
STAILQ_INIT(&so_cache_head);
so_cache_zone_element_size = (vm_size_t)(sizeof(struct socket) + 4
soextbkidlestat.so_xbkidle_rcvhiwat = SO_IDLE_BK_IDLE_RCV_HIWAT;
in_pcbinit();
- sflt_init();
socket_tclass_init();
#if MULTIPATH
mp_pcbinit();
caddr_t temp;
uintptr_t offset;
- lck_mtx_lock(so_cache_mtx);
+ lck_mtx_lock(&so_cache_mtx);
if (!STAILQ_EMPTY(&so_cache_head)) {
VERIFY(cached_sock_count > 0);
STAILQ_NEXT((*so), so_cache_ent) = NULL;
cached_sock_count--;
- lck_mtx_unlock(so_cache_mtx);
+ lck_mtx_unlock(&so_cache_mtx);
temp = (*so)->so_saved_pcb;
bzero((caddr_t)*so, sizeof(struct socket));
(*so)->so_saved_pcb = temp;
} else {
- lck_mtx_unlock(so_cache_mtx);
+ lck_mtx_unlock(&so_cache_mtx);
*so = zalloc_flags(so_cache_zone, how | Z_ZERO);
static void
cached_sock_free(struct socket *so)
{
- lck_mtx_lock(so_cache_mtx);
+ lck_mtx_lock(&so_cache_mtx);
so_cache_time = net_uptime();
if (++cached_sock_count > max_cached_sock_count) {
--cached_sock_count;
- lck_mtx_unlock(so_cache_mtx);
+ lck_mtx_unlock(&so_cache_mtx);
zfree(so_cache_zone, so);
} else {
if (so_cache_hw < cached_sock_count) {
STAILQ_INSERT_TAIL(&so_cache_head, so, so_cache_ent);
so->cache_timestamp = so_cache_time;
- lck_mtx_unlock(so_cache_mtx);
+ lck_mtx_unlock(&so_cache_mtx);
}
}
int n_freed = 0;
boolean_t rc = FALSE;
- lck_mtx_lock(so_cache_mtx);
+ lck_mtx_lock(&so_cache_mtx);
so_cache_timeouts++;
so_cache_time = net_uptime();
rc = TRUE;
}
- lck_mtx_unlock(so_cache_mtx);
+ lck_mtx_unlock(&so_cache_mtx);
return rc;
}
if (error) {
if (error == EJUSTRETURN) {
error = 0;
- clen = 0;
- control = NULL;
- top = NULL;
+ goto packet_consumed;
}
goto out_locked;
}
return error;
}
+/*
+ * When peeking SCM_RIGHTS, the actual file descriptors are not yet created,
+ * so clear the data portion in order not to leak the file pointers.
+ */
+static void
+sopeek_scm_rights(struct mbuf *rights)
+{
+ struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
+
+ if (cm->cmsg_type == SCM_RIGHTS) {
+ memset(cm + 1, 0, cm->cmsg_len - sizeof(*cm));
+ }
+}
+
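
To see the effect from user space, the illustration below (not part of the change itself) peeks at an SCM_RIGHTS message on an AF_UNIX socket pair; with the change above, the peeked descriptor slots read back as zeroes because the receiving descriptors are only materialized by the real, non-PEEK receive.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/uio.h>

int
main(void)
{
	int pair[2];
	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) != 0) {
		return 1;
	}

	/* Send one data byte plus our stdout descriptor as SCM_RIGHTS. */
	char byte = 'x';
	struct iovec iov = { .iov_base = &byte, .iov_len = 1 };
	union {
		struct cmsghdr hdr;
		char buf[CMSG_SPACE(sizeof(int))];
	} cbuf;
	memset(&cbuf, 0, sizeof(cbuf));
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf.buf, .msg_controllen = sizeof(cbuf.buf),
	};
	struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);
	cm->cmsg_level = SOL_SOCKET;
	cm->cmsg_type = SCM_RIGHTS;
	cm->cmsg_len = CMSG_LEN(sizeof(int));
	int fd = STDOUT_FILENO;
	memcpy(CMSG_DATA(cm), &fd, sizeof(fd));
	if (sendmsg(pair[0], &msg, 0) < 0) {
		return 1;
	}

	/* MSG_PEEK: the control message arrives, but its data is cleared. */
	memset(&cbuf, 0, sizeof(cbuf));
	msg.msg_controllen = sizeof(cbuf.buf);
	if (recvmsg(pair[1], &msg, MSG_PEEK) < 0) {
		return 1;
	}
	struct cmsghdr *peeked = CMSG_FIRSTHDR(&msg);
	if (peeked != NULL && peeked->cmsg_type == SCM_RIGHTS) {
		memcpy(&fd, CMSG_DATA(peeked), sizeof(fd));
		printf("peeked fd slot: %d\n", fd);
	}
	return 0;
}
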
/*
* Process one or more MT_CONTROL mbufs present before any data mbufs
* in the first mbuf chain on the socket buffer. If MSG_PEEK, we
error = ENOBUFS;
goto done;
}
+
+ sopeek_scm_rights(*controlp);
+
controlp = &(*controlp)->m_next;
}
m = m->m_next;
} else if (type == MT_OOBDATA) {
break;
}
+
+ if (m->m_type != MT_OOBDATA && m->m_type != MT_DATA &&
+ m->m_type != MT_HEADER) {
+ break;
+ }
/*
* Make sure to always set the MSG_OOB event when getting
* out of band data inline.
&ev.ev_data, sizeof(ev));
}
}
- if (socksa != NULL) {
- FREE(socksa, M_SONAME);
- }
- if (peersa != NULL) {
- FREE(peersa, M_SONAME);
- }
+ FREE(socksa, M_SONAME);
+ FREE(peersa, M_SONAME);
}
static int recv_msg_array_is_valid(struct recv_msg_elem *, u_int);
static int internalize_recv_msghdr_array(const void *, int, int,
u_int, struct user_msghdr_x *, struct recv_msg_elem *);
-static u_int externalize_recv_msghdr_array(void *, int, int, u_int,
- const struct user_msghdr_x *, struct recv_msg_elem *);
+static u_int externalize_recv_msghdr_array(struct proc *, struct socket *, void *, u_int,
+ struct user_msghdr_x *, struct recv_msg_elem *, int *);
static struct recv_msg_elem *alloc_recv_msg_array(u_int count);
static void free_recv_msg_array(struct recv_msg_elem *, u_int);
*retval = (int)(len - uio_resid(uiop));
}
bad:
- if (to != NULL && want_free) {
+ if (want_free) {
FREE(to, M_SONAME);
}
out:
KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
+ size_of_msghdr = IS_64BIT_PROCESS(p) ?
+ sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
+
if (uap->flags & MSG_SKIPCFIL) {
error = EPERM;
goto out;
uap->cnt = somaxsendmsgx;
}
- user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
- M_TEMP, M_WAITOK | M_ZERO);
+ user_msg_x = kheap_alloc(KHEAP_TEMP,
+ uap->cnt * sizeof(struct user_msghdr_x), Z_WAITOK | Z_ZERO);
if (user_msg_x == NULL) {
- DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
+ DBG_PRINTF("%s kheap_alloc user_msg_x failed\n", __func__);
error = ENOMEM;
goto out;
}
- uiop = _MALLOC(uap->cnt * sizeof(struct uio *),
- M_TEMP, M_WAITOK | M_ZERO);
+ uiop = kheap_alloc(KHEAP_TEMP,
+ uap->cnt * sizeof(struct uio *), Z_WAITOK | Z_ZERO);
if (uiop == NULL) {
- DBG_PRINTF("%s _MALLOC() uiop failed\n", __func__);
+ DBG_PRINTF("%s kheap_alloc uiop failed\n", __func__);
error = ENOMEM;
goto out;
}
- size_of_msghdr = IS_64BIT_PROCESS(p) ?
- sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
-
- umsgp = _MALLOC(uap->cnt * size_of_msghdr,
- M_TEMP, M_WAITOK | M_ZERO);
+ umsgp = kheap_alloc(KHEAP_TEMP,
+ uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
if (umsgp == NULL) {
- printf("%s _MALLOC() user_msg_x failed\n", __func__);
+ printf("%s kheap_alloc user_msg_x failed\n", __func__);
error = ENOMEM;
goto out;
}
if (need_drop) {
file_drop(uap->s);
}
- if (umsgp != NULL) {
- _FREE(umsgp, M_TEMP);
- }
+ kheap_free(KHEAP_TEMP, umsgp, uap->cnt * size_of_msghdr);
if (uiop != NULL) {
free_uio_array(uiop, uap->cnt);
- _FREE(uiop, M_TEMP);
- }
- if (user_msg_x != NULL) {
- _FREE(user_msg_x, M_TEMP);
+ kheap_free(KHEAP_TEMP, uiop,
+ uap->cnt * sizeof(struct uio *));
}
+ kheap_free(KHEAP_TEMP, user_msg_x,
+ uap->cnt * sizeof(struct user_msghdr_x));
KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
&mp->msg_controllen, &mp->msg_flags, so);
}
out:
- if (fromsa) {
- FREE(fromsa, M_SONAME);
- }
+ FREE(fromsa, M_SONAME);
if (control) {
m_freem(control);
}
KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
+ size_of_msghdr = IS_64BIT_PROCESS(p) ?
+ sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
+
error = file_socket(uap->s, &so);
if (error) {
goto out;
error = EBADF;
goto out;
}
+ /*
+ * Support only a subset of message flags
+ */
+ if (uap->flags & ~(MSG_PEEK | MSG_WAITALL | MSG_DONTWAIT | MSG_NEEDSA | MSG_NBIO)) {
+ return EOPNOTSUPP;
+ }
/*
* Input parameter range check
*/
uap->cnt = somaxrecvmsgx;
}
- user_msg_x = _MALLOC(uap->cnt * sizeof(struct user_msghdr_x),
- M_TEMP, M_WAITOK | M_ZERO);
+ user_msg_x = kheap_alloc(KHEAP_TEMP,
+ uap->cnt * sizeof(struct user_msghdr_x), Z_WAITOK | Z_ZERO);
if (user_msg_x == NULL) {
- DBG_PRINTF("%s _MALLOC() user_msg_x failed\n", __func__);
+ DBG_PRINTF("%s kheap_alloc user_msg_x failed\n", __func__);
error = ENOMEM;
goto out;
}
error = ENOMEM;
goto out;
}
- size_of_msghdr = IS_64BIT_PROCESS(p) ?
- sizeof(struct user64_msghdr_x) : sizeof(struct user32_msghdr_x);
- umsgp = _MALLOC(uap->cnt * size_of_msghdr, M_TEMP, M_WAITOK | M_ZERO);
+ umsgp = kheap_alloc(KHEAP_TEMP,
+ uap->cnt * size_of_msghdr, Z_WAITOK | Z_ZERO);
if (umsgp == NULL) {
- DBG_PRINTF("%s _MALLOC() umsgp failed\n", __func__);
+ DBG_PRINTF("%s kheap_alloc umsgp failed\n", __func__);
error = ENOMEM;
goto out;
}
&recv_msg_elem->controlp : NULL;
error = so->so_proto->pr_usrreqs->pru_soreceive(so, psa,
- auio, (struct mbuf **)0, controlp, &flags);
+ auio, (struct mbuf **)NULL, controlp, &flags);
if (error) {
break;
}
* We have some data
*/
recv_msg_elem->which |= SOCK_MSG_DATA;
+ /*
+ * Set the message flags for this packet
+ */
+ flags &= ~MSG_DONTWAIT;
+ recv_msg_elem->flags = flags;
/*
* Stop on partial copy
*/
- if (flags & (MSG_RCVMORE | MSG_TRUNC)) {
+ if (recv_msg_elem->flags & (MSG_RCVMORE | MSG_TRUNC)) {
break;
}
}
- if ((uap->flags & MSG_DONTWAIT) == 0) {
- flags &= ~MSG_DONTWAIT;
- }
- uap->flags = flags;
}
len_after = recv_msg_array_resid(recv_msg_array, uap->cnt);
}
}
- uiocnt = externalize_recv_msghdr_array(umsgp,
- IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
- UIO_READ, uap->cnt, user_msg_x, recv_msg_array);
+ uiocnt = externalize_recv_msghdr_array(p, so, umsgp,
+ uap->cnt, user_msg_x, recv_msg_array, &error);
+ if (error != 0) {
+ goto out;
+ }
error = copyout(umsgp, uap->msgp, uap->cnt * size_of_msghdr);
if (error) {
}
*retval = (int)(uiocnt);
- for (i = 0; i < uap->cnt; i++) {
- struct user_msghdr_x *mp = user_msg_x + i;
- struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
- struct sockaddr *fromsa = recv_msg_elem->psa;
-
- if (mp->msg_name) {
- error = copyout_sa(fromsa, mp->msg_name,
- &mp->msg_namelen);
- if (error) {
- goto out;
- }
- }
- if (mp->msg_control) {
- error = copyout_control(p, recv_msg_elem->controlp,
- mp->msg_control, &mp->msg_controllen,
- &mp->msg_flags, so);
- if (error) {
- goto out;
- }
- }
- }
out:
if (need_drop) {
file_drop(uap->s);
}
- if (umsgp != NULL) {
- _FREE(umsgp, M_TEMP);
- }
- if (recv_msg_array != NULL) {
- free_recv_msg_array(recv_msg_array, uap->cnt);
- }
- if (user_msg_x != NULL) {
- _FREE(user_msg_x, M_TEMP);
- }
+ kheap_free(KHEAP_TEMP, umsgp, uap->cnt * size_of_msghdr);
+ free_recv_msg_array(recv_msg_array, uap->cnt);
+ kheap_free(KHEAP_TEMP, user_msg_x,
+ uap->cnt * sizeof(struct user_msghdr_x));
KERNEL_DEBUG(DBG_FNC_RECVMSG_X | DBG_FUNC_END, error, 0, 0, 0, 0);
gotnothing:
error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
bad:
- if (sa) {
- FREE(sa, M_SONAME);
- }
+ FREE(sa, M_SONAME);
out:
file_drop(uap->fdes);
return error;
gotnothing:
error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t));
bad:
- if (sa) {
- FREE(sa, M_SONAME);
- }
+ FREE(sa, M_SONAME);
out:
file_drop(uap->fdes);
return error;
}
u_int
-externalize_recv_msghdr_array(void *dst, int spacetype, int direction,
- u_int count, const struct user_msghdr_x *src,
- struct recv_msg_elem *recv_msg_array)
+externalize_recv_msghdr_array(struct proc *p, struct socket *so, void *dst,
+ u_int count, struct user_msghdr_x *src,
+ struct recv_msg_elem *recv_msg_array, int *ret_error)
{
u_int i;
- int seenlast = 0;
u_int retcnt = 0;
+ int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+
+ *ret_error = 0;
for (i = 0; i < count; i++) {
- const struct user_msghdr_x *user_msg = src + i;
+ struct user_msghdr_x *user_msg = src + i;
struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
- user_ssize_t len;
+ user_ssize_t len = 0;
+ int error;
len = user_msg->msg_datalen - uio_resid(recv_msg_elem->uio);
- if (direction == UIO_READ) {
- if ((recv_msg_elem->which & SOCK_MSG_DATA) == 0) {
- seenlast = 1;
+ if ((recv_msg_elem->which & SOCK_MSG_DATA)) {
+ retcnt++;
+
+ if (recv_msg_elem->which & SOCK_MSG_SA) {
+ error = copyout_sa(recv_msg_elem->psa, user_msg->msg_name,
+ &user_msg->msg_namelen);
+ if (error != 0) {
+ *ret_error = error;
+ return 0;
+ }
}
- } else {
- if (user_msg->msg_datalen != 0 && len == 0) {
- seenlast = 1;
+ if (recv_msg_elem->which & SOCK_MSG_CONTROL) {
+ error = copyout_control(p, recv_msg_elem->controlp,
+ user_msg->msg_control, &user_msg->msg_controllen,
+ &recv_msg_elem->flags, so);
+ if (error != 0) {
+ *ret_error = error;
+ return 0;
+ }
}
}
- if (seenlast == 0) {
- retcnt++;
- }
-
if (spacetype == UIO_USERSPACE64) {
- struct user64_msghdr_x *msghdr64;
-
- msghdr64 = ((struct user64_msghdr_x *)dst) + i;
+ struct user64_msghdr_x *msghdr64 = ((struct user64_msghdr_x *)dst) + i;
- msghdr64->msg_flags = user_msg->msg_flags;
+ msghdr64->msg_namelen = user_msg->msg_namelen;
+ msghdr64->msg_controllen = user_msg->msg_controllen;
+ msghdr64->msg_flags = recv_msg_elem->flags;
msghdr64->msg_datalen = len;
} else {
- struct user32_msghdr_x *msghdr32;
-
- msghdr32 = ((struct user32_msghdr_x *)dst) + i;
+ struct user32_msghdr_x *msghdr32 = ((struct user32_msghdr_x *)dst) + i;
- msghdr32->msg_flags = user_msg->msg_flags;
+ msghdr32->msg_namelen = user_msg->msg_namelen;
+ msghdr32->msg_controllen = user_msg->msg_controllen;
+ msghdr32->msg_flags = recv_msg_elem->flags;
msghdr32->msg_datalen = (user32_size_t)len;
}
}
struct recv_msg_elem *
alloc_recv_msg_array(u_int count)
{
- struct recv_msg_elem *recv_msg_array;
-
- recv_msg_array = _MALLOC(count * sizeof(struct recv_msg_elem),
- M_TEMP, M_WAITOK | M_ZERO);
-
- return recv_msg_array;
+ return kheap_alloc(KHEAP_TEMP,
+ count * sizeof(struct recv_msg_elem), Z_WAITOK | Z_ZERO);
}
void
free_recv_msg_array(struct recv_msg_elem *recv_msg_array, u_int count)
{
- u_int i;
-
- for (i = 0; i < count; i++) {
+ if (recv_msg_array == NULL) {
+ return;
+ }
+ for (uint32_t i = 0; i < count; i++) {
struct recv_msg_elem *recv_msg_elem = recv_msg_array + i;
if (recv_msg_elem->uio != NULL) {
uio_free(recv_msg_elem->uio);
}
- if (recv_msg_elem->psa != NULL) {
- _FREE(recv_msg_elem->psa, M_TEMP);
- }
+ _FREE(recv_msg_elem->psa, M_TEMP);
if (recv_msg_elem->controlp != NULL) {
m_freem(recv_msg_elem->controlp);
}
}
- _FREE(recv_msg_array, M_TEMP);
+ kheap_free(KHEAP_TEMP, recv_msg_array,
+ count * sizeof(struct recv_msg_elem));
}
static unp_gen_t unp_gencnt;
static u_int unp_count;
-static lck_attr_t *unp_mtx_attr;
-static lck_grp_t *unp_mtx_grp;
-static lck_grp_attr_t *unp_mtx_grp_attr;
-static lck_rw_t unp_list_mtx;
-
-static lck_mtx_t unp_disconnect_lock;
-static lck_mtx_t unp_connect_lock;
-static lck_mtx_t uipc_lock;
+static LCK_ATTR_DECLARE(unp_mtx_attr, 0, 0);
+static LCK_GRP_DECLARE(unp_mtx_grp, "unp_list");
+static LCK_RW_DECLARE_ATTR(unp_list_mtx, &unp_mtx_grp, &unp_mtx_attr);
+
+static LCK_MTX_DECLARE_ATTR(unp_disconnect_lock, &unp_mtx_grp, &unp_mtx_attr);
+static LCK_MTX_DECLARE_ATTR(unp_connect_lock, &unp_mtx_grp, &unp_mtx_attr);
+static LCK_MTX_DECLARE_ATTR(uipc_lock, &unp_mtx_grp, &unp_mtx_attr);
+
static u_int disconnect_in_progress;
static struct unp_head unp_shead, unp_dhead;
}
bzero(unp, sizeof(*unp));
- lck_mtx_init(&unp->unp_mtx,
- unp_mtx_grp, unp_mtx_attr);
+ lck_mtx_init(&unp->unp_mtx, &unp_mtx_grp, &unp_mtx_attr);
lck_rw_lock_exclusive(&unp_list_mtx);
LIST_INIT(&unp->unp_refs);
return 0;
}
- MALLOC(unp_list, struct unpcb **, n * sizeof(*unp_list),
- M_TEMP, M_WAITOK);
+ size_t unp_list_len = n * sizeof(*unp_list);
+ unp_list = kheap_alloc(KHEAP_TEMP, unp_list_len, Z_WAITOK);
if (unp_list == 0) {
lck_rw_done(&unp_list_mtx);
return ENOMEM;
xug.xug_count = unp_count;
error = SYSCTL_OUT(req, &xug, sizeof(xug));
}
- FREE(unp_list, M_TEMP);
+ kheap_free(KHEAP_TEMP, unp_list, unp_list_len);
lck_rw_done(&unp_list_mtx);
return error;
}
return 0;
}
- MALLOC(unp_list, struct unpcb **, n * sizeof(*unp_list),
- M_TEMP, M_WAITOK);
+ size_t unp_list_size = n * sizeof(*unp_list);
+ unp_list = kheap_alloc(KHEAP_TEMP, unp_list_size, Z_WAITOK);
if (unp_list == 0) {
lck_rw_done(&unp_list_mtx);
return ENOMEM;
xug.xug_count = unp_count;
error = SYSCTL_OUT(req, &xug, sizeof(xug));
}
- FREE(unp_list, M_TEMP);
+ kheap_free(KHEAP_TEMP, unp_list, unp_list_size);
lck_rw_done(&unp_list_mtx);
return error;
}
int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
int f, error = 0;
- MALLOC(fileproc_l, struct fileproc **,
- newfds * sizeof(struct fileproc *), M_TEMP, M_WAITOK);
+ fileproc_l = kheap_alloc(KHEAP_TEMP,
+ newfds * sizeof(struct fileproc *), Z_WAITOK);
if (fileproc_l == NULL) {
error = ENOMEM;
goto discard;
}
discard:
- if (fileproc_l != NULL) {
- FREE(fileproc_l, M_TEMP);
- }
+ kheap_free(KHEAP_TEMP, fileproc_l,
+ newfds * sizeof(struct fileproc *));
if (error) {
for (i = 0; i < newfds; i++) {
unp_discard(*rp, p);
_CASSERT(UIPC_MAX_CMSG_FD >= (MCLBYTES / sizeof(int)));
LIST_INIT(&unp_dhead);
LIST_INIT(&unp_shead);
-
- /*
- * allocate lock group attribute and group for udp pcb mutexes
- */
- unp_mtx_grp_attr = lck_grp_attr_alloc_init();
-
- unp_mtx_grp = lck_grp_alloc_init("unp_list", unp_mtx_grp_attr);
-
- unp_mtx_attr = lck_attr_alloc_init();
-
- lck_mtx_init(&uipc_lock, unp_mtx_grp, unp_mtx_attr);
- lck_rw_init(&unp_list_mtx, unp_mtx_grp, unp_mtx_attr);
- lck_mtx_init(&unp_disconnect_lock, unp_mtx_grp, unp_mtx_attr);
- lck_mtx_init(&unp_connect_lock, unp_mtx_grp, unp_mtx_attr);
}
#ifndef MIN
*
* 91/09/19, bsy@cs.cmu.edu
*/
- MALLOC(extra_ref, struct fileglob **, nfiles * sizeof(struct fileglob *),
- M_TEMP, M_WAITOK);
+ size_t extra_ref_size = nfiles * sizeof(struct fileglob *);
+ extra_ref = kheap_alloc(KHEAP_TEMP, extra_ref_size, Z_WAITOK);
if (extra_ref == NULL) {
goto bail;
}
fg_drop(PROC_NULL, *fpp);
}
- FREE(extra_ref, M_TEMP);
+ kheap_free(KHEAP_TEMP, extra_ref, extra_ref_size);
+
bail:
lck_mtx_lock(&uipc_lock);
unp_gcing = 0;
lck_mtx_unlock(mutex_held);
- lck_mtx_destroy(&unp->unp_mtx, unp_mtx_grp);
+ lck_mtx_destroy(&unp->unp_mtx, &unp_mtx_grp);
zfree(unp_zone, unp);
unp_gc();
static struct vsock_transport * _Atomic the_vsock_transport = NULL;
static ZONE_DECLARE(vsockpcb_zone, "vsockpcbzone",
sizeof(struct vsockpcb), ZC_NONE);
+static LCK_GRP_DECLARE(vsock_lock_grp, "vsock");
static struct vsockpcbinfo vsockinfo;
static uint32_t vsock_sendspace = VSOCK_MAX_PACKET_SIZE * 8;
struct vsockpcb *match = NULL;
struct vsockpcb *pcb = NULL;
- lck_rw_lock_shared(vsockinfo.bound_lock);
+ lck_rw_lock_shared(&vsockinfo.bound_lock);
LIST_FOREACH(pcb, &vsockinfo.bound, bound) {
// Source cid and port must match. Only destination port must match. (Allows for a changing CID during migration)
socket_lock(pcb->so, 1);
socket_lock(match->so, 1);
preferred = match;
}
- lck_rw_done(vsockinfo.bound_lock);
+ lck_rw_done(&vsockinfo.bound_lock);
return preferred;
}
struct vsockpcb *pcb_match = NULL;
socket_unlock(pcb->so, 0);
- lck_rw_lock_exclusive(vsockinfo.bound_lock);
+ lck_rw_lock_exclusive(&vsockinfo.bound_lock);
LIST_FOREACH(pcb_match, &vsockinfo.bound, bound) {
socket_lock(pcb_match->so, 1);
if (pcb == pcb_match ||
pcb->remote_address = (struct vsock_address) { .cid = remote_cid, .port = remote_port };
LIST_INSERT_HEAD(&vsockinfo.bound, pcb, bound);
}
- lck_rw_done(vsockinfo.bound_lock);
+ lck_rw_done(&vsockinfo.bound_lock);
return taken ? EADDRINUSE : 0;
}
if (!is_locked) {
socket_unlock(pcb->so, 0);
- lck_rw_lock_exclusive(vsockinfo.bound_lock);
+ lck_rw_lock_exclusive(&vsockinfo.bound_lock);
socket_lock(pcb->so, 0);
if (!pcb->bound.le_prev) {
- lck_rw_done(vsockinfo.bound_lock);
+ lck_rw_done(&vsockinfo.bound_lock);
return;
}
}
pcb->bound.le_prev = NULL;
if (!is_locked) {
- lck_rw_done(vsockinfo.bound_lock);
+ lck_rw_done(&vsockinfo.bound_lock);
}
}
}
struct sockaddr_vm *addr;
- MALLOC(addr, struct sockaddr_vm *, sizeof(*addr), M_SONAME, M_WAITOK);
+ MALLOC(addr, struct sockaddr_vm *, sizeof(*addr), M_SONAME,
+ M_WAITOK | M_ZERO);
if (!addr) {
return NULL;
}
- bzero(addr, sizeof(*addr));
addr->svm_len = sizeof(*addr);
addr->svm_family = AF_VSOCK;
addr->svm_port = address->port;
struct vsockpcb *pcb = NULL;
struct vsockpcb *tmp_pcb = NULL;
- lck_rw_lock_exclusive(vsockinfo.bound_lock);
+ lck_rw_lock_exclusive(&vsockinfo.bound_lock);
LIST_FOREACH_SAFE(pcb, &vsockinfo.bound, bound, tmp_pcb) {
// Disconnect this transport's sockets. Listen and bind sockets must stay alive.
socket_lock(pcb->so, 1);
}
socket_unlock(pcb->so, 1);
}
- lck_rw_done(vsockinfo.bound_lock);
+ lck_rw_done(&vsockinfo.bound_lock);
return error;
}
}
// Get the generation count and the count of all vsock sockets.
- lck_rw_lock_shared(vsockinfo.all_lock);
+ lck_rw_lock_shared(&vsockinfo.all_lock);
uint64_t n = vsockinfo.all_pcb_count;
vsock_gen_t gen_count = vsockinfo.vsock_gencnt;
- lck_rw_done(vsockinfo.all_lock);
+ lck_rw_done(&vsockinfo.all_lock);
const size_t xpcb_len = sizeof(struct xvsockpcb);
struct xvsockpgen xvg;
return 0;
}
- lck_rw_lock_shared(vsockinfo.all_lock);
+ lck_rw_lock_shared(&vsockinfo.all_lock);
n = 0;
struct vsockpcb *pcb = NULL;
// Update the generation count to match the sockets being returned.
gen_count = vsockinfo.vsock_gencnt;
- lck_rw_done(vsockinfo.all_lock);
+ lck_rw_done(&vsockinfo.all_lock);
if (!error) {
/*
}
// Add to the list of all vsock sockets.
- lck_rw_lock_exclusive(vsockinfo.all_lock);
+ lck_rw_lock_exclusive(&vsockinfo.all_lock);
TAILQ_INSERT_TAIL(&vsockinfo.all, pcb, all);
vsockinfo.all_pcb_count++;
pcb->vsock_gencnt = ++vsockinfo.vsock_gencnt;
- lck_rw_done(vsockinfo.all_lock);
+ lck_rw_done(&vsockinfo.all_lock);
return 0;
}
}
// Remove from the list of all vsock sockets.
- lck_rw_lock_exclusive(vsockinfo.all_lock);
+ lck_rw_lock_exclusive(&vsockinfo.all_lock);
TAILQ_REMOVE(&vsockinfo.all, pcb, all);
pcb->all.tqe_next = NULL;
pcb->all.tqe_prev = NULL;
vsockinfo.all_pcb_count--;
vsockinfo.vsock_gencnt++;
- lck_rw_done(vsockinfo.all_lock);
+ lck_rw_done(&vsockinfo.all_lock);
// Deallocate any resources.
zfree(vsockpcb_zone, pcb);
}
// Setup VSock protocol info struct.
- vsockinfo.vsock_lock_grp_attr = lck_grp_attr_alloc_init();
- vsockinfo.vsock_lock_grp = lck_grp_alloc_init("vsock", vsockinfo.vsock_lock_grp_attr);
- vsockinfo.vsock_lock_attr = lck_attr_alloc_init();
- if ((vsockinfo.all_lock = lck_rw_alloc_init(vsockinfo.vsock_lock_grp, vsockinfo.vsock_lock_attr)) == NULL ||
- (vsockinfo.bound_lock = lck_rw_alloc_init(vsockinfo.vsock_lock_grp, vsockinfo.vsock_lock_attr)) == NULL) {
- panic("%s: unable to allocate PCB lock\n", __func__);
- /* NOTREACHED */
- }
- lck_mtx_init(&vsockinfo.port_lock, vsockinfo.vsock_lock_grp, vsockinfo.vsock_lock_attr);
+ lck_rw_init(&vsockinfo.all_lock, &vsock_lock_grp, LCK_ATTR_NULL);
+ lck_rw_init(&vsockinfo.bound_lock, &vsock_lock_grp, LCK_ATTR_NULL);
+ lck_mtx_init(&vsockinfo.port_lock, &vsock_lock_grp, LCK_ATTR_NULL);
TAILQ_INIT(&vsockinfo.all);
LIST_INIT(&vsockinfo.bound);
vsockinfo.last_port = VMADDR_PORT_ANY;
.Fa dst
shares its data blocks with the
.Fa src
-file but has its own copy of attributes, extended attributes and ACL's which are identical to
+file but has its own copy of attributes and extended attributes, which are identical to
those of the named file
.Fa src
with the exceptions listed below
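
For context, a minimal user-space use of the interface described above, assuming the <sys/clonefile.h> declarations, could be:

#include <stdio.h>
#include <sys/clonefile.h>

/*
 * Create "copy.dat" as a clone of "orig.dat": the clone shares data
 * blocks with the source until either file is modified, but carries
 * its own copy of the attributes.
 */
int
main(void)
{
	if (clonefile("orig.dat", "copy.dat", 0) != 0) {
		perror("clonefile");
		return 1;
	}
	return 0;
}
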
The caller is not the super-user, and the
.Nm mount()
was not done by the user.
+.It Bq Er EPERM
+A system policy denied the operation.
.It Bq Er ENOTDIR
A component of the path is not a directory.
.It Bq Er EINVAL
#define BIND_NHASH(vp) (&bind_node_hashtbl[((((uintptr_t)vp) >> vnsz2log) + (uintptr_t)vnode_mount(vp)) & bind_hash_mask])
static LIST_HEAD(bind_node_hashhead, bind_node) * bind_node_hashtbl;
-static lck_mtx_t bind_hashmtx;
-static lck_attr_t * bind_hashlck_attr;
-static lck_grp_t * bind_hashlck_grp;
-static lck_grp_attr_t * bind_hashlck_grp_attr;
+static LCK_GRP_DECLARE(bind_hashlck_grp, "com.apple.filesystems.bindfs");
+static LCK_MTX_DECLARE(bind_hashmtx, &bind_hashlck_grp);
static u_long bind_hash_mask;
/* xnu doesn't have hashes built into vnodes. This mimics what freebsd does
static int bind_hashins(struct mount *, struct bind_node *, struct vnode **);
-int
-bindfs_init_lck(lck_mtx_t * lck)
-{
- int error = 1;
- if (lck && bind_hashlck_grp && bind_hashlck_attr) {
- lck_mtx_init(lck, bind_hashlck_grp, bind_hashlck_attr);
- error = 0;
- }
- return error;
-}
-
-int
-bindfs_destroy_lck(lck_mtx_t * lck)
-{
- int error = 1;
- if (lck && bind_hashlck_grp) {
- lck_mtx_destroy(lck, bind_hashlck_grp);
- error = 0;
- }
- return error;
-}
-
/*
* Initialise cache headers
*/
{
BINDFSDEBUG("%s\n", __FUNCTION__);
- /* assuming for now that this happens immediately and by default after fs
- * installation */
- bind_hashlck_grp_attr = lck_grp_attr_alloc_init();
- if (bind_hashlck_grp_attr == NULL) {
- goto error;
- }
- bind_hashlck_grp = lck_grp_alloc_init("com.apple.filesystems.bindfs", bind_hashlck_grp_attr);
- if (bind_hashlck_grp == NULL) {
- goto error;
- }
- bind_hashlck_attr = lck_attr_alloc_init();
- if (bind_hashlck_attr == NULL) {
- goto error;
- }
-
bind_node_hashtbl = hashinit(BIND_HASH_SIZE, M_TEMP, &bind_hash_mask);
if (bind_node_hashtbl == NULL) {
goto error;
}
- lck_mtx_init(&bind_hashmtx, bind_hashlck_grp, bind_hashlck_attr);
BINDFSDEBUG("%s finished\n", __FUNCTION__);
return 0;
error:
printf("BINDFS: failed to initialize globals\n");
- if (bind_hashlck_grp_attr) {
- lck_grp_attr_free(bind_hashlck_grp_attr);
- bind_hashlck_grp_attr = NULL;
- }
- if (bind_hashlck_grp) {
- lck_grp_free(bind_hashlck_grp);
- bind_hashlck_grp = NULL;
- }
- if (bind_hashlck_attr) {
- lck_attr_free(bind_hashlck_attr);
- bind_hashlck_attr = NULL;
- }
return KERN_FAILURE;
}
{
/* This gets called when the fs is uninstalled, there wasn't an exact
* equivalent in vfsops */
- lck_mtx_destroy(&bind_hashmtx, bind_hashlck_grp);
hashdestroy(bind_node_hashtbl, M_TEMP, bind_hash_mask);
- if (bind_hashlck_grp_attr) {
- lck_grp_attr_free(bind_hashlck_grp_attr);
- bind_hashlck_grp_attr = NULL;
- }
- if (bind_hashlck_grp) {
- lck_grp_free(bind_hashlck_grp);
- bind_hashlck_grp = NULL;
- }
- if (bind_hashlck_attr) {
- lck_attr_free(bind_hashlck_attr);
- bind_hashlck_attr = NULL;
- }
return 0;
}
if (error == 0) {
*ap->a_vpp = vp;
}
- }
-
- /* if we got lvp, drop the iocount from VNOP_LOOKUP */
- if (lvp != NULL) {
- vnode_put(lvp);
+ /* if we got lvp, drop the iocount from VNOP_LOOKUP */
+ if (lvp != NULL) {
+ vnode_put(lvp);
+ }
}
return error;
struct dirent *dep;
size_t bytesread;
bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
- MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
+ bufptr = kheap_alloc(KHEAP_TEMP, bufsize, Z_WAITOK);
if (bufptr == NULL) {
return ENOMEM;
}
uio_setoffset(uio, uio_offset(auio));
}
uio_free(auio);
- FREE(bufptr, M_TEMP);
+ kheap_free(KHEAP_TEMP, bufptr, bufsize);
} else {
error = VNOP_READDIR(lvp, ap->a_uio, ap->a_flags, ap->a_eofflag, ap->a_numdirent, ap->a_context);
vnode_put(lvp);
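/*
 * Illustrative sketch of the allocator change above (the function is
 * hypothetical): kheap_free() takes the allocation size, so callers keep the
 * size they passed to kheap_alloc() instead of relying on the old
 * MALLOC/FREE M_TEMP bookkeeping.
 */
static int
example_with_temp_buffer(size_t bufsize)
{
	void *bufptr = kheap_alloc(KHEAP_TEMP, bufsize, Z_WAITOK);
	if (bufptr == NULL) {
		return ENOMEM;
	}
	/* ... fill and drain the buffer ... */
	kheap_free(KHEAP_TEMP, bufptr, bufsize); /* size must match the allocation */
	return 0;
}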
__BEGIN_DECLS
int bindfs_init(struct vfsconf * vfsp);
-int bindfs_init_lck(lck_mtx_t * lck);
-int bindfs_destroy_lck(lck_mtx_t * lck);
int bindfs_destroy(void);
int bind_nodeget(
struct mount * mp, struct vnode * lowervp, struct vnode * dvp, struct vnode ** vpp, struct componentname * cnp, int root);
static int fdesc_attr(int fd, struct vnode_attr *vap, vfs_context_t a_context);
-lck_mtx_t fdesc_mtx;
-lck_grp_t *fdesc_lckgrp;
+static LCK_GRP_DECLARE(fdesc_lckgrp, "fdesc");
+static LCK_MTX_DECLARE(fdesc_mtx, &fdesc_lckgrp);
static void
fdesc_lock(void)
/* XXX Make sure you have the right path... */
fdhashtbl = hashinit(NFDCACHE, M_CACHE, &fdhash);
- fdesc_lckgrp = lck_grp_alloc_init("fdesc", NULL);
- lck_mtx_init(&fdesc_mtx, fdesc_lckgrp, NULL);
DEVFS_LOCK();
dev_add_entry("fd", rootdir, DEV_DEVFD, NULL, NULL, NULL, &direntp);
*vpp = dvp;
if ((error = vnode_get(dvp))) {
- return error;
+ goto bad;
}
return 0;
}
int (*clone)(dev_t dev, int action), const char *fmt, va_list ap);
-lck_grp_t * devfs_lck_grp;
-lck_grp_attr_t * devfs_lck_grp_attr;
-lck_attr_t * devfs_lck_attr;
-lck_mtx_t devfs_mutex;
-lck_mtx_t devfs_attr_mutex;
+static LCK_GRP_DECLARE(devfs_lck_grp, "devfs_lock");
+LCK_MTX_DECLARE(devfs_mutex, &devfs_lck_grp);
+LCK_MTX_DECLARE(devfs_attr_mutex, &devfs_lck_grp);
os_refgrp_decl(static, devfs_refgrp, "devfs", NULL);
{
int error;
- devfs_lck_grp_attr = lck_grp_attr_alloc_init();
- devfs_lck_grp = lck_grp_alloc_init("devfs_lock", devfs_lck_grp_attr);
-
- devfs_lck_attr = lck_attr_alloc_init();
-
- lck_mtx_init(&devfs_mutex, devfs_lck_grp, devfs_lck_attr);
- lck_mtx_init(&devfs_attr_mutex, devfs_lck_grp, devfs_lck_attr);
-
DEVFS_LOCK();
error = dev_add_entry("root", NULL, DEV_DIR, NULL, NULL, NULL, &dev_root);
DEVFS_UNLOCK();
vfs_context_t ctx = vfs_context_kernel();
char fsname[] = "devfs";
- error = kernel_mount(fsname, NULLVP, NULLVP, mntname, NULL, 0, MNT_DONTBROWSE, KERNEL_MOUNT_NOAUTH, ctx);
+ error = kernel_mount(fsname, NULLVP, NULLVP, mntname, NULL, 0, MNT_DONTBROWSE, KERNEL_MOUNT_NOAUTH | KERNEL_MOUNT_DEVFS, ctx);
if (error) {
printf("devfs_kernel_mount: kernel_mount failed: %d\n", error);
return error;
* For the moment, mockfs is not marked in vfs_conf.c as being threadsafe.
*/
-extern lck_attr_t * mockfs_mtx_attr;
-extern lck_grp_attr_t * mockfs_grp_attr;
-extern lck_grp_t * mockfs_mtx_grp;
-
struct mockfs_mount {
lck_mtx_t mockfs_mnt_mtx; /* Mount-wide (and tree-wide) mutex */
mockfs_fsnode_t mockfs_root; /* Root of the node tree */
#include <sys/mount_internal.h>
#include <sys/vnode_internal.h>
-lck_attr_t * mockfs_mtx_attr = (lck_attr_t *) 0;
-lck_grp_attr_t * mockfs_grp_attr = (lck_grp_attr_t *) 0;
-lck_grp_t * mockfs_mtx_grp = (lck_grp_t *) 0;
+static LCK_GRP_DECLARE(mockfs_mtx_grp, "mockfs-mutex");
int mockfs_mountroot(mount_t mp, vnode_t rvp, __unused vfs_context_t ctx);
}
}
- lck_mtx_init(&mockfs_mount_data->mockfs_mnt_mtx, mockfs_mtx_grp, mockfs_mtx_attr);
+ lck_mtx_init(&mockfs_mount_data->mockfs_mnt_mtx, &mockfs_mtx_grp, LCK_ATTR_NULL);
/*
* All of the needed nodes/structures have been set up; now we just need to establish the relationships
mockfs_fsnode_destroy(root_fsnode);
}
if (mockfs_mount_data) {
- lck_mtx_destroy(&mockfs_mount_data->mockfs_mnt_mtx, mockfs_mtx_grp);
+ lck_mtx_destroy(&mockfs_mount_data->mockfs_mnt_mtx, &mockfs_mtx_grp);
FREE(mockfs_mount_data, M_TEMP);
}
}
panic("mockfs_unmount: Failed to destroy the fsnode tree");
}
- lck_mtx_destroy(&mockfs_mnt->mockfs_mnt_mtx, mockfs_mtx_grp);
+ lck_mtx_destroy(&mockfs_mnt->mockfs_mnt_mtx, &mockfs_mtx_grp);
FREE(mockfs_mnt, M_TEMP);
mp->mnt_data = NULL;
return 0;
}
-/*
- * mockfs_init:
- * Run once (during VFS initialization); takes care of generic mockfs initialization (which for now, means
- * global lock information).
- *
- * Returns 0 on success, or an error.
- */
int
mockfs_init(__unused struct vfsconf * vfsc)
{
- mockfs_mtx_attr = lck_attr_alloc_init();
- mockfs_grp_attr = lck_grp_attr_alloc_init();
- mockfs_mtx_grp = lck_grp_alloc_init("mockfs-mutex", mockfs_grp_attr);
-
- /*
- * If we've failed to allocate this early in boot, something is horrendously wrong; it should be fine to
- * panic (for now).
- */
- if (!mockfs_mtx_attr || !mockfs_grp_attr || !mockfs_mtx_grp) {
- panic("mockfs_init failed to allocate lock information");
- }
-
return 0;
}
#define NULL_NHASH(vp) (&null_node_hashtbl[((((uintptr_t)vp) >> vnsz2log) + (uintptr_t)vnode_mount(vp)) & null_hash_mask])
static LIST_HEAD(null_node_hashhead, null_node) * null_node_hashtbl;
-static lck_mtx_t null_hashmtx;
-static lck_attr_t * null_hashlck_attr;
-static lck_grp_t * null_hashlck_grp;
-static lck_grp_attr_t * null_hashlck_grp_attr;
+static LCK_GRP_DECLARE(null_hashlck_grp, "com.apple.filesystems.nullfs");
+static LCK_MTX_DECLARE(null_hashmtx, &null_hashlck_grp);
static u_long null_hash_mask;
/* os x doesn't have hashes built into vnode. gonna try doing what freebsd does
static int null_hashins(struct mount *, struct null_node *, struct vnode **);
-int
+void
nullfs_init_lck(lck_mtx_t * lck)
{
- int error = 1;
- if (lck && null_hashlck_grp && null_hashlck_attr) {
- lck_mtx_init(lck, null_hashlck_grp, null_hashlck_attr);
- error = 0;
- }
- return error;
+ lck_mtx_init(lck, &null_hashlck_grp, LCK_ATTR_NULL);
}
-int
+void
nullfs_destroy_lck(lck_mtx_t * lck)
{
- int error = 1;
- if (lck && null_hashlck_grp) {
- lck_mtx_destroy(lck, null_hashlck_grp);
- error = 0;
- }
- return error;
+ lck_mtx_destroy(lck, &null_hashlck_grp);
}
/*
nullfs_init(__unused struct vfsconf * vfsp)
{
NULLFSDEBUG("%s\n", __FUNCTION__);
-
- /* assuming for now that this happens immediately and by default after fs
- * installation */
- null_hashlck_grp_attr = lck_grp_attr_alloc_init();
- if (null_hashlck_grp_attr == NULL) {
- goto error;
- }
- null_hashlck_grp = lck_grp_alloc_init("com.apple.filesystems.nullfs", null_hashlck_grp_attr);
- if (null_hashlck_grp == NULL) {
- goto error;
- }
- null_hashlck_attr = lck_attr_alloc_init();
- if (null_hashlck_attr == NULL) {
- goto error;
- }
-
- lck_mtx_init(&null_hashmtx, null_hashlck_grp, null_hashlck_attr);
null_node_hashtbl = hashinit(NULL_HASH_SIZE, M_TEMP, &null_hash_mask);
NULLFSDEBUG("%s finished\n", __FUNCTION__);
return 0;
-error:
- printf("NULLFS: failed to get lock element\n");
- if (null_hashlck_grp_attr) {
- lck_grp_attr_free(null_hashlck_grp_attr);
- null_hashlck_grp_attr = NULL;
- }
- if (null_hashlck_grp) {
- lck_grp_free(null_hashlck_grp);
- null_hashlck_grp = NULL;
- }
- if (null_hashlck_attr) {
- lck_attr_free(null_hashlck_attr);
- null_hashlck_attr = NULL;
- }
- return KERN_FAILURE;
}
int
-nullfs_uninit()
+nullfs_uninit(void)
{
/* This gets called when the fs is uninstalled, there wasn't an exact
* equivalent in vfsops */
- lck_mtx_destroy(&null_hashmtx, null_hashlck_grp);
hashdestroy(null_node_hashtbl, M_TEMP, null_hash_mask);
- if (null_hashlck_grp_attr) {
- lck_grp_attr_free(null_hashlck_grp_attr);
- null_hashlck_grp_attr = NULL;
- }
- if (null_hashlck_grp) {
- lck_grp_free(null_hashlck_grp);
- null_hashlck_grp = NULL;
- }
- if (null_hashlck_attr) {
- lck_attr_free(null_hashlck_attr);
- null_hashlck_attr = NULL;
- }
return 0;
}
vnode_ref(vp);
vnode_put(vp);
- error = nullfs_init_lck(&xmp->nullm_lock);
- if (error) {
- goto error;
- }
+ nullfs_init_lck(&xmp->nullm_lock);
xmp->nullm_rootvp = vp;
error = vnode_getattr(lvp, &va, ctx);
if (error || !VATTR_IS_SUPPORTED(&va, va_parentid)) {
+ if (!error) {
+ error = ENOTSUP;
+ }
goto end;
}
if (error == 0) {
*ap->a_vpp = vp;
}
- }
-
- /* if we got lvp, drop the iocount from VNOP_LOOKUP */
- if (lvp != NULL) {
- vnode_put(lvp);
+ /* if we got lvp, drop the iocount from VNOP_LOOKUP */
+ if (lvp != NULL) {
+ vnode_put(lvp);
+ }
}
nullfs_cleanup_patched_context(null_mp, ectx);
__BEGIN_DECLS
int nullfs_init(struct vfsconf * vfsp);
-int nullfs_init_lck(lck_mtx_t * lck);
-int nullfs_destroy_lck(lck_mtx_t * lck);
+void nullfs_init_lck(lck_mtx_t * lck);
+void nullfs_destroy_lck(lck_mtx_t * lck);
int nullfs_uninit(void);
int null_nodeget(
struct mount * mp, struct vnode * lowervp, struct vnode * dvp, struct vnode ** vpp, struct componentname * cnp, int root);
static int routefserr_setlabel(__unused struct vnop_setlabel_args * args);
-lck_grp_t * routefs_lck_grp;
-lck_grp_attr_t * routefs_lck_grp_attr;
-lck_attr_t * routefs_lck_attr;
-lck_mtx_t routefs_mutex;
+LCK_GRP_DECLARE(routefs_lck_grp, "routefs_lock");
+LCK_MTX_DECLARE(routefs_mutex, &routefs_lck_grp);
#define ROUTEFS_LOCK() lck_mtx_lock(&routefs_mutex)
#define ROUTEFS_UNLOCK() lck_mtx_unlock(&routefs_mutex)
-static int _lock_inited = 0;
static boolean_t _fs_alreadyMounted = FALSE; /* at least one mount of this filesystem is present */
static int
routefs_init(__unused struct vfsconf *vfsp)
{
- routefs_lck_grp_attr = lck_grp_attr_alloc_init();
- routefs_lck_grp = lck_grp_alloc_init("routefs_lock", routefs_lck_grp_attr);
- routefs_lck_attr = lck_attr_alloc_init();
- lck_mtx_init(&routefs_mutex, routefs_lck_grp, routefs_lck_attr);
- _lock_inited = 1;
-
return 0;
}
#include <sys/kdebug.h>
#include <libkern/section_keywords.h>
+#if CONFIG_IO_COMPRESSION_STATS
+#include <vfs/vfs_io_compression_stats.h>
+#endif /* CONFIG_IO_COMPRESSION_STATS */
+
/* XXX following three prototypes should be in a header file somewhere */
extern dev_t chrtoblk(dev_t dev);
extern boolean_t iskmemdev(dev_t dev);
SYSCTL_INT(_debug, OID_AUTO, lowpri_throttle_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &lowpri_throttle_enabled, 0, "");
-static lck_grp_t *throttle_lock_grp;
-static lck_attr_t *throttle_lock_attr;
-static lck_grp_attr_t *throttle_lock_grp_attr;
+static LCK_GRP_DECLARE(throttle_lock_grp, "throttle I/O");
/*
if ((info->throttle_refcnt == 0) && (info->throttle_alloc)) {
DEBUG_ALLOC_THROTTLE_INFO("Freeing info = %p\n", info);
- lck_mtx_destroy(&info->throttle_lock, throttle_lock_grp);
+ lck_mtx_destroy(&info->throttle_lock, &throttle_lock_grp);
FREE(info, M_TEMP);
}
return oldValue;
#if CONFIG_IOSCHED
int iosched;
#endif
- /*
- * allocate lock group attribute and group
- */
- throttle_lock_grp_attr = lck_grp_attr_alloc_init();
- throttle_lock_grp = lck_grp_alloc_init("throttle I/O", throttle_lock_grp_attr);
/* Update throttle parameters based on device tree configuration */
throttle_init_throttle_window();
- /*
- * allocate the lock attribute
- */
- throttle_lock_attr = lck_attr_alloc_init();
-
for (i = 0; i < LOWPRI_MAX_NUM_DEV; i++) {
info = &_throttle_io_info[i];
- lck_mtx_init(&info->throttle_lock, throttle_lock_grp, throttle_lock_attr);
+ lck_mtx_init(&info->throttle_lock, &throttle_lock_grp, LCK_ATTR_NULL);
info->throttle_timer_call = thread_call_allocate((thread_call_func_t)throttle_timer, (thread_call_param_t)info);
for (level = 0; level <= THROTTLE_LEVEL_END; level++) {
DEBUG_ALLOC_THROTTLE_INFO("Creating info = %p\n", info, info );
info->throttle_alloc = TRUE;
- lck_mtx_init(&info->throttle_lock, throttle_lock_grp, throttle_lock_attr);
+ lck_mtx_init(&info->throttle_lock, &throttle_lock_grp, LCK_ATTR_NULL);
info->throttle_timer_call = thread_call_allocate((thread_call_func_t)throttle_timer, (thread_call_param_t)info);
for (level = 0; level <= THROTTLE_LEVEL_END; level++) {
return proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
}
+int
+throttle_thread_io_tier_above_metadata(void)
+{
+ return throttle_get_thread_effective_io_policy() < IOSCHED_METADATA_TIER;
+}
+
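/*
 * Caller-side sketch (not part of the diff; the helper name is hypothetical):
 * a filesystem that already has a buffer's attribute structure could combine
 * the new predicate with the BA_EXPEDITED_META_IO flag used below, so that
 * metadata issued from a thread running above the metadata tier is expedited
 * rather than capped at IOSCHED_METADATA_TIER.
 */
static void
example_mark_expedited_meta(struct bufattr *bap)
{
	if (throttle_thread_io_tier_above_metadata()) {
		bap->ba_flags |= BA_EXPEDITED_META_IO;
	}
}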
void
throttle_info_reset_window(uthread_t ut)
{
#if CONFIG_IOSCHED
/*
- * For I/O Scheduling, we currently do not have a way to track and expedite metadata I/Os.
- * To ensure we dont get into priority inversions due to metadata I/Os, we use the following rules:
- * For metadata reads, ceil all I/Os to IOSCHED_METADATA_TIER & mark them passive if the I/O tier was upgraded
- * For metadata writes, unconditionally mark them as IOSCHED_METADATA_TIER and passive
+ * For metadata reads, ceil the I/O tier to IOSCHED_METADATA_EXPEDITED_TIER if they are expedited, otherwise
+ * ceil it to IOSCHED_METADATA_TIER. Mark them passive if the I/O tier was upgraded.
+ * For metadata writes, set the I/O tier to IOSCHED_METADATA_EXPEDITED_TIER if they are expedited. Otherwise
+ * set it to IOSCHED_METADATA_TIER. In addition, mark them as passive.
*/
if (bap->ba_flags & BA_META) {
if ((mp && (mp->mnt_ioflags & MNT_IOFLAGS_IOSCHED_SUPPORTED)) || (bap->ba_flags & BA_IO_SCHEDULED)) {
if (bp->b_flags & B_READ) {
- if (io_tier > IOSCHED_METADATA_TIER) {
+ if ((bap->ba_flags & BA_EXPEDITED_META_IO) && (io_tier > IOSCHED_METADATA_EXPEDITED_TIER)) {
+ io_tier = IOSCHED_METADATA_EXPEDITED_TIER;
+ passive = 1;
+ } else if (io_tier > IOSCHED_METADATA_TIER) {
io_tier = IOSCHED_METADATA_TIER;
passive = 1;
}
} else {
- io_tier = IOSCHED_METADATA_TIER;
+ if (bap->ba_flags & BA_EXPEDITED_META_IO) {
+ io_tier = IOSCHED_METADATA_EXPEDITED_TIER;
+ } else {
+ io_tier = IOSCHED_METADATA_TIER;
+ }
passive = 1;
}
}
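/*
 * A minimal sketch (assumed helper, not in the diff) restating the metadata
 * tier rules above as a pure function. It assumes IOSCHED_METADATA_EXPEDITED_TIER
 * is a numerically lower (higher-priority) tier than IOSCHED_METADATA_TIER.
 */
static int
example_metadata_io_tier(int io_tier, bool is_read, bool expedited, int *passive)
{
	int ceil_tier = expedited ? IOSCHED_METADATA_EXPEDITED_TIER : IOSCHED_METADATA_TIER;

	if (is_read) {
		/* Reads are only ceiled; mark passive when the tier was upgraded. */
		if (io_tier > ceil_tier) {
			io_tier = ceil_tier;
			*passive = 1;
		}
	} else {
		/* Writes unconditionally take the (expedited) metadata tier and are passive. */
		io_tier = ceil_tier;
		*passive = 1;
	}
	return io_tier;
}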
buf_kernel_addrperm_addr(bp), bdev, buf_blkno(bp), buf_count(bp), 0);
}
+#if CONFIG_IO_COMPRESSION_STATS
+ io_compression_stats(bp);
+#endif /* CONFIG_IO_COMPRESSION_STATS */
thread_update_io_stats(current_thread(), buf_count(bp), code);
if (mp != NULL) {
#define CLASSQ_PKT_INITIALIZER(_p) \
(classq_pkt_t){ .cp_mbuf = NULL, .cp_ptype = QP_INVALID }
+#define CLASSQ_PKT_INIT(_p) do { \
+ (_p)->cp_ptype = QP_INVALID; \
+ (_p)->cp_mbuf = NULL; \
+} while (0)
+
#define CLASSQ_PKT_INIT_MBUF(_p, _m) do { \
(_p)->cp_ptype = QP_MBUF; \
(_p)->cp_mbuf = (_m); \
#define CLASSQF_ECN (CLASSQF_ECN4 | CLASSQF_ECN6)
extern u_int32_t classq_verbose;
+#if DEBUG || DEVELOPMENT
+extern uint16_t fq_codel_quantum;
+#endif /* DEBUG || DEVELOPMENT */
SYSCTL_DECL(_net_classq);
if (ptype == QP_MBUF) {
MBUFQ_INIT(&fq->fq_mbufq);
}
+ CLASSQ_PKT_INIT(&fq->fq_dq_head);
+ CLASSQ_PKT_INIT(&fq->fq_dq_tail);
+ fq->fq_in_dqlist = false;
return fq;
}
void
fq_destroy(fq_t *fq)
{
+ VERIFY(fq->fq_flags & FQF_DESTROYED);
VERIFY(fq_empty(fq));
VERIFY(!(fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)));
VERIFY(fq->fq_bytes == 0);
*/
FQ_SET_DELAY_HIGH(flowq);
fq_cl->fcl_stat.fcl_dequeue_stall++;
+ os_log_error(OS_LOG_DEFAULT, "%s: dequeue stall num: %d, "
+ "scidx: %d, flow: 0x%x, iface: %s", __func__,
+ fq_cl->fcl_stat.fcl_dequeue_stall, flowq->fq_sc_index,
+ flowq->fq_flowhash, if_name(fqs->fqs_ifq->ifcq_ifp));
}
}
/* Set the return code correctly */
if (__improbable(fc_adv == 1 && droptype != DTYPE_FORCED)) {
- if (fq_if_add_fcentry(fqs, pkt, pkt_flowid, pkt_flowsrc,
- fq_cl)) {
+ if (fq_if_add_fcentry(fqs, pkt, pkt_flowsrc, fq, fq_cl)) {
fq->fq_flags |= FQF_FLOWCTL_ON;
/* deliver flow control advisory error */
if (droptype == DTYPE_NODROP) {
*/
if (fq_empty(fq) && !(fq->fq_flags &
(FQF_NEW_FLOW | FQF_OLD_FLOW))) {
- fq_if_destroy_flow(fqs, fq_cl, fq);
+ fq_if_destroy_flow(fqs, fq_cl, fq, true);
fq = NULL;
}
} else {
if (fq->fq_min_qdelay > fqs->fqs_target_qdelay) {
if (!FQ_IS_DELAYHIGH(fq)) {
FQ_SET_DELAY_HIGH(fq);
+ os_log_error(OS_LOG_DEFAULT,
+ "%s: high delay idx: %d, %llu, flow: 0x%x, "
+ "iface: %s", __func__, fq->fq_sc_index,
+ fq->fq_min_qdelay, fq->fq_flowhash,
+ if_name(fqs->fqs_ifq->ifcq_ifp));
}
} else {
FQ_CLEAR_DELAY_HIGH(fq);
#define FQF_NEW_FLOW 0x04 /* Currently on new flows queue */
#define FQF_OLD_FLOW 0x08 /* Currently on old flows queue */
#define FQF_FLOWCTL_ON 0x10 /* Currently flow controlled */
+#define FQF_DESTROYED 0x80 /* flowq destroyed */
uint8_t fq_flags; /* flags */
uint8_t fq_sc_index; /* service_class index */
- int16_t fq_deficit; /* Deficit for scheduling */
+ int32_t fq_deficit; /* Deficit for scheduling */
uint32_t fq_bytes; /* Number of bytes in the queue */
uint64_t fq_min_qdelay; /* min queue delay for Codel */
uint64_t fq_updatetime; /* next update interval */
STAILQ_ENTRY(flowq) fq_actlink; /* for new/old flow queues */
uint32_t fq_flowhash; /* Flow hash */
classq_pkt_type_t fq_ptype; /* Packet type */
+ /* temporary packet queue for dequeued packets */
+ classq_pkt_t fq_dq_head;
+ classq_pkt_t fq_dq_tail;
+ STAILQ_ENTRY(flowq) fq_dqlink; /* entry on dequeue flow list */
+ bool fq_in_dqlist;
} fq_t;
#define fq_mbufq __fq_pktq_u.__mbufq
CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_update_interval,
"update interval in nanoseconds");
+#if DEBUG || DEVELOPMENT
+uint32_t ifclassq_flow_control_adv = 1; /* flow control advisory */
+SYSCTL_UINT(_net_classq, OID_AUTO, flow_control_adv,
+ CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_flow_control_adv, 1,
+ "enable/disable flow control advisory");
+
+uint16_t fq_codel_quantum = 0;
+#endif /* DEBUG || DEVELOPMENT */
+
void
classq_init(void)
{
_CASSERT(MBUF_TC_BE == 0);
_CASSERT(MBUF_SC_BE == 0);
_CASSERT(IFCQ_SC_MAX == MBUF_SC_MAX_CLASSES);
-
+#if DEBUG || DEVELOPMENT
+ PE_parse_boot_argn("fq_codel_quantum", &fq_codel_quantum,
+ sizeof(fq_codel_quantum));
+ PE_parse_boot_argn("ifclassq_target_qdelay", &ifclassq_target_qdelay,
+ sizeof(ifclassq_target_qdelay));
+ PE_parse_boot_argn("ifclassq_update_interval",
+ &ifclassq_update_interval, sizeof(ifclassq_update_interval));
+#endif /* DEBUG || DEVELOPMENT */
fq_codel_init();
}
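/*
 * Sketch of the DEBUG/DEVELOPMENT tunable pattern used above (the boot-arg
 * name and variable are hypothetical): declare a compiled-in default, then let
 * PE_parse_boot_argn() override it once at init time on development kernels.
 */
#if DEBUG || DEVELOPMENT
static uint32_t example_tunable = 16; /* compiled-in default */

static void
example_tunable_init(void)
{
	PE_parse_boot_argn("example_tunable", &example_tunable,
	    sizeof(example_tunable));
}
#endif /* DEBUG || DEVELOPMENT */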
enum cqdq_op;
enum cqrq;
+#if DEBUG || DEVELOPMENT
+extern uint32_t ifclassq_flow_control_adv;
+#endif /* DEBUG || DEVELOPMENT */
+
typedef int (*ifclassq_enq_func)(struct ifclassq *, classq_pkt_t *,
boolean_t *);
typedef void (*ifclassq_deq_func)(struct ifclassq *, classq_pkt_t *);
}
}
-static bool
-cfil_socket_safe_lock(struct inpcb *inp)
-{
- if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
- socket_lock(inp->inp_socket, 1);
- if (in_pcb_checkstate(inp, WNT_RELEASE, 1) != WNT_STOPUSING) {
- return true;
- }
- socket_unlock(inp->inp_socket, 1);
- }
- return false;
-}
-
/*
- * cfil_socket_safe_lock_rip -
- * This routine attempts to lock the rip socket safely.
- * The passed in ripcbinfo is assumed to be locked and must be unlocked (regardless
- * of success/failure) before calling socket_unlock(). This is to avoid double
- * locking since rip_unlock() will lock ripcbinfo if it needs to dispose inpcb when
+ * cfil_socket_safe_lock -
+ * This routine attempts to lock the socket safely.
+ *
+ * The passed in pcbinfo is assumed to be locked and must be unlocked once the
+ * inp state is safeguarded and before we attempt to lock/unlock the socket.
+ * This is to prevent getting blocked by socket_lock() while holding the pcbinfo
+ * lock, avoiding potential deadlock with other processes contending for the same
+ * resources. This is also to avoid double locking the pcbinfo for rip sockets
+ * since rip_unlock() will lock ripcbinfo if it needs to dispose inpcb when
* so_usecount is 0.
*/
static bool
-cfil_socket_safe_lock_rip(struct inpcb *inp, struct inpcbinfo *pcbinfo)
+cfil_socket_safe_lock(struct inpcb *inp, struct inpcbinfo *pcbinfo)
{
struct socket *so = NULL;
VERIFY(pcbinfo != NULL);
if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
+ // The inp state is safeguarded; unlock pcbinfo before locking the socket.
+ lck_rw_done(pcbinfo->ipi_lock);
+
so = inp->inp_socket;
socket_lock(so, 1);
if (in_pcb_checkstate(inp, WNT_RELEASE, 1) != WNT_STOPUSING) {
- lck_rw_done(pcbinfo->ipi_lock);
return true;
}
+ } else {
+ // Failed to safeguard the inp state; unlock pcbinfo and abort.
+ lck_rw_done(pcbinfo->ipi_lock);
}
- lck_rw_done(pcbinfo->ipi_lock);
-
if (so) {
socket_unlock(so, 1);
}
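/*
 * Caller-side sketch of the locking discipline enforced above (the function
 * and match callback are hypothetical): the pcbinfo lock is held only while
 * scanning for the inp. Once a candidate is found, cfil_socket_safe_lock()
 * drops pcbinfo->ipi_lock itself, so the caller must neither touch pcbinfo
 * again nor call lck_rw_done() on the match path.
 */
static struct socket *
example_find_and_lock(struct inpcbinfo *pcbinfo, bool (*match)(struct inpcb *))
{
	struct socket *so = NULL;
	struct inpcb *inp = NULL;

	lck_rw_lock_shared(pcbinfo->ipi_lock);
	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (match(inp)) {
			if (cfil_socket_safe_lock(inp, pcbinfo)) {
				so = inp->inp_socket; /* returned with the socket lock held */
			}
			/* pcbinfo->ipi_lock was already dropped by the helper. */
			return so;
		}
	}
	lck_rw_done(pcbinfo->ipi_lock);
	return so;
}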
inp->inp_flowhash == flowhash &&
(inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
inp->inp_socket->so_cfil != NULL) {
- if (cfil_socket_safe_lock(inp)) {
+ if (cfil_socket_safe_lock(inp, pcbinfo)) {
so = inp->inp_socket;
}
- break;
+ /* pcbinfo is already unlocked, we are done. */
+ goto done;
}
}
lck_rw_done(pcbinfo->ipi_lock);
inp->inp_socket != NULL &&
inp->inp_socket->so_cfil_db != NULL &&
(inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
- if (cfil_socket_safe_lock(inp)) {
+ if (cfil_socket_safe_lock(inp, pcbinfo)) {
so = inp->inp_socket;
}
- break;
+ /* pcbinfo is already unlocked, we are done. */
+ goto done;
}
}
lck_rw_done(pcbinfo->ipi_lock);
inp->inp_socket != NULL &&
inp->inp_socket->so_cfil_db != NULL &&
(inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
- if (cfil_socket_safe_lock_rip(inp, pcbinfo)) {
+ if (cfil_socket_safe_lock(inp, pcbinfo)) {
so = inp->inp_socket;
}
/* pcbinfo is already unlocked, we are done. */
inp->inp_socket != NULL &&
uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
*cfil_attached = (inp->inp_socket->so_cfil != NULL);
- if (cfil_socket_safe_lock(inp)) {
+ if (cfil_socket_safe_lock(inp, pcbinfo)) {
so = inp->inp_socket;
}
- break;
+ /* pcbinfo is already unlocked, we are done. */
+ goto done;
}
}
lck_rw_done(pcbinfo->ipi_lock);
inp->inp_socket != NULL &&
uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
*cfil_attached = (inp->inp_socket->so_cfil_db != NULL);
- if (cfil_socket_safe_lock(inp)) {
+ if (cfil_socket_safe_lock(inp, pcbinfo)) {
so = inp->inp_socket;
}
- break;
+ /* pcbinfo is already unlocked, we are done. */
+ goto done;
}
}
lck_rw_done(pcbinfo->ipi_lock);
struct cfil_entry *entry;
struct cfe_buf *entrybuf;
struct cfil_queue *pending_q;
+ struct cfil_entry *iter_entry = NULL;
CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
passlen = entrybuf->cfe_pass_offset - pending_q->q_start;
+ if (cfil_queue_empty(pending_q)) {
+ for (iter_entry = SLIST_NEXT(entry, cfe_order_link);
+ iter_entry != NULL;
+ iter_entry = SLIST_NEXT(iter_entry, cfe_order_link)) {
+ error = cfil_data_service_ctl_q(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, iter_entry), outgoing);
+ /* 0 means passed so we can continue */
+ if (error != 0) {
+ break;
+ }
+ }
+ goto done;
+ }
+
/*
* Locate the chunks of data that we can pass to the next filter
* A data chunk must be on mbuf boundaries
*/
curlen = 0;
while ((data = cfil_queue_first(pending_q)) != NULL) {
- struct cfil_entry *iter_entry;
datalen = cfil_data_length(data, NULL, NULL);
#if DATA_DEBUG
}
}
+done:
CFIL_INFO_VERIFY(cfil_info);
return error;
cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: GC CLEAN UP");
#endif
+ for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
+ /* Let the filters know of the closing */
+ if (cfil_dispatch_closed_event(so, cfil_info, kcunit) != 0) {
+ goto unlock;
+ }
+ }
+
cfil_db_delete_entry(db, hash_entry);
CFIL_INFO_FREE(cfil_info);
OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
}
goto next;
}
- if ((m->m_flags & M_PROMISC) != 0) {
+ /*
+ * A VLAN interface receives VLAN-tagged packets by attaching
+ * its PF_VLAN protocol to a parent interface. When a VLAN
+ * interface is a member of a bridge, the parent interface
+ * receives VLAN-tagged M_PROMISC packets. A VLAN-tagged
+ * M_PROMISC packet must be processed by the VLAN protocol
+ * so that it can be sent up the stack via
+ * dlil_input_packet_list(). That allows the bridge interface's
+ * input filter, attached to the VLAN interface, to process
+ * the packet.
+ */
+ if (protocol_family != PF_VLAN &&
+ (m->m_flags & M_PROMISC) != 0) {
m_freem(m);
goto next;
}
if ((raw != 0) && (ifp->if_family == IFNET_FAMILY_ETHERNET)) {
uint8_t vlan_encap_len = 0;
- if ((old_proto_family == PF_VLAN) &&
- ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0)) {
+ if ((m->m_pkthdr.csum_flags & CSUM_VLAN_ENCAP_PRESENT) != 0) {
vlan_encap_len = ETHER_VLAN_ENCAP_LEN;
}
m->m_pkthdr.pkt_hdr = mtod(m, char *) + ETHER_HDR_LEN + vlan_encap_len;
m->m_pkthdr.csum_tx_start += ETHER_VLAN_ENCAP_LEN;
m->m_pkthdr.csum_tx_stuff += ETHER_VLAN_ENCAP_LEN;
}
+ m->m_pkthdr.csum_flags |= CSUM_VLAN_ENCAP_PRESENT;
}
err = dlil_output(p, PF_VLAN, m, NULL, NULL, 1, &adv);
struct inpcb;
/* Private, internal implementation functions */
-extern void sflt_init(void);
extern int sflt_permission_check(struct inpcb *inp);
extern void sflt_initsock(struct socket *so);
extern void sflt_termsock(struct socket *so);
/*
- * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
+ * Copyright (c) 2013-2021 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <corecrypto/cchmac.h>
#include <corecrypto/ccsha2.h>
#include <os/refcnt.h>
+#include <mach-o/loader.h>
#include <net/network_agent.h>
#include <net/necp.h>
#include <netinet/flow_divert_proto.h>
u_int32_t necp_drop_all_order = 0;
u_int32_t necp_drop_all_level = 0;
-#define NECP_LOOPBACK_PASS_ALL 1 // Pass all loopback traffic
-#define NECP_LOOPBACK_PASS_WITH_FILTER 2 // Pass all loopback traffic, but activate content filter and/or flow divert if applicable
-
-#if defined(XNU_TARGET_OS_OSX)
-#define NECP_LOOPBACK_PASS_DEFAULT NECP_LOOPBACK_PASS_WITH_FILTER
-#else
-#define NECP_LOOPBACK_PASS_DEFAULT NECP_LOOPBACK_PASS_ALL
-#endif
-
-u_int32_t necp_pass_loopback = NECP_LOOPBACK_PASS_DEFAULT;
+u_int32_t necp_pass_loopback = NECP_LOOPBACK_PASS_ALL;
u_int32_t necp_pass_keepalives = 1; // 0=Off, 1=On
u_int32_t necp_pass_interpose = 1; // 0=Off, 1=On
u_int32_t necp_restrict_multicast = 1; // 0=Off, 1=On
#define NECP_KERNEL_CONDITION_SIGNING_IDENTIFIER 0x10000000
#define NECP_KERNEL_CONDITION_PACKET_FILTER_TAGS 0x20000000
#define NECP_KERNEL_CONDITION_IS_LOOPBACK 0x40000000
+#define NECP_KERNEL_CONDITION_DELEGATE_IS_PLATFORM_BINARY 0x80000000
#define NECP_MAX_POLICY_RESULT_SIZE 512
#define NECP_MAX_ROUTE_RULES_ARRAY_SIZE 1024
struct necp_socket_info {
pid_t pid;
+ int32_t pid_version;
uid_t uid;
union necp_sockaddr_union local_addr;
union necp_sockaddr_union remote_addr;
unsigned is_platform_binary : 1;
unsigned used_responsible_pid : 1;
unsigned is_loopback : 1;
- unsigned __pad_bits : 4;
+ unsigned real_is_platform_binary : 1;
+ unsigned is_delegated : 1;
+ unsigned __pad_bits : 6;
};
static lck_grp_attr_t *necp_kernel_policy_grp_attr = NULL;
static bool necp_policy_delete(struct necp_session *session, struct necp_session_policy *policy);
static void necp_policy_apply_all(struct necp_session *session);
-static necp_kernel_policy_id necp_kernel_socket_policy_add(necp_policy_order order, u_int32_t session_order, int session_pid, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_app_id cond_app_id, necp_app_id cond_real_app_id, char *cond_custom_entitlement, u_int32_t cond_account_id, char *domain, pid_t cond_pid, uid_t cond_uid, ifnet_t cond_bound_interface, struct necp_policy_condition_tc_range cond_traffic_class, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, struct necp_policy_condition_agent_type *cond_agent_type, struct necp_policy_condition_sdk_version *cond_sdk_version, u_int32_t cond_client_flags, char *cond_signing_identifier, u_int16_t cond_packet_filter_tags, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter);
+static necp_kernel_policy_id necp_kernel_socket_policy_add(necp_policy_order order, u_int32_t session_order, int session_pid, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_app_id cond_app_id, necp_app_id cond_real_app_id, char *cond_custom_entitlement, u_int32_t cond_account_id, char *domain, pid_t cond_pid, int32_t cond_pidversion, uid_t cond_uid, ifnet_t cond_bound_interface, struct necp_policy_condition_tc_range cond_traffic_class, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, struct necp_policy_condition_agent_type *cond_agent_type, struct necp_policy_condition_sdk_version *cond_sdk_version, u_int32_t cond_client_flags, char *cond_signing_identifier, u_int16_t cond_packet_filter_tags, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter);
static bool necp_kernel_socket_policy_delete(necp_kernel_policy_id policy_id);
static bool necp_kernel_socket_policies_reprocess(void);
static bool necp_kernel_socket_policies_update_uuid_table(void);
static struct necp_uuid_id_mapping *necp_uuid_lookup_uuid_with_service_id_locked(u_int32_t local_id);
static u_int32_t necp_create_uuid_service_id_mapping(uuid_t uuid);
static bool necp_remove_uuid_service_id_mapping(uuid_t uuid);
+static bool necp_remove_uuid_service_id_mapping_with_service_id(u_int32_t service_id);
struct necp_string_id_mapping {
LIST_ENTRY(necp_string_id_mapping) chain;
struct necp_route_rule {
LIST_ENTRY(necp_route_rule) chain;
u_int32_t id;
- u_int32_t default_action;
+ u_int32_t netagent_id;
+ u_int8_t default_action;
u_int8_t cellular_action;
u_int8_t wifi_action;
u_int8_t wired_action;
static u_int32_t necp_create_route_rule(struct necp_route_rule_list *list, u_int8_t *route_rules_array, u_int32_t route_rules_array_size);
static bool necp_remove_route_rule(struct necp_route_rule_list *list, u_int32_t route_rule_id);
static bool necp_route_is_allowed(struct rtentry *route, ifnet_t interface, u_int32_t route_rule_id, u_int32_t *interface_type_denied);
+static uint32_t necp_route_get_netagent(struct rtentry *route, u_int32_t route_rule_id);
static struct necp_route_rule *necp_lookup_route_rule_locked(struct necp_route_rule_list *list, u_int32_t route_rule_id);
static inline void necp_get_parent_cred_result(proc_t proc, struct necp_socket_info *info);
validated = TRUE;
break;
}
+ case NECP_POLICY_CONDITION_DELEGATE_IS_PLATFORM_BINARY: {
+ validated = TRUE;
+ break;
+ }
default: {
validated = FALSE;
break;
validated = TRUE;
break;
}
+ case NECP_ROUTE_RULE_USE_NETAGENT: {
+ u_int32_t rule_length = necp_policy_condition_get_value_length_from_buffer(buffer, length);
+ validated = (rule_length >= sizeof(uuid_t));
+ break;
+ }
default: {
validated = FALSE;
break;
num_conditions++;
}
if (condition_mask & NECP_KERNEL_CONDITION_PID) {
- condition_tlv_length += sizeof(pid_t);
+ condition_tlv_length += (sizeof(pid_t) + sizeof(int32_t));
num_conditions++;
}
if (condition_mask & NECP_KERNEL_CONDITION_UID) {
if (condition_mask & NECP_KERNEL_CONDITION_IS_LOOPBACK) {
num_conditions++;
}
+ if (condition_mask & NECP_KERNEL_CONDITION_DELEGATE_IS_PLATFORM_BINARY) {
+ num_conditions++;
+ }
}
condition_tlv_length += num_conditions * (sizeof(u_int8_t) + sizeof(u_int32_t)); // These are for the condition TLVs. The space for "value" is already accounted for above.
}
}
if (condition_mask & NECP_KERNEL_CONDITION_PID) {
- cond_buf_cursor = necp_buffer_write_tlv(cond_buf_cursor, NECP_POLICY_CONDITION_PID, sizeof(policy->cond_pid), &policy->cond_pid,
+ uint8_t pid_buffer[sizeof(policy->cond_pid) + sizeof(policy->cond_pid_version)] = { };
+ memcpy(pid_buffer, &policy->cond_pid, sizeof(policy->cond_pid));
+ memcpy(pid_buffer + sizeof(policy->cond_pid), &policy->cond_pid_version, sizeof(policy->cond_pid_version));
+ cond_buf_cursor = necp_buffer_write_tlv(cond_buf_cursor, NECP_POLICY_CONDITION_PID, sizeof(pid_buffer), &pid_buffer,
cond_buf, condition_tlv_length);
}
if (condition_mask & NECP_KERNEL_CONDITION_UID) {
if (condition_mask & NECP_KERNEL_CONDITION_IS_LOOPBACK) {
cond_buf_cursor = necp_buffer_write_tlv(cond_buf_cursor, NECP_POLICY_CONDITION_FLOW_IS_LOOPBACK, 0, "", cond_buf, condition_tlv_length);
}
+ if (condition_mask & NECP_KERNEL_CONDITION_DELEGATE_IS_PLATFORM_BINARY) {
+ cond_buf_cursor = necp_buffer_write_tlv(cond_buf_cursor, NECP_POLICY_CONDITION_DELEGATE_IS_PLATFORM_BINARY, 0, "", cond_buf, condition_tlv_length);
+ }
}
cursor = necp_buffer_write_tlv(cursor, NECP_TLV_POLICY_CONDITION, cond_buf_cursor - cond_buf, cond_buf, tlv_buffer, total_allocated_bytes);
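/*
 * Sketch of the paired encode/decode for the extended PID condition above
 * (standalone helpers for illustration only; the real code inlines this):
 * the TLV value is a pid_t followed by an optional int32_t pid version, so
 * older payloads that carry only the pid still parse with a version of 0.
 */
static void
example_pack_pid_condition(uint8_t buffer[sizeof(pid_t) + sizeof(int32_t)],
    pid_t pid, int32_t pid_version)
{
	memcpy(buffer, &pid, sizeof(pid));
	memcpy(buffer + sizeof(pid), &pid_version, sizeof(pid_version));
}

static void
example_unpack_pid_condition(const uint8_t *value, u_int32_t length,
    pid_t *pid, int32_t *pid_version)
{
	*pid_version = 0;
	memcpy(pid, value, sizeof(pid_t));
	if (length >= (sizeof(pid_t) + sizeof(int32_t))) {
		memcpy(pid_version, value + sizeof(pid_t), sizeof(*pid_version));
	}
}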
char *cond_custom_entitlement = NULL;
char *cond_signing_identifier = NULL;
pid_t cond_pid = 0;
+ int32_t cond_pid_version = 0;
uid_t cond_uid = 0;
necp_app_id cond_app_id = 0;
necp_app_id cond_real_app_id = 0;
master_condition_negated_mask |= NECP_KERNEL_CONDITION_PID;
}
memcpy(&cond_pid, condition_value, sizeof(cond_pid));
+ if (condition_length >= (sizeof(pid_t) + sizeof(cond_pid_version))) {
+ memcpy(&cond_pid_version, (condition_value + sizeof(pid_t)), sizeof(cond_pid_version));
+ }
socket_only_conditions = TRUE;
}
break;
socket_only_conditions = TRUE;
break;
}
+ case NECP_POLICY_CONDITION_DELEGATE_IS_PLATFORM_BINARY: {
+ master_condition_mask |= NECP_KERNEL_CONDITION_DELEGATE_IS_PLATFORM_BINARY;
+ if (condition_is_negative) {
+ master_condition_negated_mask |= NECP_KERNEL_CONDITION_DELEGATE_IS_PLATFORM_BINARY;
+ }
+ socket_only_conditions = TRUE;
+ break;
+ }
default: {
break;
}
}
if (socket_layer_non_id_conditions) {
- necp_kernel_policy_id policy_id = necp_kernel_socket_policy_add(policy->order, session->session_order, session->proc_pid, master_condition_mask, master_condition_negated_mask, cond_app_id, cond_real_app_id, cond_custom_entitlement, cond_account_id, cond_domain, cond_pid, cond_uid, cond_bound_interface, cond_traffic_class, cond_protocol, &cond_local_start, &cond_local_end, cond_local_prefix, &cond_remote_start, &cond_remote_end, cond_remote_prefix, &cond_agent_type, &cond_sdk_version, cond_client_flags, cond_signing_identifier, cond_packet_filter_tags, ultimate_result, ultimate_result_parameter);
+ necp_kernel_policy_id policy_id = necp_kernel_socket_policy_add(policy->order, session->session_order, session->proc_pid, master_condition_mask, master_condition_negated_mask, cond_app_id, cond_real_app_id, cond_custom_entitlement, cond_account_id, cond_domain, cond_pid, cond_pid_version, cond_uid, cond_bound_interface, cond_traffic_class, cond_protocol, &cond_local_start, &cond_local_end, cond_local_prefix, &cond_remote_start, &cond_remote_end, cond_remote_prefix, &cond_agent_type, &cond_sdk_version, cond_client_flags, cond_signing_identifier, cond_packet_filter_tags, ultimate_result, ultimate_result_parameter);
if (policy_id == 0) {
NECPLOG0(LOG_DEBUG, "Error applying socket kernel policy");
return newid;
}
-#define NECP_KERNEL_VALID_SOCKET_CONDITIONS (NECP_KERNEL_CONDITION_APP_ID | NECP_KERNEL_CONDITION_REAL_APP_ID | NECP_KERNEL_CONDITION_DOMAIN | NECP_KERNEL_CONDITION_ACCOUNT_ID | NECP_KERNEL_CONDITION_PID | NECP_KERNEL_CONDITION_UID | NECP_KERNEL_CONDITION_ALL_INTERFACES | NECP_KERNEL_CONDITION_BOUND_INTERFACE | NECP_KERNEL_CONDITION_TRAFFIC_CLASS | NECP_KERNEL_CONDITION_PROTOCOL | NECP_KERNEL_CONDITION_LOCAL_START | NECP_KERNEL_CONDITION_LOCAL_END | NECP_KERNEL_CONDITION_LOCAL_PREFIX | NECP_KERNEL_CONDITION_REMOTE_START | NECP_KERNEL_CONDITION_REMOTE_END | NECP_KERNEL_CONDITION_REMOTE_PREFIX | NECP_KERNEL_CONDITION_ENTITLEMENT | NECP_KERNEL_CONDITION_CUSTOM_ENTITLEMENT | NECP_KERNEL_CONDITION_AGENT_TYPE | NECP_KERNEL_CONDITION_HAS_CLIENT | NECP_KERNEL_CONDITION_LOCAL_NETWORKS | NECP_KERNEL_CONDITION_CLIENT_FLAGS | NECP_KERNEL_CONDITION_LOCAL_EMPTY | NECP_KERNEL_CONDITION_REMOTE_EMPTY | NECP_KERNEL_CONDITION_PLATFORM_BINARY | NECP_KERNEL_CONDITION_SDK_VERSION | NECP_KERNEL_CONDITION_SIGNING_IDENTIFIER | NECP_KERNEL_CONDITION_PACKET_FILTER_TAGS | NECP_KERNEL_CONDITION_IS_LOOPBACK)
+#define NECP_KERNEL_VALID_SOCKET_CONDITIONS (NECP_KERNEL_CONDITION_APP_ID | NECP_KERNEL_CONDITION_REAL_APP_ID | NECP_KERNEL_CONDITION_DOMAIN | NECP_KERNEL_CONDITION_ACCOUNT_ID | NECP_KERNEL_CONDITION_PID | NECP_KERNEL_CONDITION_UID | NECP_KERNEL_CONDITION_ALL_INTERFACES | NECP_KERNEL_CONDITION_BOUND_INTERFACE | NECP_KERNEL_CONDITION_TRAFFIC_CLASS | NECP_KERNEL_CONDITION_PROTOCOL | NECP_KERNEL_CONDITION_LOCAL_START | NECP_KERNEL_CONDITION_LOCAL_END | NECP_KERNEL_CONDITION_LOCAL_PREFIX | NECP_KERNEL_CONDITION_REMOTE_START | NECP_KERNEL_CONDITION_REMOTE_END | NECP_KERNEL_CONDITION_REMOTE_PREFIX | NECP_KERNEL_CONDITION_ENTITLEMENT | NECP_KERNEL_CONDITION_CUSTOM_ENTITLEMENT | NECP_KERNEL_CONDITION_AGENT_TYPE | NECP_KERNEL_CONDITION_HAS_CLIENT | NECP_KERNEL_CONDITION_LOCAL_NETWORKS | NECP_KERNEL_CONDITION_CLIENT_FLAGS | NECP_KERNEL_CONDITION_LOCAL_EMPTY | NECP_KERNEL_CONDITION_REMOTE_EMPTY | NECP_KERNEL_CONDITION_PLATFORM_BINARY | NECP_KERNEL_CONDITION_SDK_VERSION | NECP_KERNEL_CONDITION_SIGNING_IDENTIFIER | NECP_KERNEL_CONDITION_PACKET_FILTER_TAGS | NECP_KERNEL_CONDITION_IS_LOOPBACK | NECP_KERNEL_CONDITION_DELEGATE_IS_PLATFORM_BINARY)
static necp_kernel_policy_id
-necp_kernel_socket_policy_add(necp_policy_order order, u_int32_t session_order, int session_pid, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_app_id cond_app_id, necp_app_id cond_real_app_id, char *cond_custom_entitlement, u_int32_t cond_account_id, char *cond_domain, pid_t cond_pid, uid_t cond_uid, ifnet_t cond_bound_interface, struct necp_policy_condition_tc_range cond_traffic_class, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, struct necp_policy_condition_agent_type *cond_agent_type, struct necp_policy_condition_sdk_version *cond_sdk_version, u_int32_t cond_client_flags, char *cond_signing_identifier, u_int16_t cond_packet_filter_tags, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter)
+necp_kernel_socket_policy_add(necp_policy_order order, u_int32_t session_order, int session_pid, u_int32_t condition_mask, u_int32_t condition_negated_mask, necp_app_id cond_app_id, necp_app_id cond_real_app_id, char *cond_custom_entitlement, u_int32_t cond_account_id, char *cond_domain, pid_t cond_pid, int32_t cond_pid_version, uid_t cond_uid, ifnet_t cond_bound_interface, struct necp_policy_condition_tc_range cond_traffic_class, u_int16_t cond_protocol, union necp_sockaddr_union *cond_local_start, union necp_sockaddr_union *cond_local_end, u_int8_t cond_local_prefix, union necp_sockaddr_union *cond_remote_start, union necp_sockaddr_union *cond_remote_end, u_int8_t cond_remote_prefix, struct necp_policy_condition_agent_type *cond_agent_type, struct necp_policy_condition_sdk_version *cond_sdk_version, u_int32_t cond_client_flags, char *cond_signing_identifier, u_int16_t cond_packet_filter_tags, necp_kernel_policy_result result, necp_kernel_policy_result_parameter result_parameter)
{
struct necp_kernel_socket_policy *new_kernel_policy = NULL;
struct necp_kernel_socket_policy *tmp_kernel_policy = NULL;
}
if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_PID) {
new_kernel_policy->cond_pid = cond_pid;
+ new_kernel_policy->cond_pid_version = cond_pid_version;
}
if (new_kernel_policy->condition_mask & NECP_KERNEL_CONDITION_UID) {
new_kernel_policy->cond_uid = cond_uid;
}
if (compared_policy->condition_mask & NECP_KERNEL_CONDITION_PID &&
- compared_policy->cond_pid != policy->cond_pid) {
+ (compared_policy->cond_pid != policy->cond_pid || compared_policy->cond_pid_version != policy->cond_pid_version)) {
continue;
}
}
static struct necp_route_rule *
-necp_lookup_route_rule_by_contents_locked(struct necp_route_rule_list *list, u_int32_t default_action, u_int8_t cellular_action, u_int8_t wifi_action, u_int8_t wired_action, u_int8_t expensive_action, u_int8_t constrained_action, u_int32_t *if_indices, u_int8_t *if_actions)
+necp_lookup_route_rule_by_contents_locked(struct necp_route_rule_list *list, u_int8_t default_action, u_int8_t cellular_action, u_int8_t wifi_action, u_int8_t wired_action, u_int8_t expensive_action, u_int8_t constrained_action, u_int32_t *if_indices, u_int8_t *if_actions, uuid_t netagent_uuid)
{
struct necp_route_rule *searchentry = NULL;
struct necp_route_rule *foundentry = NULL;
break;
}
}
- if (!match_failed && count_a == count_b) {
- foundentry = searchentry;
- break;
+
+ if (match_failed || count_a != count_b) {
+ continue;
+ }
+
+ bool has_agent_a = !uuid_is_null(netagent_uuid);
+ bool has_agent_b = (searchentry->netagent_id != 0);
+ if (has_agent_a != has_agent_b) {
+ continue;
}
+
+ if (has_agent_a) {
+ struct necp_uuid_id_mapping *mapping = necp_uuid_lookup_uuid_with_service_id_locked(searchentry->netagent_id);
+ if (mapping == NULL) {
+ // Bad mapping, doesn't match
+ continue;
+ }
+ if (uuid_compare(mapping->uuid, netagent_uuid) != 0) {
+ // UUIDs don't match
+ continue;
+ }
+ }
+
+ // Rules match!
+ foundentry = searchentry;
+ break;
}
}
size_t offset = 0;
u_int32_t route_rule_id = 0;
struct necp_route_rule *existing_rule = NULL;
- u_int32_t default_action = NECP_ROUTE_RULE_ALLOW_INTERFACE;
+ u_int8_t default_action = NECP_ROUTE_RULE_ALLOW_INTERFACE;
u_int8_t cellular_action = NECP_ROUTE_RULE_NONE;
u_int8_t wifi_action = NECP_ROUTE_RULE_NONE;
u_int8_t wired_action = NECP_ROUTE_RULE_NONE;
u_int8_t if_actions[MAX_ROUTE_RULE_INTERFACES];
memset(&if_actions, 0, sizeof(if_actions));
+ uuid_t netagent_uuid = {};
+
LCK_RW_ASSERT(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE);
if (route_rules_array == NULL || route_rules_array_size == 0) {
}
// Process rules
- while (offset < route_rules_array_size) {
+ while ((offset + sizeof(u_int8_t) + sizeof(u_int32_t)) < route_rules_array_size) {
ifnet_t rule_interface = NULL;
char interface_name[IFXNAMSIZ];
u_int32_t length = 0;
u_int8_t *value = necp_buffer_get_tlv_value(route_rules_array, offset, &length);
+ if (offset + sizeof(u_int8_t) + sizeof(u_int32_t) + length > route_rules_array_size) {
+ // Invalid TLV goes beyond end of the rules array
+ break;
+ }
+
+ // Increment offset for the next time through the loop
+ offset += sizeof(u_int8_t) + sizeof(u_int32_t) + length;
+
u_int8_t rule_type = necp_policy_condition_get_type_from_buffer(value, length);
u_int8_t rule_flags = necp_policy_condition_get_flags_from_buffer(value, length);
u_int32_t rule_length = necp_policy_condition_get_value_length_from_buffer(value, length);
continue;
}
+ if (rule_type == NECP_ROUTE_RULE_USE_NETAGENT) {
+ if (rule_length < sizeof(uuid_t)) {
+ // Too short, skip
+ continue;
+ }
+
+ if (!uuid_is_null(netagent_uuid)) {
+ if (uuid_compare(netagent_uuid, rule_value) != 0) {
+ // UUIDs don't match, skip
+ continue;
+ }
+ } else {
+ // Copy out agent UUID
+ memcpy(netagent_uuid, rule_value, sizeof(netagent_uuid));
+ }
+
+ // Adjust remaining length
+ rule_value += sizeof(netagent_uuid);
+ rule_length -= sizeof(netagent_uuid);
+ }
+
if (rule_length == 0) {
if (rule_flags & NECP_ROUTE_RULE_FLAG_CELLULAR) {
cellular_action = rule_type;
if (rule_flags == 0) {
default_action = rule_type;
}
- offset += sizeof(u_int8_t) + sizeof(u_int32_t) + length;
continue;
}
if (num_valid_indices >= MAX_ROUTE_RULE_INTERFACES) {
- offset += sizeof(u_int8_t) + sizeof(u_int32_t) + length;
continue;
}
ifnet_release(rule_interface);
}
}
- offset += sizeof(u_int8_t) + sizeof(u_int32_t) + length;
}
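/*
 * Consolidated sketch of the bounds-checked TLV walk reworked above (the rule
 * handling is elided; only calls already used in this file appear): each
 * rule's length is validated against the end of the array before its value is
 * used, and the offset is advanced exactly once per iteration, so a malformed
 * length can neither run past the rules array nor stall the loop.
 *
 *	while ((offset + sizeof(u_int8_t) + sizeof(u_int32_t)) < route_rules_array_size) {
 *		u_int32_t length = 0;
 *		u_int8_t *value = necp_buffer_get_tlv_value(route_rules_array, offset, &length);
 *		if (offset + sizeof(u_int8_t) + sizeof(u_int32_t) + length > route_rules_array_size) {
 *			break;		// TLV claims more bytes than remain
 *		}
 *		offset += sizeof(u_int8_t) + sizeof(u_int32_t) + length;
 *		// ... classify and record this rule from value/length ...
 *	}
 */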
- existing_rule = necp_lookup_route_rule_by_contents_locked(list, default_action, cellular_action, wifi_action, wired_action, expensive_action, constrained_action, if_indices, if_actions);
+ existing_rule = necp_lookup_route_rule_by_contents_locked(list, default_action, cellular_action, wifi_action, wired_action, expensive_action, constrained_action, if_indices, if_actions, netagent_uuid);
if (existing_rule != NULL) {
route_rule_id = existing_rule->id;
os_ref_retain_locked(&existing_rule->refcount);
if (new_rule != NULL) {
memset(new_rule, 0, sizeof(struct necp_route_rule));
route_rule_id = new_rule->id = necp_get_new_route_rule_id(false);
+ if (!uuid_is_null(netagent_uuid)) {
+ new_rule->netagent_id = necp_create_uuid_service_id_mapping(netagent_uuid);
+ }
new_rule->default_action = default_action;
new_rule->cellular_action = cellular_action;
new_rule->wifi_action = wifi_action;
if (existing_rule != NULL) {
if (os_ref_release_locked(&existing_rule->refcount) == 0) {
necp_remove_aggregate_route_rule_for_id(existing_rule->id);
+ necp_remove_uuid_service_id_mapping_with_service_id(existing_rule->netagent_id);
LIST_REMOVE(existing_rule, chain);
FREE(existing_rule, M_NECP);
}
return FALSE;
}
+static bool
+necp_remove_uuid_service_id_mapping_with_service_id(u_int32_t service_id)
+{
+ struct necp_uuid_id_mapping *existing_mapping = NULL;
+
+ if (service_id == 0) {
+ return TRUE;
+ }
+
+ LCK_RW_ASSERT(&necp_kernel_policy_lock, LCK_RW_ASSERT_EXCLUSIVE);
+
+ existing_mapping = necp_uuid_lookup_uuid_with_service_id_locked(service_id);
+ if (existing_mapping != NULL) {
+ if (os_ref_release_locked(&existing_mapping->refcount) == 0) {
+ LIST_REMOVE(existing_mapping, chain);
+ FREE(existing_mapping, M_NECP);
+ }
+ return TRUE;
+ }
+
+ return FALSE;
+}
static bool
necp_kernel_socket_policies_update_uuid_table(void)
const uint32_t sdk = proc_sdk(proc);
// Enforce for iOS, linked on or after version 14
- // If the caller set `check_minor_version`, only enforce starting at 14.TBD
+ // If the caller set `check_minor_version`, only enforce starting at 14.5
if (platform != PLATFORM_IOS ||
sdk == 0 ||
(sdk >> 16) < 14 ||
-#if 0
- (check_minor_version && (sdk >> 16) == 14 && ((sdk >> 8) & 0xff) < TBD)) {
-#else
- (check_minor_version)) {
-#endif
+ (check_minor_version && (sdk >> 16) == 14 && ((sdk >> 8) & 0xff) < 5)) {
return false;
}
#define NECP_KERNEL_ADDRESS_TYPE_CONDITIONS (NECP_KERNEL_CONDITION_LOCAL_START | NECP_KERNEL_CONDITION_LOCAL_END | NECP_KERNEL_CONDITION_LOCAL_PREFIX | NECP_KERNEL_CONDITION_REMOTE_START | NECP_KERNEL_CONDITION_REMOTE_END | NECP_KERNEL_CONDITION_REMOTE_PREFIX | NECP_KERNEL_CONDITION_LOCAL_EMPTY | NECP_KERNEL_CONDITION_REMOTE_EMPTY | NECP_KERNEL_CONDITION_LOCAL_NETWORKS)
static void
-necp_application_fillout_info_locked(uuid_t application_uuid, uuid_t real_application_uuid, uuid_t responsible_application_uuid, char *account, char *domain, pid_t pid, uid_t uid, u_int16_t protocol, u_int32_t bound_interface_index, u_int32_t traffic_class, union necp_sockaddr_union *local_addr, union necp_sockaddr_union *remote_addr, u_int16_t local_port, u_int16_t remote_port, bool has_client, proc_t proc, proc_t responsible_proc, u_int32_t drop_order, u_int32_t client_flags, struct necp_socket_info *info, bool is_loopback)
+necp_application_fillout_info_locked(uuid_t application_uuid, uuid_t real_application_uuid, uuid_t responsible_application_uuid, char *account, char *domain, pid_t pid, int32_t pid_version, uid_t uid, u_int16_t protocol, u_int32_t bound_interface_index, u_int32_t traffic_class, union necp_sockaddr_union *local_addr, union necp_sockaddr_union *remote_addr, u_int16_t local_port, u_int16_t remote_port, bool has_client, proc_t real_proc, proc_t proc, proc_t responsible_proc, u_int32_t drop_order, u_int32_t client_flags, struct necp_socket_info *info, bool is_loopback, bool is_delegated)
{
memset(info, 0, sizeof(struct necp_socket_info));
info->pid = pid;
+ info->pid_version = pid_version;
info->uid = uid;
info->protocol = protocol;
info->bound_interface_index = bound_interface_index;
info->drop_order = drop_order;
info->client_flags = client_flags;
info->is_loopback = is_loopback;
+ info->is_delegated = is_delegated;
if (necp_kernel_application_policies_condition_mask & NECP_KERNEL_CONDITION_APP_ID && !uuid_is_null(application_uuid)) {
struct necp_uuid_id_mapping *existing_mapping = necp_uuid_lookup_app_id_locked(application_uuid);
info->is_platform_binary = necp_is_platform_binary(proc) ? true : false;
}
+ if (necp_kernel_application_policies_condition_mask & NECP_KERNEL_CONDITION_DELEGATE_IS_PLATFORM_BINARY && real_proc != NULL) {
+ info->real_is_platform_binary = (necp_is_platform_binary(real_proc) ? true : false);
+ }
+
if (necp_kernel_application_policies_condition_mask & NECP_KERNEL_CONDITION_ACCOUNT_ID && account != NULL) {
struct necp_string_id_mapping *existing_mapping = necp_lookup_string_to_id_locked(&necp_account_id_list, account);
if (existing_mapping) {
if (local_port != 0) {
info->local_addr.sin6.sin6_port = local_port;
}
- } else if (local_port != 0) {
- info->local_addr.sin6.sin6_len = sizeof(struct sockaddr_in6);
- info->local_addr.sin6.sin6_family = AF_INET6;
- info->local_addr.sin6.sin6_port = local_port;
+ } else {
+ if (remote_addr && remote_addr->sa.sa_len > 0) {
+ info->local_addr.sa.sa_family = remote_addr->sa.sa_family;
+ info->local_addr.sa.sa_len = remote_addr->sa.sa_len;
+ } else {
+ info->local_addr.sin6.sin6_family = AF_INET6;
+ info->local_addr.sin6.sin6_len = sizeof(struct sockaddr_in6);
+ }
+ if (local_port != 0) {
+ info->local_addr.sin6.sin6_port = local_port;
+ }
}
if (remote_addr && remote_addr->sa.sa_len > 0) {
memcpy(&info->remote_addr, remote_addr, remote_addr->sa.sa_len);
u_int16_t local_port = 0;
u_int16_t remote_port = 0;
necp_drop_all_bypass_check_result_t drop_all_bypass = NECP_DROP_ALL_BYPASS_CHECK_RESULT_NONE;
+ bool is_delegated = false;
if (override_local_addr) {
memcpy(&local_addr, override_local_addr, sizeof(local_addr));
// Initialize UID, PID, and UUIDs to the current process
uid_t uid = kauth_cred_getuid(proc_ucred(proc));
pid_t pid = proc_pid(proc);
+ int32_t pid_version = proc_pidversion(proc);
uuid_t application_uuid;
uuid_clear(application_uuid);
uuid_t real_application_uuid;
NECP_VERIFY_DELEGATION_ENTITLEMENT(proc, "euuid");
+ is_delegated = true;
uuid_copy(application_uuid, value);
}
break;
NECP_VERIFY_DELEGATION_ENTITLEMENT(proc, "uuid");
+ is_delegated = true;
uuid_copy(real_application_uuid, value);
}
break;
NECP_VERIFY_DELEGATION_ENTITLEMENT(proc, "pid");
+ is_delegated = true;
memcpy(&pid, value, sizeof(pid_t));
}
break;
NECP_VERIFY_DELEGATION_ENTITLEMENT(proc, "uid");
+ is_delegated = true;
memcpy(&uid, value, sizeof(uid_t));
}
break;
proc_t found_proc = proc_find(pid);
if (found_proc != PROC_NULL) {
effective_proc = found_proc;
+ pid_version = proc_pidversion(effective_proc);
release_eproc = true;
}
}
u_int32_t route_rule_id_array[MAX_AGGREGATE_ROUTE_RULES];
size_t route_rule_id_array_count = 0;
- necp_application_fillout_info_locked(application_uuid, real_application_uuid, responsible_application_uuid, account, domain, pid, uid, protocol, bound_interface_index, traffic_class, &local_addr, &remote_addr, local_port, remote_port, has_client, effective_proc, responsible_proc, drop_order, client_flags, &info, (bypass_type == NECP_BYPASS_TYPE_LOOPBACK));
+ necp_application_fillout_info_locked(application_uuid, real_application_uuid, responsible_application_uuid, account, domain, pid, pid_version, uid, protocol, bound_interface_index, traffic_class, &local_addr, &remote_addr, local_port, remote_port, has_client, proc, effective_proc, responsible_proc, drop_order, client_flags, &info, (bypass_type == NECP_BYPASS_TYPE_LOOPBACK), is_delegated);
matched_policy = necp_socket_find_policy_match_with_info_locked(necp_kernel_socket_policies_app_layer_map, &info, &filter_control_unit, route_rule_id_array, &route_rule_id_array_count, MAX_AGGREGATE_ROUTE_RULES, &service_action, &service, netagent_ids, netagent_use_flags, NECP_MAX_NETAGENTS, required_agent_types, num_required_agent_types, info.used_responsible_pid ? responsible_proc : effective_proc, 0, NULL, NULL, &drop_dest_policy_result, &drop_all_bypass, &flow_divert_aggregate_unit);
// Check for loopback exception again after the policy match
if (v6Route->rt_ifp != NULL) {
*flags |= NECP_CLIENT_RESULT_FLAG_HAS_IPV6;
- if (ifnet_get_nat64prefix(v6Route->rt_ifp, NULL) == 0) {
+ if (ifnet_get_nat64prefix(v6Route->rt_ifp, returned_result->nat64_prefixes) == 0) {
*flags |= NECP_CLIENT_RESULT_FLAG_HAS_NAT64;
}
}
// If the route gets denied, stop matching rules
break;
}
+
+ // Check if there is a route rule that adds an agent
+ u_int32_t netagent_id = necp_route_get_netagent(rt, route_rule_id_array[route_rule_index]);
+ if (netagent_id != 0) {
+ struct necp_uuid_id_mapping *mapping = necp_uuid_lookup_uuid_with_service_id_locked(netagent_id);
+ if (mapping != NULL) {
+ for (netagent_cursor = 0; netagent_cursor < NECP_MAX_NETAGENTS; netagent_cursor++) {
+ if (uuid_is_null(returned_result->netagents[netagent_cursor])) {
+ // Found open slot
+ uuid_copy(returned_result->netagents[netagent_cursor], mapping->uuid);
+ returned_result->netagent_use_flags[netagent_cursor] = 0;
+ break;
+ }
+ }
+ }
+ }
}
if (rt != NULL && rt->rt_ifp != NULL) {
}
static bool
-necp_socket_check_policy(struct necp_kernel_socket_policy *kernel_policy, necp_app_id app_id, necp_app_id real_app_id, errno_t cred_result, u_int32_t account_id, struct substring domain, u_int8_t domain_dot_count, pid_t pid, uid_t uid, u_int32_t bound_interface_index, u_int32_t traffic_class, u_int16_t protocol, union necp_sockaddr_union *local, union necp_sockaddr_union *remote, struct necp_client_parameter_netagent_type *required_agent_types, u_int32_t num_required_agent_types, bool has_client, uint32_t client_flags, int is_platform_binary, proc_t proc, u_int16_t pf_tag, struct rtentry *rt, bool is_loopback)
+necp_socket_check_policy(struct necp_kernel_socket_policy *kernel_policy, necp_app_id app_id, necp_app_id real_app_id, errno_t cred_result, u_int32_t account_id, struct substring domain, u_int8_t domain_dot_count, pid_t pid, int32_t pid_version, uid_t uid, u_int32_t bound_interface_index, u_int32_t traffic_class, u_int16_t protocol, union necp_sockaddr_union *local, union necp_sockaddr_union *remote, struct necp_client_parameter_netagent_type *required_agent_types, u_int32_t num_required_agent_types, bool has_client, uint32_t client_flags, int is_platform_binary, proc_t proc, u_int16_t pf_tag, struct rtentry *rt, bool is_loopback, bool real_is_platform_binary, bool is_delegated)
{
if (!(kernel_policy->condition_mask & NECP_KERNEL_CONDITION_ALL_INTERFACES)) {
if (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_BOUND_INTERFACE) {
// No match, matches forbidden pid
return FALSE;
}
+ if (kernel_policy->cond_pid_version != 0 && pid_version == kernel_policy->cond_pid_version) {
+ return FALSE;
+ }
} else {
if (pid != kernel_policy->cond_pid) {
// No match, does not match required pid
return FALSE;
}
+ if (kernel_policy->cond_pid_version != 0 && pid_version != kernel_policy->cond_pid_version) {
+ return FALSE;
+ }
}
}
}
}
+ if (is_delegated && (kernel_policy->condition_mask & NECP_KERNEL_CONDITION_DELEGATE_IS_PLATFORM_BINARY)) {
+ if (kernel_policy->condition_negated_mask & NECP_KERNEL_CONDITION_DELEGATE_IS_PLATFORM_BINARY) {
+ if (real_is_platform_binary) {
+ return FALSE;
+ }
+ } else {
+ if (!real_is_platform_binary) {
+ return FALSE;
+ }
+ }
+ }
+
return TRUE;
}
}
static void
-necp_socket_fillout_info_locked(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int32_t override_bound_interface, u_int32_t drop_order, proc_t *socket_proc, struct necp_socket_info *info, bool is_loopback)
+necp_socket_fillout_info_locked(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int32_t override_bound_interface, bool override_is_inbound, u_int32_t drop_order, proc_t *socket_proc, struct necp_socket_info *info, bool is_loopback)
{
struct socket *so = NULL;
proc_t sock_proc = NULL;
info->drop_order = drop_order;
info->is_loopback = is_loopback;
-
- if (necp_kernel_socket_policies_condition_mask & NECP_KERNEL_CONDITION_PID) {
- info->pid = ((so->so_flags & SOF_DELEGATED) ? so->e_pid : so->last_pid);
- }
+ info->is_delegated = ((so->so_flags & SOF_DELEGATED) ? true : false);
if (necp_kernel_socket_policies_condition_mask & NECP_KERNEL_CONDITION_UID) {
info->uid = kauth_cred_getuid(so->so_cred);
if (inp->inp_socket->so_flags1 & SOF1_CELLFALLBACK) {
info->client_flags |= NECP_CLIENT_PARAMETER_FLAG_FALLBACK_TRAFFIC;
}
- if (inp->inp_socket->so_flags1 & SOF1_INBOUND) {
+ if (inp->inp_socket->so_flags1 & SOF1_INBOUND || override_is_inbound) {
info->client_flags |= NECP_CLIENT_PARAMETER_FLAG_INBOUND;
}
if (inp->inp_socket->so_options & SO_ACCEPTCONN ||
}
}
+ if (necp_kernel_socket_policies_condition_mask & NECP_KERNEL_CONDITION_PID) {
+ info->pid = socket_pid;
+ info->pid_version = proc_pidversion(sock_proc != NULL ? sock_proc : curr_proc);
+ }
+
if (necp_kernel_socket_policies_condition_mask & NECP_KERNEL_CONDITION_PLATFORM_BINARY) {
info->is_platform_binary = necp_is_platform_binary(sock_proc ? sock_proc : curr_proc) ? true : false;
}
+ if (necp_kernel_socket_policies_condition_mask & NECP_KERNEL_CONDITION_DELEGATE_IS_PLATFORM_BINARY) {
+ proc_t real_proc = curr_proc;
+ bool release_real_proc = false;
+ if (so->last_pid != proc_pid(real_proc)) {
+ if (so->last_pid == socket_pid && sock_proc != NULL) {
+ real_proc = sock_proc;
+ } else {
+ proc_t last_proc = proc_find(so->last_pid);
+ if (last_proc != NULL) {
+ real_proc = last_proc;
+ release_real_proc = true;
+ }
+ }
+ }
+ if (real_proc != NULL) {
+ info->real_is_platform_binary = (necp_is_platform_binary(real_proc) ? true : false);
+ if (release_real_proc) {
+ proc_rele(real_proc);
+ }
+ }
+ }
+
if (necp_kernel_socket_policies_condition_mask & NECP_KERNEL_CONDITION_ACCOUNT_ID && inp->inp_necp_attributes.inp_account != NULL) {
struct necp_string_id_mapping *existing_mapping = necp_lookup_string_to_id_locked(&necp_account_id_list, inp->inp_necp_attributes.inp_account);
if (existing_mapping) {
continue;
}
- if (necp_socket_check_policy(policy_search_array[i], info->application_id, info->real_application_id, info->cred_result, info->account_id, domain_substring, domain_dot_count, info->pid, info->uid, info->bound_interface_index, info->traffic_class, info->protocol, &info->local_addr, &info->remote_addr, required_agent_types, num_required_agent_types, info->has_client, info->client_flags, info->is_platform_binary, proc, pf_tag, rt, info->is_loopback)) {
+ if (necp_socket_check_policy(policy_search_array[i], info->application_id, info->real_application_id, info->cred_result, info->account_id, domain_substring, domain_dot_count, info->pid, info->pid_version, info->uid, info->bound_interface_index, info->traffic_class, info->protocol, &info->local_addr, &info->remote_addr, required_agent_types, num_required_agent_types, info->has_client, info->client_flags, info->is_platform_binary, proc, pf_tag, rt, info->is_loopback, info->real_is_platform_binary, info->is_delegated)) {
if (policy_search_array[i]->result == NECP_KERNEL_POLICY_RESULT_SOCKET_FILTER) {
if (return_filter && *return_filter != NECP_FILTER_UNIT_NO_FILTER) {
necp_kernel_policy_filter control_unit = policy_search_array[i]->result_parameter.filter_control_unit;
// Lock
lck_rw_lock_shared(&necp_kernel_policy_lock);
- necp_socket_fillout_info_locked(inp, override_local_addr, override_remote_addr, override_bound_interface, drop_order, &socket_proc, &info, (bypass_type == NECP_BYPASS_TYPE_LOOPBACK));
+ necp_socket_fillout_info_locked(inp, override_local_addr, override_remote_addr, override_bound_interface, false, drop_order, &socket_proc, &info, (bypass_type == NECP_BYPASS_TYPE_LOOPBACK));
// Check info
u_int32_t flowhash = necp_socket_calc_flowhash_locked(&info);
necp_socket_ip_tunnel_tso(inp);
}
- if (send_local_network_denied_event) {
+ if (send_local_network_denied_event && inp->inp_policyresult.network_denied_notifies == 0) {
+ inp->inp_policyresult.network_denied_notifies++;
necp_send_network_denied_event(((so->so_flags & SOF_DELEGATED) ? so->e_pid : so->last_pid),
((so->so_flags & SOF_DELEGATED) ? so->e_uuid : so->last_uuid),
NETPOLICY_NETWORKTYPE_LOCAL);
return TRUE;
}
+static uint32_t
+necp_route_get_netagent(struct rtentry *route, u_int32_t route_rule_id)
+{
+ if (route == NULL) {
+ return 0;
+ }
+
+ struct ifnet *ifp = route->rt_ifp;
+ if (ifp == NULL) {
+ return 0;
+ }
+
+ struct necp_route_rule *route_rule = necp_lookup_route_rule_locked(&necp_route_rules, route_rule_id);
+ if (route_rule == NULL) {
+ return 0;
+ }
+
+ // No netagent, skip
+ if (route_rule->netagent_id == 0) {
+ return 0;
+ }
+
+ if (route_rule->default_action == NECP_ROUTE_RULE_USE_NETAGENT) {
+ return route_rule->netagent_id;
+ }
+
+ for (int exception_index = 0; exception_index < MAX_ROUTE_RULE_INTERFACES; exception_index++) {
+ if (route_rule->exception_if_indices[exception_index] == 0) {
+ break;
+ }
+ if (route_rule->exception_if_indices[exception_index] == ifp->if_index &&
+ route_rule->exception_if_actions[exception_index] == NECP_ROUTE_RULE_USE_NETAGENT) {
+ return route_rule->netagent_id;
+ }
+ }
+
+ if (route_rule->cellular_action == NECP_ROUTE_RULE_USE_NETAGENT &&
+ ifp->if_type == IFT_CELLULAR) {
+ return route_rule->netagent_id;
+ }
+
+ if (route_rule->wifi_action == NECP_ROUTE_RULE_USE_NETAGENT &&
+ ifp->if_family == IFNET_FAMILY_ETHERNET && ifp->if_subfamily == IFNET_SUBFAMILY_WIFI) {
+ return route_rule->netagent_id;
+ }
+
+ if (route_rule->wired_action == NECP_ROUTE_RULE_USE_NETAGENT &&
+ (ifp->if_family == IFNET_FAMILY_ETHERNET || ifp->if_family == IFNET_FAMILY_FIREWIRE)) {
+ return route_rule->netagent_id;
+ }
+
+ if (route_rule->expensive_action == NECP_ROUTE_RULE_USE_NETAGENT &&
+ ifp->if_eflags & IFEF_EXPENSIVE) {
+ return route_rule->netagent_id;
+ }
+
+ if (route_rule->constrained_action == NECP_ROUTE_RULE_USE_NETAGENT &&
+ ifp->if_xflags & IFXF_CONSTRAINED) {
+ return route_rule->netagent_id;
+ }
+
+ return 0;
+}
+
bool
necp_packet_is_allowed_over_interface(struct mbuf *packet, struct ifnet *interface)
{
}
static bool
-necp_socket_is_allowed_to_send_recv_internal(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, ifnet_t interface, u_int16_t pf_tag, necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id, necp_kernel_policy_id *return_skip_policy_id, u_int32_t *return_pass_flags)
+necp_socket_is_allowed_to_send_recv_internal(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, ifnet_t input_interface, u_int16_t pf_tag, necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id, necp_kernel_policy_id *return_skip_policy_id, u_int32_t *return_pass_flags)
{
- u_int32_t verifyifindex = interface ? interface->if_index : 0;
+ u_int32_t verifyifindex = input_interface ? input_interface->if_index : 0;
bool allowed_to_receive = TRUE;
struct necp_socket_info info;
u_int32_t flowhash = 0;
} else {
if (inp->inp_policyresult.results.route_rule_id != 0) {
lck_rw_lock_shared(&necp_kernel_policy_lock);
- if (!necp_route_is_allowed(route, interface, inp->inp_policyresult.results.route_rule_id, &interface_type_denied)) {
+ if (!necp_route_is_allowed(route, input_interface, inp->inp_policyresult.results.route_rule_id, &interface_type_denied)) {
route_allowed = FALSE;
}
lck_rw_done(&necp_kernel_policy_lock);
if (!route_allowed ||
inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_DROP ||
inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT ||
- (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL && interface &&
+ (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL && input_interface &&
inp->inp_policyresult.results.result_parameter.tunnel_interface_index != verifyifindex)) {
allowed_to_receive = FALSE;
} else {
// Actually calculate policy result
lck_rw_lock_shared(&necp_kernel_policy_lock);
- necp_socket_fillout_info_locked(inp, override_local_addr, override_remote_addr, 0, drop_order, &socket_proc, &info, (bypass_type == NECP_BYPASS_TYPE_LOOPBACK));
+ necp_socket_fillout_info_locked(inp, override_local_addr, override_remote_addr, 0, input_interface != NULL ? true : false, drop_order, &socket_proc, &info, (bypass_type == NECP_BYPASS_TYPE_LOOPBACK));
flowhash = necp_socket_calc_flowhash_locked(&info);
if (inp->inp_policyresult.policy_id != NECP_KERNEL_POLICY_ID_NONE &&
inp->inp_policyresult.flowhash == flowhash) {
if (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_DROP ||
inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT ||
- (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL && interface &&
+ (inp->inp_policyresult.results.result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL && input_interface &&
inp->inp_policyresult.results.result_parameter.tunnel_interface_index != verifyifindex) ||
(inp->inp_policyresult.results.route_rule_id != 0 &&
- !necp_route_is_allowed(route, interface, inp->inp_policyresult.results.route_rule_id, &interface_type_denied))) {
+ !necp_route_is_allowed(route, input_interface, inp->inp_policyresult.results.route_rule_id, &interface_type_denied))) {
allowed_to_receive = FALSE;
} else {
if (return_policy_id) {
if (matched_policy->result == NECP_KERNEL_POLICY_RESULT_DROP ||
matched_policy->result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT ||
- (matched_policy->result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL && interface &&
+ (matched_policy->result == NECP_KERNEL_POLICY_RESULT_IP_TUNNEL && input_interface &&
matched_policy->result_parameter.tunnel_interface_index != verifyifindex) ||
((service_action == NECP_KERNEL_POLICY_RESULT_TRIGGER_SCOPED ||
service_action == NECP_KERNEL_POLICY_RESULT_NO_TRIGGER_SCOPED) &&
service.identifier != 0 && service.identifier != NECP_NULL_SERVICE_ID) ||
(route_rule_id != 0 &&
- !necp_route_is_allowed(route, interface, route_rule_id, &interface_type_denied)) ||
+ !necp_route_is_allowed(route, input_interface, route_rule_id, &interface_type_denied)) ||
!necp_netagents_allow_traffic(netagent_ids, NECP_MAX_NETAGENTS)) {
allowed_to_receive = FALSE;
} else {
lck_rw_done(&necp_kernel_policy_lock);
- if (send_local_network_denied_event) {
+ if (send_local_network_denied_event && inp->inp_policyresult.network_denied_notifies == 0) {
+ inp->inp_policyresult.network_denied_notifies++;
necp_send_network_denied_event(((so->so_flags & SOF_DELEGATED) ? so->e_pid : so->last_pid),
((so->so_flags & SOF_DELEGATED) ? so->e_uuid : so->last_uuid),
NETPOLICY_NETWORKTYPE_LOCAL);
}
bool
-necp_socket_is_allowed_to_send_recv_v4(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in_addr *local_addr, struct in_addr *remote_addr, ifnet_t interface, u_int16_t pf_tag, necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id, necp_kernel_policy_id *return_skip_policy_id, u_int32_t *return_pass_flags)
+necp_socket_is_allowed_to_send_recv_v4(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in_addr *local_addr, struct in_addr *remote_addr, ifnet_t input_interface, u_int16_t pf_tag, necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id, necp_kernel_policy_id *return_skip_policy_id, u_int32_t *return_pass_flags)
{
struct sockaddr_in local = {};
struct sockaddr_in remote = {};
memcpy(&local.sin_addr, local_addr, sizeof(local.sin_addr));
memcpy(&remote.sin_addr, remote_addr, sizeof(remote.sin_addr));
- return necp_socket_is_allowed_to_send_recv_internal(inp, (struct sockaddr *)&local, (struct sockaddr *)&remote, interface,
+ return necp_socket_is_allowed_to_send_recv_internal(inp, (struct sockaddr *)&local, (struct sockaddr *)&remote, input_interface,
pf_tag, return_policy_id, return_route_rule_id, return_skip_policy_id, return_pass_flags);
}
bool
-necp_socket_is_allowed_to_send_recv_v6(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in6_addr *local_addr, struct in6_addr *remote_addr, ifnet_t interface, u_int16_t pf_tag, necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id, necp_kernel_policy_id *return_skip_policy_id, u_int32_t *return_pass_flags)
+necp_socket_is_allowed_to_send_recv_v6(struct inpcb *inp, u_int16_t local_port, u_int16_t remote_port, struct in6_addr *local_addr, struct in6_addr *remote_addr, ifnet_t input_interface, u_int16_t pf_tag, necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id, necp_kernel_policy_id *return_skip_policy_id, u_int32_t *return_pass_flags)
{
struct sockaddr_in6 local = {};
struct sockaddr_in6 remote = {};
memcpy(&local.sin6_addr, local_addr, sizeof(local.sin6_addr));
memcpy(&remote.sin6_addr, remote_addr, sizeof(remote.sin6_addr));
- return necp_socket_is_allowed_to_send_recv_internal(inp, (struct sockaddr *)&local, (struct sockaddr *)&remote, interface,
+ return necp_socket_is_allowed_to_send_recv_internal(inp, (struct sockaddr *)&local, (struct sockaddr *)&remote, input_interface,
pf_tag, return_policy_id, return_route_rule_id, return_skip_policy_id, return_pass_flags);
}
bool
-necp_socket_is_allowed_to_send_recv(struct inpcb *inp, ifnet_t interface, u_int16_t pf_tag, necp_kernel_policy_id *return_policy_id,
+necp_socket_is_allowed_to_send_recv(struct inpcb *inp, ifnet_t input_interface, u_int16_t pf_tag, necp_kernel_policy_id *return_policy_id,
u_int32_t *return_route_rule_id, necp_kernel_policy_id *return_skip_policy_id, u_int32_t *return_pass_flags)
{
- return necp_socket_is_allowed_to_send_recv_internal(inp, NULL, NULL, interface, pf_tag,
+ return necp_socket_is_allowed_to_send_recv_internal(inp, NULL, NULL, input_interface, pf_tag,
return_policy_id, return_route_rule_id,
return_skip_policy_id, return_pass_flags);
}
/*
- * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
+ * Copyright (c) 2013-2021 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#define NECP_POLICY_CONDITION_SDK_VERSION 28 // struct necp_policy_condition_sdk_version
#define NECP_POLICY_CONDITION_SIGNING_IDENTIFIER 29 // String
#define NECP_POLICY_CONDITION_PACKET_FILTER_TAGS 30 // u_int16_t
+#define NECP_POLICY_CONDITION_DELEGATE_IS_PLATFORM_BINARY 32 // N/A
/*
* Policy Packet tags
#define NECP_ROUTE_RULE_ALLOW_INTERFACE 2 // String, or empty to match all
#define NECP_ROUTE_RULE_QOS_MARKING 3 // String, or empty to match all
#define NECP_ROUTE_RULE_DENY_LQM_ABORT 4 // String, or empty to match all
+#define NECP_ROUTE_RULE_USE_NETAGENT 5 // UUID, followed by string or empty
#define NECP_ROUTE_RULE_FLAG_CELLULAR 0x01
#define NECP_ROUTE_RULE_FLAG_WIFI 0x02
u_int32_t policy_id;
uuid_t netagents[NECP_MAX_NETAGENTS];
u_int32_t netagent_use_flags[NECP_MAX_NETAGENTS];
+ struct ipv6_prefix nat64_prefixes[NAT64_MAX_NUM_PREFIXES];
u_int8_t mss_recommended;
};
#define NECP_CLIENT_RESULT_EFFECTIVE_TRAFFIC_CLASS 210 // u_int32_t
#define NECP_CLIENT_RESULT_TRAFFIC_MGMT_BG 211 // u_int32_t, 1: background, 0: not background
#define NECP_CLIENT_RESULT_GATEWAY 212 // struct necp_client_endpoint
+#define NECP_CLIENT_RESULT_NAT64 213 // struct ipv6_prefix[NAT64_MAX_NUM_PREFIXES]
#define NECP_CLIENT_RESULT_FLAG_IS_LOCAL 0x0001 // Routes to this device
#define NECP_CLIENT_RESULT_FLAG_IS_DIRECT 0x0002 // Routes to directly accessible peer
#define NECPCTL_RESTRICT_MULTICAST 20 /* Restrict multicast access */
#define NECPCTL_DEDUP_POLICIES 21 /* Dedup overlapping policies */
+#define NECP_LOOPBACK_PASS_ALL 1 // Pass all loopback traffic
+#define NECP_LOOPBACK_PASS_WITH_FILTER 2 // Pass all loopback traffic, but activate content filter and/or flow divert if applicable
#define NECPCTL_NAMES { \
{ 0, 0 }, \
struct necp_policy_condition_sdk_version cond_sdk_version;
char *cond_signing_identifier; // String
u_int16_t cond_packet_filter_tags;
+ int32_t cond_pid_version;
necp_kernel_policy_result result;
necp_kernel_policy_result_parameter result_parameter;
};
struct necp_inpcb_result {
- u_int32_t app_id;
+ u_int32_t app_id;
necp_kernel_policy_id policy_id;
necp_kernel_policy_id skip_policy_id;
- int32_t policy_gencount;
- u_int32_t flowhash;
- struct necp_aggregate_socket_result results;
+ int32_t policy_gencount;
+ u_int32_t flowhash;
+ u_int32_t network_denied_notifies; // Notification count
+ struct necp_aggregate_socket_result results;
};
extern errno_t necp_init(void);
extern bool necp_socket_is_allowed_to_recv_on_interface(struct inpcb *inp, ifnet_t interface);
-extern bool necp_socket_is_allowed_to_send_recv(struct inpcb *inp, ifnet_t interface, u_int16_t pf_tag,
+extern bool necp_socket_is_allowed_to_send_recv(struct inpcb *inp, ifnet_t input_interface, u_int16_t pf_tag,
necp_kernel_policy_id *return_policy_id,
u_int32_t *return_route_rule_id,
necp_kernel_policy_id *return_skip_policy_id, u_int32_t *return_pass_flags);
extern bool necp_socket_is_allowed_to_send_recv_v4(struct inpcb *inp, u_int16_t local_port,
u_int16_t remote_port, struct in_addr *local_addr,
- struct in_addr *remote_addr, ifnet_t interface, u_int16_t pf_tag,
+ struct in_addr *remote_addr, ifnet_t input_interface, u_int16_t pf_tag,
necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id,
necp_kernel_policy_id *return_skip_policy_id, u_int32_t *return_pass_flags);
extern bool necp_socket_is_allowed_to_send_recv_v6(struct inpcb *inp, u_int16_t local_port,
u_int16_t remote_port, struct in6_addr *local_addr,
- struct in6_addr *remote_addr, ifnet_t interface, u_int16_t pf_tag,
+ struct in6_addr *remote_addr, ifnet_t input_interface, u_int16_t pf_tag,
necp_kernel_policy_id *return_policy_id, u_int32_t *return_route_rule_id,
necp_kernel_policy_id *return_skip_policy_id, u_int32_t *return_pass_flags);
extern void necp_socket_update_qos_marking(struct inpcb *inp, struct rtentry *route, u_int32_t route_rule_id);
necp_client_add_interface_option_if_needed(struct necp_client *client,
uint32_t interface_index,
uint32_t interface_generation,
- uuid_t *nexus_agent)
+ uuid_t *nexus_agent,
+ bool network_provider)
{
- if (interface_index == IFSCOPE_NONE ||
+ if ((interface_index == IFSCOPE_NONE && !network_provider) ||
(client->interface_option_count != 0 && !client->allow_multiple_flows)) {
- // Interface not set, or client not allowed to use this mode
+ // Interface or agent not set, or client not allowed to use this mode
return;
}
(flags & NETAGENT_FLAG_SUPPORTS_BROWSE) &&
(!(flags & NETAGENT_FLAG_SPECIFIC_USE_ONLY) ||
necp_netagent_is_required(parsed_parameters, &ifp->if_agentids[i]))) {
- necp_client_add_interface_option_if_needed(client, ifp->if_index, ifnet_get_generation(ifp), &ifp->if_agentids[i]);
+ necp_client_add_interface_option_if_needed(client, ifp->if_index, ifnet_get_generation(ifp),
+ &ifp->if_agentids[i], (flags & NETAGENT_FLAG_NETWORK_PROVIDER));
// Finding one is enough
break;
client->result, sizeof(client->result));
}
+ for (int i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
+ if (result.nat64_prefixes[i].prefix_len != 0) {
+ cursor = necp_buffer_write_tlv_if_different(cursor, NECP_CLIENT_RESULT_NAT64,
+ sizeof(result.nat64_prefixes), result.nat64_prefixes, &updated,
+ client->result, sizeof(client->result));
+ break;
+ }
+ }
+
if (result.mss_recommended != 0) {
cursor = necp_buffer_write_tlv_if_different(cursor, NECP_CLIENT_RESULT_RECOMMENDED_MSS,
sizeof(result.mss_recommended), &result.mss_recommended, &updated,
if (necp_ifnet_matches_parameters(multi_interface, parsed_parameters, 0, NULL, true, false)) {
// Add multipath interface flows for kernel MPTCP
necp_client_add_interface_option_if_needed(client, multi_interface->if_index,
- ifnet_get_generation(multi_interface), NULL);
+ ifnet_get_generation(multi_interface), NULL, false);
// Add nexus agents for multipath
necp_client_add_agent_interface_options(client, parsed_parameters, multi_interface);
// Add interface option in case it is not a nexus
necp_client_add_interface_option_if_needed(client, direct_interface->if_index,
- ifnet_get_generation(direct_interface), NULL);
+ ifnet_get_generation(direct_interface), NULL, false);
}
} else {
// Get listener interface options from global list
goto done;
}
- if (uap->buffer == 0 || buffer_size < sizeof(struct necp_client_add_flow)) {
+ if (uap->buffer == 0 || buffer_size < sizeof(struct necp_client_add_flow) ||
+ buffer_size > sizeof(struct necp_client_add_flow_default) * 4) {
error = EINVAL;
NECPLOG(LOG_ERR, "necp_client_add_flow invalid buffer (length %zu)", buffer_size);
goto done;
#include <sys/types.h>
#include <sys/param.h>
#include <kern/zalloc.h>
+#include <net/ethernet.h>
#include <net/if_var.h>
#include <net/if.h>
#include <net/classq/classq.h>
#include <net/classq/classq_fq_codel.h>
#include <net/pktsched/pktsched_fq_codel.h>
+#include <os/log.h>
+
+#define FQ_CODEL_DEFAULT_QUANTUM 1500
+
+#define FQ_CODEL_QUANTUM_BK_SYS(_q) (_q)
+#define FQ_CODEL_QUANTUM_BK(_q) (_q)
+#define FQ_CODEL_QUANTUM_BE(_q) (_q)
+#define FQ_CODEL_QUANTUM_RD(_q) (_q)
+#define FQ_CODEL_QUANTUM_OAM(_q) (_q)
+#define FQ_CODEL_QUANTUM_AV(_q) (_q * 2)
+#define FQ_CODEL_QUANTUM_RV(_q) (_q * 2)
+#define FQ_CODEL_QUANTUM_VI(_q) (_q * 2)
+#define FQ_CODEL_QUANTUM_VO(_q) ((_q * 2) / 5)
+#define FQ_CODEL_QUANTUM_CTL(_q) ((_q * 2) / 5)
+
+#define FQ_CODEL_DRR_MAX_BK_SYS 2
+#define FQ_CODEL_DRR_MAX_BK 2
+#define FQ_CODEL_DRR_MAX_BE 4
+#define FQ_CODEL_DRR_MAX_RD 4
+#define FQ_CODEL_DRR_MAX_OAM 4
+#define FQ_CODEL_DRR_MAX_AV 6
+#define FQ_CODEL_DRR_MAX_RV 6
+#define FQ_CODEL_DRR_MAX_VI 6
+#define FQ_CODEL_DRR_MAX_VO 8
+#define FQ_CODEL_DRR_MAX_CTL 8
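The quantum and DRR macros above encode the per-service-class weighting that replaces the hard-coded byte counts in fq_if_setup_ifclassq further below: the video/voice classes scale a base quantum up or down and also get a larger deficit-round-robin budget. A minimal standalone sketch of the resulting table for the default 1500-byte quantum (illustrative only; the struct and main() here are not part of xnu):

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	const uint32_t q = 1500; /* FQ_CODEL_DEFAULT_QUANTUM */
	const struct { const char *cls; uint32_t quantum; uint32_t drr_max; } tbl[] = {
		{ "BK_SYS", q,           2 }, { "BK",  q,           2 },
		{ "BE",     q,           4 }, { "RD",  q,           4 },
		{ "OAM",    q,           4 }, { "AV",  q * 2,       6 },
		{ "RV",     q * 2,       6 }, { "VI",  q * 2,       6 },
		{ "VO",     (q * 2) / 5, 8 }, { "CTL", (q * 2) / 5, 8 },
	};
	for (size_t i = 0; i < sizeof(tbl) / sizeof(tbl[0]); i++) {
		/* e.g. AV/RV/VI get 3000 bytes per round, VO/CTL get 600 */
		printf("%-6s quantum=%u drr_max=%u\n",
		    tbl[i].cls, tbl[i].quantum, tbl[i].drr_max);
	}
	return 0;
}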
static ZONE_DECLARE(fq_if_zone, "pktsched_fq_if", sizeof(fq_if_t), ZC_ZFREE_CLEARMEM);
+typedef STAILQ_HEAD(, flowq) flowq_dqlist_t;
+
static fq_if_t *fq_if_alloc(struct ifnet *, classq_pkt_type_t);
static void fq_if_destroy(fq_if_t *fqs);
static void fq_if_classq_init(fq_if_t *fqs, uint32_t priority,
uint16_t quantum, uint32_t drr_max, uint32_t svc_class);
static void fq_if_dequeue(fq_if_t *, fq_if_classq_t *, uint32_t,
int64_t, classq_pkt_t *, classq_pkt_t *, uint32_t *,
- uint32_t *, boolean_t drvmgmt);
+ uint32_t *, flowq_dqlist_t *, boolean_t drvmgmt);
void fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat);
static void fq_if_purge(fq_if_t *);
static void fq_if_purge_classq(fq_if_t *, fq_if_classq_t *);
static void fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl,
bool add_to_old);
static void fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl,
- fq_t *fq, bool remove_hash);
+ fq_t *fq, bool remove_hash, bool destroy);
#define FQ_IF_FLOW_HASH_ID(_flowid_) \
(((_flowid_) >> FQ_IF_HASH_TAG_SHIFT) & FQ_IF_HASH_TAG_MASK)
static boolean_t
fq_getq_flow_mbuf(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
- int64_t byte_limit, u_int32_t pkt_limit, classq_pkt_t *top,
- classq_pkt_t *last, u_int32_t *byte_cnt, u_int32_t *pkt_cnt,
+ int64_t byte_limit, u_int32_t pkt_limit, classq_pkt_t *head,
+ classq_pkt_t *tail, u_int32_t *byte_cnt, u_int32_t *pkt_cnt,
boolean_t *qempty, u_int32_t pflags)
{
u_int32_t plen;
fq->fq_deficit -= plen;
pkt.pktsched_pkt_mbuf->m_pkthdr.pkt_flags |= pflags;
- if (top->cp_mbuf == NULL) {
- *top = pkt.pktsched_pkt;
+ if (head->cp_mbuf == NULL) {
+ *head = pkt.pktsched_pkt;
} else {
- ASSERT(last->cp_mbuf != NULL);
- ASSERT(last->cp_mbuf->m_nextpkt == NULL);
- last->cp_mbuf->m_nextpkt = pkt.pktsched_pkt_mbuf;
+ ASSERT(tail->cp_mbuf != NULL);
+ ASSERT(tail->cp_mbuf->m_nextpkt == NULL);
+ tail->cp_mbuf->m_nextpkt = pkt.pktsched_pkt_mbuf;
}
- *last = pkt.pktsched_pkt;
- last->cp_mbuf->m_nextpkt = NULL;
+ *tail = pkt.pktsched_pkt;
+ tail->cp_mbuf->m_nextpkt = NULL;
fq_cl->fcl_stat.fcl_dequeue++;
fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
*pkt_cnt += 1;
IFCQ_INC_BYTES(ifq, bytes);
IFCQ_UNLOCK(ifq);
done:
+#if DEBUG || DEVELOPMENT
+ if (__improbable((ret == EQFULL) && (ifclassq_flow_control_adv == 0))) {
+ ret = 0;
+ }
+#endif /* DEBUG || DEVELOPMENT */
return ret;
}
fq_cl = &fqs->fqs_classq[pri];
fq_if_dequeue(fqs, fq_cl, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
- pkt, NULL, &total_pktcnt, &total_bytecnt, TRUE);
+ pkt, NULL, &total_pktcnt, &total_bytecnt, NULL, TRUE);
IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
}
+static inline void
+fq_dqlist_add(flowq_dqlist_t *fq_dqlist_head, fq_t *fq)
+{
+ ASSERT(fq->fq_dq_head.cp_mbuf == NULL);
+ ASSERT(!fq->fq_in_dqlist);
+ STAILQ_INSERT_TAIL(fq_dqlist_head, fq, fq_dqlink);
+ fq->fq_in_dqlist = true;
+}
+
+static inline void
+fq_dqlist_remove(flowq_dqlist_t *fq_dqlist_head, fq_t *fq, classq_pkt_t *head,
+ classq_pkt_t *tail)
+{
+ ASSERT(fq->fq_in_dqlist);
+ if (fq->fq_dq_head.cp_mbuf == NULL) {
+ goto done;
+ }
+
+ if (head->cp_mbuf == NULL) {
+ *head = fq->fq_dq_head;
+ } else {
+ ASSERT(tail->cp_mbuf != NULL);
+
+ switch (fq->fq_ptype) {
+ case QP_MBUF:
+ ASSERT(tail->cp_mbuf->m_nextpkt == NULL);
+ tail->cp_mbuf->m_nextpkt = fq->fq_dq_head.cp_mbuf;
+ ASSERT(fq->fq_dq_tail.cp_mbuf->m_nextpkt == NULL);
+ break;
+ default:
+ VERIFY(0);
+ /* NOTREACHED */
+ __builtin_unreachable();
+ }
+ }
+ *tail = fq->fq_dq_tail;
+done:
+ STAILQ_REMOVE(fq_dqlist_head, fq, flowq, fq_dqlink);
+ CLASSQ_PKT_INIT(&fq->fq_dq_head);
+ CLASSQ_PKT_INIT(&fq->fq_dq_tail);
+ fq->fq_in_dqlist = false;
+ if (fq->fq_flags & FQF_DESTROYED) {
+ fq_destroy(fq);
+ }
+}
+
+static inline void
+fq_dqlist_get_packet_list(flowq_dqlist_t *fq_dqlist_head, classq_pkt_t *head,
+ classq_pkt_t *tail)
+{
+ fq_t *fq, *tfq;
+
+ STAILQ_FOREACH_SAFE(fq, fq_dqlist_head, fq_dqlink, tfq) {
+ fq_dqlist_remove(fq_dqlist_head, fq, head, tail);
+ }
+}
+
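The fq_dqlist helpers above let fq_if_dequeue stage each flow's packets on the flow's own fq_dq_head/fq_dq_tail and then splice every staged chain into the caller's chain once, instead of appending packet by packet through a shared tail. A minimal userspace sketch of the same stage-then-splice pattern (the pkt/flow types and the flow_stage/flow_splice names are hypothetical, not xnu APIs):

#include <stdio.h>

struct pkt { int id; struct pkt *next; };
struct flow { struct pkt *dq_head, *dq_tail; };

/* Stage a packet on the flow's private dequeue chain. */
static void
flow_stage(struct flow *f, struct pkt *p)
{
	p->next = NULL;
	if (f->dq_head == NULL) {
		f->dq_head = p;
	} else {
		f->dq_tail->next = p;
	}
	f->dq_tail = p;
}

/* Splice the flow's staged chain onto the caller's chain in one step. */
static void
flow_splice(struct flow *f, struct pkt **head, struct pkt **tail)
{
	if (f->dq_head == NULL) {
		return;
	}
	if (*head == NULL) {
		*head = f->dq_head;
	} else {
		(*tail)->next = f->dq_head;
	}
	*tail = f->dq_tail;
	f->dq_head = f->dq_tail = NULL;
}

int
main(void)
{
	struct pkt p[4] = { { 1, NULL }, { 2, NULL }, { 3, NULL }, { 4, NULL } };
	struct flow f1 = { NULL, NULL }, f2 = { NULL, NULL };
	struct pkt *head = NULL, *tail = NULL;

	flow_stage(&f1, &p[0]);
	flow_stage(&f2, &p[2]);
	flow_stage(&f1, &p[1]);
	flow_stage(&f2, &p[3]);
	flow_splice(&f1, &head, &tail);
	flow_splice(&f2, &head, &tail);
	for (struct pkt *q = head; q != NULL; q = q->next) {
		printf("pkt %d\n", q->id); /* prints 1 2 3 4 */
	}
	return 0;
}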
int
fq_if_dequeue_classq_multi(struct ifclassq *ifq, u_int32_t maxpktcnt,
u_int32_t maxbytecnt, classq_pkt_t *first_packet,
classq_pkt_t *last_packet, u_int32_t *retpktcnt,
u_int32_t *retbytecnt)
{
- u_int32_t pktcnt = 0, bytecnt = 0, total_pktcnt = 0, total_bytecnt = 0;
+ uint32_t total_pktcnt = 0, total_bytecnt = 0;
classq_pkt_t first = CLASSQ_PKT_INITIALIZER(fisrt);
classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
classq_pkt_t tmp = CLASSQ_PKT_INITIALIZER(tmp);
fq_if_append_pkt_t append_pkt;
+ flowq_dqlist_t fq_dqlist_head;
fq_if_classq_t *fq_cl;
fq_if_t *fqs;
int pri;
IFCQ_LOCK_ASSERT_HELD(ifq);
fqs = (fq_if_t *)ifq->ifcq_disc;
+ STAILQ_INIT(&fq_dqlist_head);
switch (fqs->fqs_ptype) {
case QP_MBUF:
}
for (;;) {
- classq_pkt_t top = CLASSQ_PKT_INITIALIZER(top);
+ uint32_t pktcnt = 0, bytecnt = 0;
+ classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
if (fqs->fqs_bitmaps[FQ_IF_ER] == 0 &&
}
}
fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
- (maxbytecnt - total_bytecnt), &top, &tail, &pktcnt,
- &bytecnt, FALSE);
- if (top.cp_mbuf != NULL) {
- ASSERT(pktcnt > 0 && bytecnt > 0);
+ (maxbytecnt - total_bytecnt), &head, &tail, &pktcnt,
+ &bytecnt, &fq_dqlist_head, FALSE);
+ if (head.cp_mbuf != NULL) {
+ ASSERT(STAILQ_EMPTY(&fq_dqlist_head));
if (first.cp_mbuf == NULL) {
- first = top;
- total_pktcnt = pktcnt;
- total_bytecnt = bytecnt;
+ first = head;
} else {
ASSERT(last.cp_mbuf != NULL);
- append_pkt(&last, &top);
- total_pktcnt += pktcnt;
- total_bytecnt += bytecnt;
+ append_pkt(&last, &head);
}
last = tail;
append_pkt(&last, &tmp);
- fq_cl->fcl_budget -= bytecnt;
- pktcnt = 0;
- bytecnt = 0;
}
+ fq_cl->fcl_budget -= bytecnt;
+ total_pktcnt += pktcnt;
+ total_bytecnt += bytecnt;
/*
* If the class has exceeded the budget but still has data
}
}
+ fq_dqlist_get_packet_list(&fq_dqlist_head, &first, &last);
+
if (__probable(first_packet != NULL)) {
*first_packet = first;
}
classq_pkt_t first = CLASSQ_PKT_INITIALIZER(fisrt);
classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
fq_if_append_pkt_t append_pkt;
+ flowq_dqlist_t fq_dqlist_head;
switch (fqs->fqs_ptype) {
case QP_MBUF:
__builtin_unreachable();
}
+ STAILQ_INIT(&fq_dqlist_head);
pri = fq_if_service_to_priority(fqs, svc);
fq_cl = &fqs->fqs_classq[pri];
/*
*/
while (total_pktcnt < maxpktcnt && total_bytecnt < maxbytecnt &&
fq_cl->fcl_stat.fcl_pkt_cnt > 0) {
- classq_pkt_t top = CLASSQ_PKT_INITIALIZER(top);
+ classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
u_int32_t pktcnt = 0, bytecnt = 0;
fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
- (maxbytecnt - total_bytecnt), &top, &tail, &pktcnt,
- &bytecnt, TRUE);
- if (top.cp_mbuf != NULL) {
+ (maxbytecnt - total_bytecnt), &head, &tail, &pktcnt,
+ &bytecnt, &fq_dqlist_head, TRUE);
+ if (head.cp_mbuf != NULL) {
if (first.cp_mbuf == NULL) {
- first = top;
- total_pktcnt = pktcnt;
- total_bytecnt = bytecnt;
+ first = head;
} else {
ASSERT(last.cp_mbuf != NULL);
- append_pkt(&last, &top);
- total_pktcnt += pktcnt;
- total_bytecnt += bytecnt;
+ append_pkt(&last, &head);
}
last = tail;
}
+ total_pktcnt += pktcnt;
+ total_bytecnt += bytecnt;
}
+ fq_dqlist_get_packet_list(&fq_dqlist_head, &first, &last);
+
if (__probable(first_packet != NULL)) {
*first_packet = first;
}
if (fq->fq_flags & FQF_NEW_FLOW) {
fq_if_empty_new_flow(fq, fq_cl, false);
} else if (fq->fq_flags & FQF_OLD_FLOW) {
- fq_if_empty_old_flow(fqs, fq_cl, fq, false);
+ fq_if_empty_old_flow(fqs, fq_cl, fq, false, true);
}
- fq_if_destroy_flow(fqs, fq_cl, fq);
+ fq_if_destroy_flow(fqs, fq_cl, fq, true);
if (FQ_IF_CLASSQ_IDLE(fq_cl)) {
int i;
}
}
+static uint16_t
+fq_if_calc_quantum(struct ifnet *ifp)
+{
+ uint16_t quantum;
+
+ switch (ifp->if_family) {
+ case IFNET_FAMILY_ETHERNET:
+ VERIFY((ifp->if_mtu + ETHER_HDR_LEN) <= UINT16_MAX);
+ quantum = (uint16_t)ifp->if_mtu + ETHER_HDR_LEN;
+ break;
+
+ case IFNET_FAMILY_CELLULAR:
+ case IFNET_FAMILY_IPSEC:
+ case IFNET_FAMILY_UTUN:
+ VERIFY(ifp->if_mtu <= UINT16_MAX);
+ quantum = (uint16_t)ifp->if_mtu;
+ break;
+
+ default:
+ quantum = FQ_CODEL_DEFAULT_QUANTUM;
+ break;
+ }
+
+ /*
+ * XXX: Skywalk native interface doesn't support HW TSO offload.
+ */
+ if (((ifp->if_eflags & IFEF_SKYWALK_NATIVE) == 0) &&
+ ((ifp->if_hwassist & IFNET_TSOF) != 0)) {
+ VERIFY(ifp->if_tso_v4_mtu <= UINT16_MAX);
+ VERIFY(ifp->if_tso_v6_mtu <= UINT16_MAX);
+ quantum = (uint16_t)MAX(ifp->if_tso_v4_mtu, ifp->if_tso_v6_mtu);
+ quantum = (quantum != 0) ? quantum : IF_MAXMTU;
+ }
+
+ quantum = MAX(FQ_CODEL_DEFAULT_QUANTUM, quantum);
+#if DEBUG || DEVELOPMENT
+ quantum = (fq_codel_quantum != 0) ? fq_codel_quantum : quantum;
+#endif /* DEBUG || DEVELOPMENT */
+ return quantum;
+}
+
+static void
+fq_if_mtu_update(fq_if_t *fqs)
+{
+#define _FQ_CLASSQ_UPDATE_QUANTUM(_fqs, _s, _q) \
+ (_fqs)->fqs_classq[FQ_IF_ ## _s ## _INDEX].fcl_quantum = \
+ FQ_CODEL_QUANTUM_ ## _s(_q)
+
+ uint16_t quantum;
+
+ quantum = fq_if_calc_quantum(fqs->fqs_ifq->ifcq_ifp);
+
+ if ((fqs->fqs_flags & FQS_DRIVER_MANAGED) != 0) {
+ _FQ_CLASSQ_UPDATE_QUANTUM(fqs, BK, quantum);
+ _FQ_CLASSQ_UPDATE_QUANTUM(fqs, BE, quantum);
+ _FQ_CLASSQ_UPDATE_QUANTUM(fqs, VI, quantum);
+ _FQ_CLASSQ_UPDATE_QUANTUM(fqs, VO, quantum);
+ } else {
+ _FQ_CLASSQ_UPDATE_QUANTUM(fqs, BK_SYS, quantum);
+ _FQ_CLASSQ_UPDATE_QUANTUM(fqs, BK, quantum);
+ _FQ_CLASSQ_UPDATE_QUANTUM(fqs, BE, quantum);
+ _FQ_CLASSQ_UPDATE_QUANTUM(fqs, RD, quantum);
+ _FQ_CLASSQ_UPDATE_QUANTUM(fqs, OAM, quantum);
+ _FQ_CLASSQ_UPDATE_QUANTUM(fqs, AV, quantum);
+ _FQ_CLASSQ_UPDATE_QUANTUM(fqs, RV, quantum);
+ _FQ_CLASSQ_UPDATE_QUANTUM(fqs, VI, quantum);
+ _FQ_CLASSQ_UPDATE_QUANTUM(fqs, VO, quantum);
+ _FQ_CLASSQ_UPDATE_QUANTUM(fqs, CTL, quantum);
+ }
+#undef _FQ_CLASSQ_UPDATE_QUANTUM
+}
+
static void
fq_if_event(fq_if_t *fqs, cqev_t ev)
{
case CLASSQ_EV_LINK_DOWN:
fq_if_purge(fqs);
break;
+ case CLASSQ_EV_LINK_MTU:
+ fq_if_mtu_update(fqs);
+ break;
default:
break;
}
classq_pkt_type_t ptype)
{
#pragma unused(flags)
+#define _FQ_CLASSQ_INIT(_fqs, _s, _q) \
+ fq_if_classq_init((_fqs), FQ_IF_ ## _s ## _INDEX, \
+ FQ_CODEL_QUANTUM_ ## _s(_q), FQ_CODEL_DRR_MAX_ ## _s, \
+ MBUF_SC_ ## _s )
+
struct ifnet *ifp = ifq->ifcq_ifp;
fq_if_t *fqs = NULL;
+ uint16_t quantum;
int err = 0;
IFCQ_LOCK_ASSERT_HELD(ifq);
return ENOMEM;
}
+ quantum = fq_if_calc_quantum(ifp);
+
if (flags & PKTSCHEDF_QALG_DRIVER_MANAGED) {
fqs->fqs_flags |= FQS_DRIVER_MANAGED;
- fq_if_classq_init(fqs, FQ_IF_BK_INDEX, 1500,
- 2, MBUF_SC_BK);
- fq_if_classq_init(fqs, FQ_IF_BE_INDEX, 1500,
- 4, MBUF_SC_BE);
- fq_if_classq_init(fqs, FQ_IF_VI_INDEX, 3000,
- 6, MBUF_SC_VI);
- fq_if_classq_init(fqs, FQ_IF_VO_INDEX, 600,
- 8, MBUF_SC_VO);
+ _FQ_CLASSQ_INIT(fqs, BK, quantum);
+ _FQ_CLASSQ_INIT(fqs, BE, quantum);
+ _FQ_CLASSQ_INIT(fqs, VI, quantum);
+ _FQ_CLASSQ_INIT(fqs, VO, quantum);
} else {
/* SIG shares same INDEX with VI */
_CASSERT(SCIDX_SIG == SCIDX_VI);
_CASSERT(FQ_IF_SIG_INDEX == FQ_IF_VI_INDEX);
- fq_if_classq_init(fqs, FQ_IF_BK_SYS_INDEX, 1500,
- 2, MBUF_SC_BK_SYS);
- fq_if_classq_init(fqs, FQ_IF_BK_INDEX, 1500,
- 2, MBUF_SC_BK);
- fq_if_classq_init(fqs, FQ_IF_BE_INDEX, 1500,
- 4, MBUF_SC_BE);
- fq_if_classq_init(fqs, FQ_IF_RD_INDEX, 1500,
- 4, MBUF_SC_RD);
- fq_if_classq_init(fqs, FQ_IF_OAM_INDEX, 1500,
- 4, MBUF_SC_OAM);
- fq_if_classq_init(fqs, FQ_IF_AV_INDEX, 3000,
- 6, MBUF_SC_AV);
- fq_if_classq_init(fqs, FQ_IF_RV_INDEX, 3000,
- 6, MBUF_SC_RV);
- fq_if_classq_init(fqs, FQ_IF_VI_INDEX, 3000,
- 6, MBUF_SC_VI);
- fq_if_classq_init(fqs, FQ_IF_VO_INDEX, 600,
- 8, MBUF_SC_VO);
- fq_if_classq_init(fqs, FQ_IF_CTL_INDEX, 600,
- 8, MBUF_SC_CTL);
+ _FQ_CLASSQ_INIT(fqs, BK_SYS, quantum);
+ _FQ_CLASSQ_INIT(fqs, BK, quantum);
+ _FQ_CLASSQ_INIT(fqs, BE, quantum);
+ _FQ_CLASSQ_INIT(fqs, RD, quantum);
+ _FQ_CLASSQ_INIT(fqs, OAM, quantum);
+ _FQ_CLASSQ_INIT(fqs, AV, quantum);
+ _FQ_CLASSQ_INIT(fqs, RV, quantum);
+ _FQ_CLASSQ_INIT(fqs, VI, quantum);
+ _FQ_CLASSQ_INIT(fqs, VO, quantum);
+ _FQ_CLASSQ_INIT(fqs, CTL, quantum);
}
err = ifclassq_attach(ifq, PKTSCHEDT_FQ_CODEL, fqs);
-
if (err != 0) {
- printf("%s: error from ifclassq_attach, "
+ os_log_error(OS_LOG_DEFAULT, "%s: error from ifclassq_attach, "
"failed to attach fq_if: %d\n", __func__, err);
fq_if_destroy(fqs);
}
return err;
+#undef _FQ_CLASSQ_INIT
}
fq_t *
}
void
-fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq)
+fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
+ bool destroy_now)
{
u_int8_t hash_id;
hash_id = FQ_IF_FLOW_HASH_ID(fq->fq_flowhash);
fq_hashlink);
fq_cl->fcl_stat.fcl_flows_cnt--;
IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
- fq_destroy(fq);
+ fq->fq_flags |= FQF_DESTROYED;
+ if (destroy_now) {
+ fq_destroy(fq);
+ }
}
inline boolean_t
static void
fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
- bool remove_hash)
+ bool remove_hash, bool destroy)
{
/*
* Remove the flow queue if it is empty
if (remove_hash) {
/* Remove from the hash list */
- fq_if_destroy_flow(fqs, fq_cl, fq);
+ fq_if_destroy_flow(fqs, fq_cl, fq, destroy);
}
}
if (fq_empty(fq)) {
fqs->fqs_large_flow = NULL;
if (fq->fq_flags & FQF_OLD_FLOW) {
- fq_if_empty_old_flow(fqs, fq_cl, fq, true);
+ fq_if_empty_old_flow(fqs, fq_cl, fq, true, true);
} else {
VERIFY(fq->fq_flags & FQF_NEW_FLOW);
fq_if_empty_new_flow(fq, fq_cl, true);
}
boolean_t
-fq_if_add_fcentry(fq_if_t *fqs, pktsched_pkt_t *pkt, uint32_t flowid,
- uint8_t flowsrc, fq_if_classq_t *fq_cl)
+fq_if_add_fcentry(fq_if_t *fqs, pktsched_pkt_t *pkt, uint8_t flowsrc,
+ fq_t *fq, fq_if_classq_t *fq_cl)
{
struct flowadv_fcentry *fce;
+#if DEBUG || DEVELOPMENT
+ if (__improbable(ifclassq_flow_control_adv == 0)) {
+ os_log(OS_LOG_DEFAULT, "%s: skipped flow control", __func__);
+ return TRUE;
+ }
+#endif /* DEBUG || DEVELOPMENT */
+
STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
if ((uint8_t)fce->fce_flowsrc_type == flowsrc &&
- fce->fce_flowid == flowid) {
+ fce->fce_flowid == fq->fq_flowhash) {
/* Already on flowcontrol list */
return TRUE;
}
/* XXX Add number of bytes in the queue */
STAILQ_INSERT_TAIL(&fqs->fqs_fclist, fce, fce_link);
fq_cl->fcl_stat.fcl_flow_control++;
+ os_log(OS_LOG_DEFAULT, "%s: num: %d, scidx: %d, flowsrc: %d, "
+ "flow: 0x%x, iface: %s\n", __func__,
+ fq_cl->fcl_stat.fcl_flow_control,
+ fq->fq_sc_index, fce->fce_flowsrc_type, fq->fq_flowhash,
+ if_name(fqs->fqs_ifq->ifcq_ifp));
}
return (fce != NULL) ? TRUE : FALSE;
}
STAILQ_REMOVE(&fqs->fqs_fclist, fce, flowadv_fcentry,
fce_link);
STAILQ_NEXT(fce, fce_link) = NULL;
- flowadv_add_entry(fce);
fq_cl->fcl_stat.fcl_flow_feedback++;
+ os_log(OS_LOG_DEFAULT, "%s: num: %d, scidx: %d, flowsrc: %d, "
+ "flow: 0x%x, iface: %s\n", __func__,
+ fq_cl->fcl_stat.fcl_flow_feedback, fq->fq_sc_index,
+ fce->fce_flowsrc_type, fce->fce_flowid,
+ if_name(fqs->fqs_ifq->ifcq_ifp));
+ flowadv_add_entry(fce);
}
fq->fq_flags &= ~FQF_FLOWCTL_ON;
}
void
fq_if_dequeue(fq_if_t *fqs, fq_if_classq_t *fq_cl, uint32_t pktlimit,
- int64_t bytelimit, classq_pkt_t *top, classq_pkt_t *tail,
- uint32_t *retpktcnt, uint32_t *retbytecnt, boolean_t drvmgmt)
+ int64_t bytelimit, classq_pkt_t *top, classq_pkt_t *bottom,
+ uint32_t *retpktcnt, uint32_t *retbytecnt, flowq_dqlist_t *fq_dqlist,
+ boolean_t drvmgmt)
{
fq_t *fq = NULL, *tfq = NULL;
flowq_stailq_t temp_stailq;
- u_int32_t pktcnt, bytecnt;
+ uint32_t pktcnt, bytecnt;
boolean_t qempty, limit_reached = FALSE;
classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
fq_getq_flow_t fq_getq_flow_fn;
+ classq_pkt_t *head, *tail;
switch (fqs->fqs_ptype) {
case QP_MBUF:
ASSERT((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
FQF_NEW_FLOW);
+ if (fq_dqlist != NULL) {
+ if (!fq->fq_in_dqlist) {
+ fq_dqlist_add(fq_dqlist, fq);
+ }
+ head = &fq->fq_dq_head;
+ tail = &fq->fq_dq_tail;
+ } else {
+ ASSERT(!fq->fq_in_dqlist);
+ head = top;
+ tail = &last;
+ }
+
limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
- pktlimit, top, &last, &bytecnt, &pktcnt, &qempty,
+ pktlimit, head, tail, &bytecnt, &pktcnt, &qempty,
PKTF_NEW_FLOW);
if (fq->fq_deficit <= 0 || qempty) {
STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
VERIFY((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
FQF_OLD_FLOW);
+ bool destroy = true;
+
+ if (fq_dqlist != NULL) {
+ if (!fq->fq_in_dqlist) {
+ fq_dqlist_add(fq_dqlist, fq);
+ }
+ head = &fq->fq_dq_head;
+ tail = &fq->fq_dq_tail;
+ destroy = false;
+ } else {
+ ASSERT(!fq->fq_in_dqlist);
+ head = top;
+ tail = &last;
+ }
limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
- pktlimit, top, &last, &bytecnt, &pktcnt, &qempty, 0);
+ pktlimit, head, tail, &bytecnt, &pktcnt, &qempty, 0);
if (qempty) {
- fq_if_empty_old_flow(fqs, fq_cl, fq, true);
+ fq_if_empty_old_flow(fqs, fq_cl, fq, true, destroy);
} else if (fq->fq_deficit <= 0) {
STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq,
flowq, fq_actlink);
} else if (!STAILQ_EMPTY(&temp_stailq)) {
fq_cl->fcl_old_flows = temp_stailq;
}
-
if (last.cp_mbuf != NULL) {
VERIFY(top->cp_mbuf != NULL);
- if (tail != NULL) {
- *tail = last;
- }
- if (retpktcnt != NULL) {
- *retpktcnt = pktcnt;
- }
- if (retbytecnt != NULL) {
- *retbytecnt = bytecnt;
+ if (bottom != NULL) {
+ *bottom = last;
}
}
+ if (retpktcnt != NULL) {
+ *retpktcnt = pktcnt;
+ }
+ if (retbytecnt != NULL) {
+ *retbytecnt = bytecnt;
+ }
}
void
extern boolean_t fq_if_at_drop_limit(fq_if_t *);
extern void fq_if_drop_packet(fq_if_t *);
extern void fq_if_is_flow_heavy(fq_if_t *, struct flowq *);
-extern boolean_t fq_if_add_fcentry(fq_if_t *, pktsched_pkt_t *, uint32_t,
- uint8_t, fq_if_classq_t *);
+extern boolean_t fq_if_add_fcentry(fq_if_t *, pktsched_pkt_t *, uint8_t,
+ struct flowq *, fq_if_classq_t *);
extern void fq_if_flow_feedback(fq_if_t *, struct flowq *, fq_if_classq_t *);
extern int fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags,
classq_pkt_type_t ptype);
extern int fq_if_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t qid,
struct if_ifclassq_stats *ifqs);
extern void fq_if_destroy_flow(fq_if_t *, fq_if_classq_t *,
- struct flowq *);
+ struct flowq *, bool);
#endif /* BSD_KERNEL_PRIVATE */
static char *rn_zeros, *rn_ones;
-extern lck_grp_t *domain_proto_mtx_grp;
-extern lck_attr_t *domain_proto_mtx_attr;
-
#define rn_masktop (mask_rnhead->rnh_treetop)
#undef Bcmp
#define Bcmp(a, b, l) \
int, flags, unsigned int, ifscope);
LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
+
+#if !(DEVELOPMENT || DEBUG)
+ /*
+ * Setting the global internet flag externally is only for testing
+ */
+ flags &= ~RTF_GLOBAL;
+#endif /* !(DEVELOPMENT || DEBUG) */
+
/*
* Find the correct routing tree to use for this Address Family
*/
* necp client watchers to re-evaluate
*/
if (SA_DEFAULT(rt_key(rt))) {
+ /*
+ * Mark default routes as (potentially) leading to the global internet;
+ * this can be used for policy decisions.
+ * The clone routes will inherit this flag.
+ * We check against the host flag as this works both for default routes that
+ * have a gateway and for default routes when all subnets are local.
+ */
+ if (req == RTM_ADD && (rt->rt_flags & RTF_HOST) == 0) {
+ rt->rt_flags |= RTF_GLOBAL;
+ }
if (rt->rt_ifp != NULL) {
ifnet_touch_lastupdown(rt->rt_ifp);
}
#define RTF_PROXY 0x8000000 /* proxying, no interface scope */
#define RTF_ROUTER 0x10000000 /* host is a router */
#define RTF_DEAD 0x20000000 /* Route entry is being freed */
- /* 0x40000000 and up unassigned */
+#define RTF_GLOBAL 0x40000000 /* route to destination of the global internet */
+ /* 0x80000000 unassigned */
#define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */
#define RTF_BITS \
"\10DELCLONE\11CLONING\12XRESOLVE\13LLINFO\14STATIC\15BLACKHOLE" \
"\16NOIFREF\17PROTO2\20PROTO1\21PRCLONING\22WASCLONED\23PROTO3" \
"\25PINNED\26LOCAL\27BROADCAST\30MULTICAST\31IFSCOPE\32CONDEMNED" \
- "\33IFREF\34PROXY\35ROUTER"
+ "\33IFREF\34PROXY\35ROUTER\37GLOBAL"
#define IS_DIRECT_HOSTROUTE(rt) \
(((rt)->rt_flags & (RTF_HOST | RTF_GATEWAY)) == RTF_HOST)
STUB(kern_packet_get_next);
STUB(kern_packet_set_chain_counts);
STUB(kern_packet_get_chain_counts);
+STUB(kern_packet_trace_start);
+STUB(kern_packet_trace_end);
+STUB(kern_packet_is_traced);
+STUB(kern_packet_trace_event);
STUB(kern_pbufpool_alloc);
STUB(kern_pbufpool_alloc_batch);
STUB(kern_pbufpool_alloc_batch_callback);
os_cpu_in_cksum(const void *data, uint32_t len, uint32_t initial_sum)
{
/*
- * If data is 4-bytes aligned, length is multiple of 4-bytes,
- * and the amount to checksum is small, this would be quicker;
- * this is suitable for IPv4 header.
+ * If data is 4-bytes aligned (conditional), length is multiple
+ * of 4-bytes (required), and the amount to checksum is small,
+ * this would be quicker; this is suitable for IPv4/TCP header.
*/
- if (IS_P2ALIGNED(data, sizeof(uint32_t)) &&
- len <= 64 && (len & 3) == 0) {
+ if (
+#if !defined(__arm64__) && !defined(__x86_64__)
+ IS_P2ALIGNED(data, sizeof(uint32_t)) &&
+#endif /* !__arm64__ && !__x86_64__ */
+ len <= 64 && (len & 3) == 0) {
uint8_t *p = __DECONST(uint8_t *, data);
uint64_t sum = initial_sum;
- if (PREDICT_TRUE(len == 20)) { /* simple IPv4 header */
+ switch (len) {
+ case 20: /* simple IPv4 or TCP header */
sum += *(uint32_t *)(void *)p;
sum += *(uint32_t *)(void *)(p + 4);
sum += *(uint32_t *)(void *)(p + 8);
sum += *(uint32_t *)(void *)(p + 12);
sum += *(uint32_t *)(void *)(p + 16);
- } else {
+ break;
+
+ case 32: /* TCP header + timestamp option */
+ sum += *(uint32_t *)(void *)p;
+ sum += *(uint32_t *)(void *)(p + 4);
+ sum += *(uint32_t *)(void *)(p + 8);
+ sum += *(uint32_t *)(void *)(p + 12);
+ sum += *(uint32_t *)(void *)(p + 16);
+ sum += *(uint32_t *)(void *)(p + 20);
+ sum += *(uint32_t *)(void *)(p + 24);
+ sum += *(uint32_t *)(void *)(p + 28);
+ break;
+
+ default:
while (len) {
sum += *(uint32_t *)(void *)p;
p += 4;
len -= 4;
}
+ break;
}
/* fold 64-bit to 16-bit (deferred carries) */
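The unrolled 20-byte and 32-byte cases above add whole 32-bit words into a 64-bit accumulator and leave all carries for a single fold at the end. A standalone sketch of that deferred-carry fold for a 20-byte IPv4 header (illustrative only; memcpy stands in for the kernel's aligned loads, and the printed value is in host byte order):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static uint16_t
cksum20(const void *data)
{
	uint32_t w[5];
	uint64_t sum = 0;

	memcpy(w, data, sizeof(w)); /* five 32-bit words, carries deferred */
	for (int i = 0; i < 5; i++) {
		sum += w[i];
	}
	/* fold 64 -> 32 -> 16 bits, adding the carries back in each time */
	sum = (sum >> 32) + (sum & 0xffffffff);
	sum = (sum >> 32) + (sum & 0xffffffff);
	sum = (sum >> 16) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	return (uint16_t)~sum;
}

int
main(void)
{
	/* Sample IPv4 header with its checksum field zeroed. */
	const uint8_t hdr[20] = {
		0x45, 0x00, 0x00, 0x3c, 0x1c, 0x46, 0x40, 0x00,
		0x40, 0x06, 0x00, 0x00, 0xac, 0x10, 0x0a, 0x63,
		0xac, 0x10, 0x0a, 0x0c
	};
	printf("checksum (host order): 0x%04x\n", cksum20(hdr));
	return 0;
}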
/*
- * Copyright (c) 2012-2017, 2020 Apple Inc. All rights reserved.
+ * Copyright (c) 2012-2017, 2020, 2021 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#define FLOW_DIVERT_NOTIFY_ON_RECEIVED 0x00000080
#define FLOW_DIVERT_IMPLICIT_CONNECT 0x00000100
#define FLOW_DIVERT_DID_SET_LOCAL_ADDR 0x00000200
+#define FLOW_DIVERT_HAS_TOKEN 0x00000400
+#define FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR 0x00000800
#define FDLOG(level, pcb, format, ...) \
os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " format "\n", (pcb)->hash, __VA_ARGS__)
goto done;
}
+ if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
+ error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, &(fd_cb->local_endpoint.sa));
+ if (error) {
+ goto done;
+ }
+ }
+
error = flow_divert_send_packet(fd_cb, packet, TRUE);
if (error) {
goto done;
}
static void
-flow_divert_set_local_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *local_endpoint, bool port_only)
+flow_divert_set_local_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *local_endpoint)
{
struct inpcb *inp = sotoinpcb(fd_cb->so);
if (local_endpoint->sa_family == AF_INET6) {
- if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && !port_only) {
+ if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
inp->in6p_laddr = (satosin6(local_endpoint))->sin6_addr;
}
inp->inp_lport = (satosin6(local_endpoint))->sin6_port;
}
} else if (local_endpoint->sa_family == AF_INET) {
- if (inp->inp_laddr.s_addr == INADDR_ANY && !port_only) {
+ if (inp->inp_laddr.s_addr == INADDR_ANY && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
inp->inp_laddr = (satosin(local_endpoint))->sin_addr;
}
NULL,
(last_proc != NULL ? last_proc : current_proc()));
- if (error) {
+ if (error && error != EWOULDBLOCK) {
FDLOG(LOG_ERR, fd_cb, "Failed to send queued data using the socket's original protocol: %d", error);
+ } else {
+ error = 0;
}
} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
struct sockbuf *sb = &so->so_snd;
}
}
+static void
+flow_divert_scope(struct flow_divert_pcb *fd_cb, int out_if_index, bool derive_new_address)
+{
+ struct socket *so = NULL;
+ struct inpcb *inp = NULL;
+ struct ifnet *current_ifp = NULL;
+ struct ifnet *new_ifp = NULL;
+ int error = 0;
+
+ so = fd_cb->so;
+ if (so == NULL) {
+ return;
+ }
+
+ inp = sotoinpcb(so);
+
+ if (out_if_index <= 0) {
+ return;
+ }
+
+ if (inp->inp_vflag & INP_IPV6) {
+ current_ifp = inp->in6p_last_outifp;
+ } else {
+ current_ifp = inp->inp_last_outifp;
+ }
+
+ if (current_ifp != NULL) {
+ if (current_ifp->if_index == out_if_index) {
+ /* No change */
+ return;
+ }
+
+ /* Scope the socket to the given interface */
+ error = inp_bindif(inp, out_if_index, &new_ifp);
+ if (error != 0) {
+ FDLOG(LOG_ERR, fd_cb, "failed to scope to %d because inp_bindif returned %d", out_if_index, error);
+ return;
+ }
+
+ if (derive_new_address && fd_cb->original_remote_endpoint != NULL) {
+ /* Get the appropriate address for the given interface */
+ if (inp->inp_vflag & INP_IPV6) {
+ inp->in6p_laddr = sa6_any.sin6_addr;
+ error = in6_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin6.sin6_addr), NULL);
+ } else {
+ inp->inp_laddr.s_addr = INADDR_ANY;
+ error = in_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, NULL, 0);
+ }
+
+ if (error != 0) {
+ FDLOG(LOG_WARNING, fd_cb, "failed to derive a new local address from %d because in_pcbladdr returned %d", out_if_index, error);
+ }
+ }
+ } else {
+ ifnet_head_lock_shared();
+ if (out_if_index <= if_index) {
+ new_ifp = ifindex2ifnet[out_if_index];
+ }
+ ifnet_head_done();
+ }
+
+ /* Update the "last interface" of the socket */
+ if (new_ifp != NULL) {
+ if (inp->inp_vflag & INP_IPV6) {
+ inp->in6p_last_outifp = new_ifp;
+ } else {
+ inp->inp_last_outifp = new_ifp;
+ }
+
+ }
+}
+
static void
flow_divert_handle_connect_result(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
{
FDLOCK(fd_cb);
if (fd_cb->so != NULL) {
struct inpcb *inp = NULL;
- struct ifnet *ifp = NULL;
struct flow_divert_group *old_group;
struct socket *so = fd_cb->so;
+ bool local_address_is_valid = false;
socket_lock(so, 0);
+ if (!(so->so_flags & SOF_FLOW_DIVERT)) {
+ FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring connect result");
+ goto done;
+ }
+
if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_state & SS_ISCONNECTING)) {
FDLOG0(LOG_ERR, fd_cb, "TCP socket is not in the connecting state, ignoring connect result");
goto done;
if (flow_divert_is_sockaddr_valid(&(local_endpoint.sa))) {
if (local_endpoint.sa.sa_family == AF_INET) {
local_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
+ if ((inp->inp_vflag & INP_IPV4) && local_endpoint.sin.sin_addr.s_addr != INADDR_ANY) {
+ local_address_is_valid = true;
+ fd_cb->local_endpoint = local_endpoint;
+ inp->inp_laddr.s_addr = INADDR_ANY;
+ } else {
+ fd_cb->local_endpoint.sin.sin_port = local_endpoint.sin.sin_port;
+ }
} else if (local_endpoint.sa.sa_family == AF_INET6) {
local_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
+ if ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&local_endpoint.sin6.sin6_addr)) {
+ local_address_is_valid = true;
+ fd_cb->local_endpoint = local_endpoint;
+ inp->in6p_laddr = sa6_any.sin6_addr;
+ } else {
+ fd_cb->local_endpoint.sin6.sin6_port = local_endpoint.sin6.sin6_port;
+ }
}
- fd_cb->local_endpoint = local_endpoint;
- flow_divert_set_local_endpoint(fd_cb, &(local_endpoint.sa), (SOCK_TYPE(so) == SOCK_DGRAM));
}
+ flow_divert_scope(fd_cb, out_if_index, !local_address_is_valid);
+ flow_divert_set_local_endpoint(fd_cb, &(fd_cb->local_endpoint.sa));
+
if (flow_divert_is_sockaddr_valid(&(remote_endpoint.sa)) && SOCK_TYPE(so) == SOCK_STREAM) {
if (remote_endpoint.sa.sa_family == AF_INET) {
remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
}
}
- ifnet_head_lock_shared();
- if (out_if_index > 0 && out_if_index <= if_index) {
- ifp = ifindex2ifnet[out_if_index];
- }
-
- if (ifp != NULL) {
- if (inp->inp_vflag & INP_IPV4) {
- inp->inp_last_outifp = ifp;
- } else if (inp->inp_vflag & INP_IPV6) {
- inp->in6p_last_outifp = ifp;
- }
- } else {
- error = EINVAL;
- }
- ifnet_head_done();
-
if (error) {
goto set_socket_state;
}
if (fd_cb->so != NULL) {
socket_lock(fd_cb->so, 0);
+ if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
+ FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring close from provider");
+ goto done;
+ }
+
fd_cb->so->so_error = (uint16_t)ntohl(close_error);
flow_divert_update_closed_state(fd_cb, how, TRUE);
} else if (how == SHUT_WR) {
socantsendmore(fd_cb->so);
}
-
+done:
socket_unlock(fd_cb->so, 0);
}
FDUNLOCK(fd_cb);
socket_lock(fd_cb->so, 0);
+ if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
+ FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring inbound data");
+ goto done;
+ }
+
if (sbspace(&fd_cb->so->so_rcv) == 0) {
error = ENOBUFS;
fd_cb->flags |= FLOW_DIVERT_NOTIFY_ON_RECEIVED;
FDLOCK(fd_cb);
if (fd_cb->so != NULL) {
socket_lock(fd_cb->so, 0);
+
+ if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
+ FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring read notification");
+ goto done;
+ }
+
fd_cb->send_window += ntohl(read_count);
flow_divert_send_buffered_data(fd_cb, FALSE);
+done:
socket_unlock(fd_cb->so, 0);
}
FDUNLOCK(fd_cb);
if (fd_cb->so != NULL) {
socket_lock(fd_cb->so, 0);
- if (out_if_index > 0) {
- struct inpcb *inp = NULL;
- struct ifnet *ifp = NULL;
-
- inp = sotoinpcb(fd_cb->so);
-
- ifnet_head_lock_shared();
- if (out_if_index <= if_index) {
- ifp = ifindex2ifnet[out_if_index];
- }
+ if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
+ FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring properties update");
+ goto done;
+ }
- if (ifp != NULL) {
- if (inp->inp_vflag & INP_IPV4) {
- inp->inp_last_outifp = ifp;
- } else if (inp->inp_vflag & INP_IPV6) {
- inp->in6p_last_outifp = ifp;
- }
- }
- ifnet_head_done();
+ if (out_if_index > 0) {
+ flow_divert_scope(fd_cb, out_if_index, true);
+ flow_divert_set_local_endpoint(fd_cb, &(fd_cb->local_endpoint.sa));
}
if (app_data_length > 0) {
FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the properties update", app_data_length);
}
}
-
+done:
socket_unlock(fd_cb->so, 0);
}
FDUNLOCK(fd_cb);
goto done;
}
+ if (SOCK_TYPE(so) == SOCK_STREAM || /* TCP or */
+ !implicit || /* connect() was called or */
+ ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) || /* local address is not un-specified */
+ ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
+ fd_cb->flags |= FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR;
+ }
+
error = flow_divert_create_connect_packet(fd_cb, to, so, p, &connect_packet);
if (error) {
goto done;
if (!implicit || SOCK_TYPE(so) == SOCK_STREAM) {
flow_divert_set_remote_endpoint(fd_cb, to);
- flow_divert_set_local_endpoint(fd_cb, &(fd_cb->local_endpoint.sa), false);
+ flow_divert_set_local_endpoint(fd_cb, &(fd_cb->local_endpoint.sa));
}
if (implicit) {
fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
}
- if (SOCK_TYPE(so) == SOCK_DGRAM) {
+ if (SOCK_TYPE(so) == SOCK_DGRAM && !(fd_cb->flags & FLOW_DIVERT_HAS_TOKEN)) {
soisconnected(so);
} else {
soisconnecting(so);
if (error) {
goto done;
}
-
- if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
- /* Open up the send window so that the data will get sent right away */
- fd_cb->send_window = (uint32_t)mbuf_pkthdr_len(data);
- }
} else {
error = flow_divert_check_no_cellular(fd_cb) ||
flow_divert_check_no_expensive(fd_cb) ||
fd_cb->connect_token = token;
token = NULL;
+
+ fd_cb->flags |= FLOW_DIVERT_HAS_TOKEN;
}
if (hmac_error == 0) {
/*
- * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#define ICMPV6CTL_ND6_MAXQLEN 24
#define ICMPV6CTL_ND6_ACCEPT_6TO4 25
#define ICMPV6CTL_ND6_OPTIMISTIC_DAD 26 /* RFC 4429 */
-#define ICMPV6CTL_MAXID 27
+#define ICMPV6CTL_ERRPPSLIMIT_RANDOM_INCR 27
+#define ICMPV6CTL_MAXID 28
#ifdef BSD_KERNEL_PRIVATE
#define ICMPV6CTL_NAMES { \
/*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#define ICMPCTL_STATS 2 /* statistics (read-only) */
#define ICMPCTL_ICMPLIM 3
#define ICMPCTL_TIMESTAMP 4 /* allow replies to time stamp requests */
-#define ICMPCTL_MAXID 5
+#define ICMPCTL_ICMPLIM_INCR 5
+#define ICMPCTL_MAXID 6
#ifdef BSD_KERNEL_PRIVATE
#define ICMPCTL_NAMES { \
lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr);
inpcb_thread_call = thread_call_allocate_with_priority(inpcb_timeout,
NULL, THREAD_CALL_PRIORITY_KERNEL);
+ /* Give it an arg so that we know that this is the fast timer */
inpcb_fast_thread_call = thread_call_allocate_with_priority(
- inpcb_timeout, NULL, THREAD_CALL_PRIORITY_KERNEL);
+ inpcb_timeout, &inpcb_timeout, THREAD_CALL_PRIORITY_KERNEL);
if (inpcb_thread_call == NULL || inpcb_fast_thread_call == NULL) {
panic("unable to alloc the inpcb thread call");
}
static void
inpcb_timeout(void *arg0, void *arg1)
{
-#pragma unused(arg0, arg1)
+#pragma unused(arg1)
struct inpcbinfo *ipi;
boolean_t t, gc;
struct intimercount gccnt, tmcnt;
inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);
}
- /* re-arm the timer if there's work to do */
+ /* arg0 will be set if we are the fast timer */
+ if (arg0 != NULL) {
+ inpcb_fast_timer_on = FALSE;
+ }
inpcb_timeout_run--;
VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);
+ /* re-arm the timer if there's work to do */
if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0) {
inpcb_sched_timeout();
} else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5) {
inpcb_timeout_run++;
if (offset == 0) {
inpcb_fast_timer_on = TRUE;
- thread_call_enter_delayed(inpcb_thread_call,
+ thread_call_enter_delayed(inpcb_fast_thread_call,
deadline);
} else {
inpcb_fast_timer_on = FALSE;
#include <kern/zalloc.h>
#include <netinet/in_stat.h>
#endif /* BSD_KERNEL_PRIVATE */
+#if !KERNEL
+#include <TargetConditionals.h>
+#endif
#if IPSEC
#include <netinet6/ipsec.h> /* for IPSEC */
u_quad_t xi_alignment_hack;
};
-#if XNU_TARGET_OS_OSX || !(TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR)
+#if XNU_TARGET_OS_OSX || KERNEL || !(TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR)
struct inpcb64_list_entry {
u_int64_t le_next;
u_int64_t le_prev;
struct xsocket64 xi_socket;
u_quad_t xi_alignment_hack;
};
-#endif /* XNU_TARGET_OS_OSX || !(TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR) */
+#endif /* XNU_TARGET_OS_OSX || KERNEL || !(TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR) */
#ifdef PRIVATE
struct xinpcb_list_entry {
#ifndef _NETINET_IN_SYSTM_H_
#define _NETINET_IN_SYSTM_H_
+
+#ifndef DRIVERKIT
#include <sys/appleapiopts.h>
+#endif /* DRIVERKIT */
+
#include <sys/_types.h>
/*
const static int icmp_datalen = 8;
#if ICMP_BANDLIM
-
/* Default values in case CONFIG_ICMP_BANDLIM is not defined in the MASTER file */
#ifndef CONFIG_ICMP_BANDLIM
#if XNU_TARGET_OS_OSX
static int icmplim = CONFIG_ICMP_BANDLIM;
SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW | CTLFLAG_LOCKED,
&icmplim, 0, "");
-
#else /* ICMP_BANDLIM */
-
static int icmplim = -1;
SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RD | CTLFLAG_LOCKED,
&icmplim, 0, "");
-
#endif /* ICMP_BANDLIM */
+static int icmplim_random_incr = CONFIG_ICMP_BANDLIM;
+SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM_INCR, icmplim_random_incr, CTLFLAG_RW | CTLFLAG_LOCKED,
+ &icmplim_random_incr, 0, "");
+
/*
* ICMP broadcast echo sysctl
*/
/*
* badport_bandlim() - check for ICMP bandwidth limit
- *
- * Return 0 if it is ok to send an ICMP error response, -1 if we have
- * hit our bandwidth limit and it is not ok.
- *
- * If icmplim is <= 0, the feature is disabled and 0 is returned.
+ * Returns false when it is ok to send ICMP error and true to limit sending
+ * of ICMP error.
*
* For now we separate the TCP and UDP subsystems w/ different 'which'
* values. We may eventually remove this separation (and simplify the
static int lpackets[BANDLIM_MAX + 1];
uint64_t time;
uint64_t secs;
-
+ static boolean_t is_initialized = FALSE;
+ static int icmplim_random;
const char *bandlimittype[] = {
"Limiting icmp unreach response",
"Limiting icmp ping response",
return false;
}
+ if (is_initialized == FALSE) {
+ if (icmplim_random_incr > 0 &&
+ icmplim <= INT32_MAX - (icmplim_random_incr + 1)) {
+ icmplim_random = icmplim + (random() % icmplim_random_incr) + 1;
+ }
+ is_initialized = TRUE;
+ }
+
time = net_uptime();
secs = time - lticks[which];
*/
if (secs > 1) {
- if (lpackets[which] > icmplim) {
+ if (lpackets[which] > icmplim_random) {
printf("%s from %d to %d packets per second\n",
bandlimittype[which],
lpackets[which],
- icmplim
+ icmplim_random
);
}
lticks[which] = time;
/*
* bump packet count
*/
-
- if (++lpackets[which] > icmplim) {
- return true;
+ if (++lpackets[which] > icmplim_random) {
+ /*
+ * After hitting the randomized limit, we further randomize how the
+ * rate limit is applied: a response is suppressed with a probability
+ * that grows as lpackets[which] increases.
+ */
+ if ((random() % (lpackets[which] - icmplim_random)) != 0) {
+ return true;
+ }
}
return false;
}
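
Once lpackets[which] exceeds the randomized limit above, a response is still sent whenever the random draw lands on zero, i.e. with probability 1/(lpackets[which] - icmplim_random), so error traffic tapers off instead of stopping abruptly. A minimal user-space sketch of that decision, assuming POSIX random() in place of the kernel's and hypothetical parameter names:

#include <stdbool.h>
#include <stdlib.h>

/*
 * Sketch only: 'count' stands in for lpackets[which] and 'limit' for the
 * randomized icmplim_random.  Returns true when the ICMP error should be
 * suppressed.
 */
static bool
should_rate_limit(int count, int limit)
{
	if (count <= limit) {
		return false;   /* under the randomized limit: always respond */
	}
	/*
	 * Past the limit: respond only when the draw hits 0, i.e. with
	 * probability 1 / (count - limit).
	 */
	return (random() % (count - limit)) != 0;
}
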
struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
int fail_thresh = mptcp_fail_thresh;
- if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
+ if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER || mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) {
fail_thresh *= 2;
}
* Second Step: Among best and second_best. Choose the one that is
* most appropriate for this particular service-type.
*/
- if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
+ if (mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) {
+ return mptcp_return_subflow(best);
+ } else if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
/*
* Only handover if Symptoms tells us to do so.
*/
}
}
-void
-mptcp_ask_for_nat64(struct ifnet *ifp)
-{
- in6_post_msg(ifp, KEV_INET6_REQUEST_NAT64_PREFIX, NULL, NULL);
-
- os_log_info(mptcp_log_handle,
- "%s: asked for NAT64-prefix on %s\n", __func__,
- ifp->if_name);
-}
-
static void
mptcp_reset_itfinfo(struct mpt_itf_info *info)
{
}
dst = mptcp_get_session_dst(mpte, has_v6, has_v4);
- if (dst && (dst->sa_family == AF_INET || dst->sa_family == 0) &&
+ if (dst && dst->sa_family == AF_INET &&
has_v6 && !has_nat64 && !has_v4) {
if (found_slot) {
mpte->mpte_itfinfo[slot_index].ifindex = ifindex;
mpte->mpte_itfinfo[slot_index].has_v6_conn = has_v6;
mpte->mpte_itfinfo[slot_index].has_nat64_conn = has_nat64;
}
- mptcp_ask_for_nat64(ifp);
goto out;
}
if (tp->t_mpflags & TMPF_BACKUP_PATH) {
mpjoin_req.mmjo_subtype_bkp |= MPTCP_BACKUP;
} else if (inp->inp_boundifp && IFNET_IS_CELLULAR(inp->inp_boundifp) &&
- mpts->mpts_mpte->mpte_svctype < MPTCP_SVCTYPE_AGGREGATE) {
+ mptcp_subflows_need_backup_flag(mpts->mpts_mpte)) {
mpjoin_req.mmjo_subtype_bkp |= MPTCP_BACKUP;
tp->t_mpflags |= TMPF_BACKUP_PATH;
} else {
if (((struct mptcp_mpcapable_opt_common *)cp)->mmco_flags &
MPCAP_UNICAST_IPBIT) {
mpte->mpte_flags |= MPTE_UNICAST_IP;
+
+ /* We need an explicit signal for the addresses - zero the existing ones */
+ memset(&mpte->mpte_sub_dst_v4, 0, sizeof(mpte->mpte_sub_dst_v4));
+ memset(&mpte->mpte_sub_dst_v6, 0, sizeof(mpte->mpte_sub_dst_v6));
}
rsp = (struct mptcp_mpcapable_opt_rsp *)cp;
if (dss_rsp->mdss_subtype == MPO_DSS) {
if (dss_rsp->mdss_flags & MDSS_F) {
tp->t_rcv_map.mpt_dfin = 1;
+ } else {
+ tp->t_rcv_map.mpt_dfin = 0;
}
mptcp_do_dss_opt_meat(cp, tp, th);
}
if (addr_opt->maddr_len == MPTCP_ADD_ADDR_OPT_LEN_V4) {
- struct sockaddr_in *dst = &mpte->mpte_dst_unicast_v4;
+ struct sockaddr_in *dst = &mpte->mpte_sub_dst_v4;
struct in_addr *addr = &addr_opt->maddr_u.maddr_addrv4;
in_addr_t haddr = ntohl(addr->s_addr);
dst->sin_port = mpte->__mpte_dst_v4.sin_port;
dst->sin_addr.s_addr = addr->s_addr;
} else {
- struct sockaddr_in6 *dst = &mpte->mpte_dst_unicast_v6;
+ struct sockaddr_in6 *dst = &mpte->mpte_sub_dst_v6;
struct in6_addr *addr = &addr_opt->maddr_u.maddr_addrv6;
if (IN6_IS_ADDR_LINKLOCAL(addr) ||
struct sockaddr *
mptcp_get_session_dst(struct mptses *mpte, boolean_t ipv6, boolean_t ipv4)
{
- if (!(mpte->mpte_flags & MPTE_UNICAST_IP)) {
- return &mpte->mpte_dst;
+ if (ipv6 && mpte->mpte_sub_dst_v6.sin6_family == AF_INET6) {
+ return (struct sockaddr *)&mpte->mpte_sub_dst_v6;
}
- if (ipv6 && mpte->mpte_dst_unicast_v6.sin6_family == AF_INET6) {
- return (struct sockaddr *)&mpte->mpte_dst_unicast_v6;
- }
-
- if (ipv4 && mpte->mpte_dst_unicast_v4.sin_family == AF_INET) {
- return (struct sockaddr *)&mpte->mpte_dst_unicast_v4;
+ if (ipv4 && mpte->mpte_sub_dst_v4.sin_family == AF_INET) {
+ return (struct sockaddr *)&mpte->mpte_sub_dst_v4;
}
/* The interface has neither IPv4 nor IPv6 routes. Give our best guess,
* meaning we prefer IPv6 over IPv4.
*/
- if (mpte->mpte_dst_unicast_v6.sin6_family == AF_INET6) {
- return (struct sockaddr *)&mpte->mpte_dst_unicast_v6;
+ if (mpte->mpte_sub_dst_v6.sin6_family == AF_INET6) {
+ return (struct sockaddr *)&mpte->mpte_sub_dst_v6;
}
- if (mpte->mpte_dst_unicast_v4.sin_family == AF_INET) {
- return (struct sockaddr *)&mpte->mpte_dst_unicast_v4;
+ if (mpte->mpte_sub_dst_v4.sin_family == AF_INET) {
+ return (struct sockaddr *)&mpte->mpte_sub_dst_v4;
}
/* We don't yet have a unicast IP */
return;
}
+ /* Just to check whether we have an IP address available */
if (mptcp_get_session_dst(mpte, false, false) == NULL) {
return;
}
if (IFNET_IS_CELLULAR(ifp)) {
cellular_viable = TRUE;
+
+ if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER ||
+ mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) {
+ if (!mptcp_is_wifi_unusable_for_session(mpte)) {
+ continue;
+ }
+ }
}
TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
need_to_ask_symptoms = TRUE;
}
- if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
+ if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER || mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) {
os_log(mptcp_log_handle,
- "%s - %lx: handover: cell %u wifi-state %d flags %#x rxt %u first-party %u sb_cc %u ifindex %u this %u rtt %u rttvar %u rto %u\n",
+ "%s - %lx: %s: cell %u wifi-state %d flags %#x rxt %u first-party %u sb_cc %u ifindex %u this %u rtt %u rttvar %u rto %u\n",
__func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
+ mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER ? "handover" : "pure-handover",
IFNET_IS_CELLULAR(subifp),
mptcp_is_wifi_unusable_for_session(mpte),
mpts->mpts_flags,
dst = (struct sockaddr *)&nat64pre;
}
- /* Initial subflow started on a NAT64'd address? */
- if (!(mpte->mpte_flags & MPTE_UNICAST_IP) &&
- mpte->mpte_dst.sa_family == AF_INET6 &&
- mpte->mpte_dst_v4_nat64.sin_family == AF_INET) {
- dst = (struct sockaddr *)&mpte->mpte_dst_v4_nat64;
- }
-
if (dst->sa_family == AF_INET && !info->has_v4_conn) {
continue;
}
mptcp_remove_cell_subflows(struct mptses *mpte)
{
struct mptsub *mpts, *tmpts;
- boolean_t found = false;
- TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
+ TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
- if (ifp == NULL || IFNET_IS_CELLULAR(ifp)) {
+ if (ifp == NULL || !IFNET_IS_CELLULAR(ifp)) {
continue;
}
- /* We have a functioning subflow on WiFi. No need for cell! */
- if (mpts->mpts_flags & MPTSF_CONNECTED &&
- !mptcp_subflow_disconnecting(mpts)) {
- found = true;
- }
- }
+ os_log(mptcp_log_handle, "%s - %lx: removing cell subflow\n",
+ __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
- /* Didn't found functional sub on WiFi - stay on cell */
- if (!found) {
- return;
+ soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
}
+ return;
+}
+
+static void
+mptcp_remove_wifi_subflows(struct mptses *mpte)
+{
+ struct mptsub *mpts, *tmpts;
+
TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
- /* Only remove cellular subflows */
- if (ifp == NULL || !IFNET_IS_CELLULAR(ifp)) {
+ if (ifp == NULL || IFNET_IS_CELLULAR(ifp)) {
continue;
}
- os_log(mptcp_log_handle, "%s - %lx: removing cell subflow\n",
+ os_log(mptcp_log_handle, "%s - %lx: removing wifi subflow\n",
__func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
return;
}
-/* Returns true if it removed a subflow on cell */
+static void
+mptcp_pure_handover_subflows_remove(struct mptses *mpte)
+{
+ int wifi_unusable = mptcp_is_wifi_unusable_for_session(mpte);
+ boolean_t found_working_wifi_subflow = false;
+ boolean_t found_working_cell_subflow = false;
+
+ struct mptsub *mpts;
+
+ /*
+ * Look for a subflow that is on a non-cellular interface and in
+ * connected state.
+ *
+ * In that case, remove all cellular subflows.
+ *
+ * If however there is no working non-cellular subflow and Wi-Fi is
+ * unusable, keep the cellular subflows and remove the Wi-Fi ones
+ * instead.
+ */
+ TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
+ const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
+ struct socket *so;
+ struct tcpcb *tp;
+
+ if (ifp == NULL) {
+ continue;
+ }
+
+ so = mpts->mpts_socket;
+ tp = sototcpcb(so);
+
+ if (!(mpts->mpts_flags & MPTSF_CONNECTED) ||
+ tp->t_state != TCPS_ESTABLISHED ||
+ mptcp_subflow_disconnecting(mpts)) {
+ continue;
+ }
+
+ if (IFNET_IS_CELLULAR(ifp)) {
+ found_working_cell_subflow = true;
+ } else {
+ os_log_debug(mptcp_log_handle, "%s - %lx: rxt %u sb_cc %u unusable %d\n",
+ __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), tp->t_rxtshift, mptetoso(mpte)->so_snd.sb_cc, wifi_unusable);
+ if (!mptcp_handover_use_cellular(mpte, tp)) {
+ found_working_wifi_subflow = true;
+ }
+ }
+ }
+
+ /*
+ * No working Wi-Fi subflow and Wi-Fi is unusable: keep the cellular
+ * subflows (and drop the Wi-Fi ones if a cellular subflow is working).
+ */
+ os_log_debug(mptcp_log_handle, "%s - %lx: Found Wi-Fi: %u Found Cellular %u",
+ __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
+ found_working_wifi_subflow, found_working_cell_subflow);
+ if (!found_working_wifi_subflow && wifi_unusable) {
+ if (found_working_cell_subflow) {
+ mptcp_remove_wifi_subflows(mpte);
+ }
+ return;
+ }
+
+ mptcp_remove_cell_subflows(mpte);
+}
+
static void
mptcp_handover_subflows_remove(struct mptses *mpte)
{
mptcp_targetbased_subflows_remove(struct mptses *mpte)
{
uint64_t time_now = mach_continuous_time();
+ struct mptsub *mpts;
if (mpte->mpte_time_target != 0 &&
(int64_t)(mpte->mpte_time_target - time_now) <= 0 &&
return;
}
- mptcp_remove_cell_subflows(mpte);
+ TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
+ const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
+
+ if (ifp == NULL || IFNET_IS_CELLULAR(ifp)) {
+ continue;
+ }
+
+ /* We have a functioning subflow on WiFi. No need for cell! */
+ if (mpts->mpts_flags & MPTSF_CONNECTED &&
+ !mptcp_subflow_disconnecting(mpts)) {
+ mptcp_remove_cell_subflows(mpte);
+ break;
+ }
+ }
}
/*
socket_lock_assert_owned(mptetoso(mpte));
+ if (mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) {
+ mptcp_pure_handover_subflows_remove(mpte);
+ }
+
if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
mptcp_handover_subflows_remove(mpte);
}
mptcp_sched_create_subflows(mpte);
if ((mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER ||
+ mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER ||
mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) &&
viable != NULL) {
*viable = 1;
if (mp_so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
(*so)->so_flags1 |= SOF1_DATA_IDEMPOTENT;
}
+ if (mp_so->so_flags1 & SOF1_DATA_AUTHENTICATED) {
+ (*so)->so_flags1 |= SOF1_DATA_AUTHENTICATED;
+ }
/* Inherit uuid and create the related flow. */
if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
static int
mptcp_adj_rmap(struct socket *so, struct mbuf *m, int off, uint64_t dsn,
- uint32_t rseq, uint16_t dlen)
+ uint32_t rseq, uint16_t dlen, uint8_t dfin)
{
struct mptsub *mpts = sototcpcb(so)->t_mpsub;
if (m->m_pkthdr.pkt_flags & PKTF_MPTCP) {
if (off && (dsn != m->m_pkthdr.mp_dsn ||
rseq != m->m_pkthdr.mp_rseq ||
- dlen != m->m_pkthdr.mp_rlen)) {
- os_log_error(mptcp_log_handle, "%s - %lx: Received incorrect second mapping: %u - %u , %u - %u, %u - %u\n",
+ dlen != m->m_pkthdr.mp_rlen ||
+ dfin != !!(m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN))) {
+ os_log_error(mptcp_log_handle, "%s - %lx: Received incorrect second mapping: DSN: %u - %u , SSN: %u - %u, DLEN: %u - %u, DFIN: %u - %u\n",
__func__, (unsigned long)VM_KERNEL_ADDRPERM(mpts->mpts_mpte),
(uint32_t)dsn, (uint32_t)m->m_pkthdr.mp_dsn,
rseq, m->m_pkthdr.mp_rseq,
- dlen, m->m_pkthdr.mp_rlen);
+ dlen, m->m_pkthdr.mp_rlen,
+ dfin, !!(m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN));
soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
return -1;
}
/* If mbuf is beyond right edge of the mapping, we need to split */
- if (m_pktlen(m) > dlen - off) {
- struct mbuf *new = m_split(m, dlen - off, M_DONTWAIT);
+ if (m_pktlen(m) > dlen - dfin - off) {
+ struct mbuf *new = m_split(m, dlen - dfin - off, M_DONTWAIT);
if (new == NULL) {
- os_log_error(mptcp_log_handle, "%s - %lx: m_split failed dlen %u off %d pktlen %d, killing subflow %d",
+ os_log_error(mptcp_log_handle, "%s - %lx: m_split failed dlen %u dfin %u off %d pktlen %d, killing subflow %d",
__func__, (unsigned long)VM_KERNEL_ADDRPERM(mpts->mpts_mpte),
- dlen, off, m_pktlen(m),
+ dlen, dfin, off, m_pktlen(m),
mpts->mpts_connid);
soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
m->m_pkthdr.mp_dsn = dsn + off;
m->m_pkthdr.mp_rseq = rseq + off;
-
VERIFY(m_pktlen(m) < UINT16_MAX);
m->m_pkthdr.mp_rlen = (uint16_t)m_pktlen(m);
+ /* Only put the DATA_FIN-flag on the last mbuf of this mapping */
+ if (dfin) {
+ if (m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen < dsn + dlen - dfin) {
+ m->m_pkthdr.pkt_flags &= ~PKTF_MPTCP_DFIN;
+ } else {
+ m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN;
+ }
+ }
+
mpts->mpts_flags |= MPTSF_FULLY_ESTABLISHED;
return 0;
SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");
while (m != NULL) {
- int dlen = 0, dfin = 0, error_out = 0;
+ int dlen = 0, error_out = 0, off = 0;
+ uint8_t dfin = 0;
struct mbuf *start = m;
uint64_t dsn;
uint32_t sseq;
if (m_iter->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN) {
dfin = 1;
+ dlen--;
}
break;
if (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN) {
dfin = 1;
+ dlen--;
}
}
/*
* Check if the full mapping is now present
*/
- if ((int)so->so_rcv.sb_cc < dlen - dfin) {
+ if ((int)so->so_rcv.sb_cc < dlen) {
if (*mp0 == NULL) {
error = EWOULDBLOCK;
}
}
/* Now, get the full mapping */
+ off = 0;
while (dlen > 0) {
- if (mptcp_adj_rmap(so, m, orig_dlen - dlen, dsn, sseq, orig_dlen)) {
+ if (mptcp_adj_rmap(so, m, off, dsn, sseq, orig_dlen, dfin)) {
error_out = 1;
error = EIO;
dlen = 0;
}
dlen -= m->m_len;
+ off += m->m_len;
sbfree(&so->so_rcv, m);
if (mp != NULL) {
*mp = NULL;
}
- if (dlen - dfin == 0) {
- dlen = 0;
- }
-
- VERIFY(dlen <= 0 || m);
+ VERIFY(dlen == 0 || m);
}
VERIFY(dlen == 0);
send_dfin = 1;
}
+ if (mp_so->so_flags & SOF_DEFUNCT) {
+ errno_t ret;
+
+ ret = sosetdefunct(NULL, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, TRUE);
+ if (ret == 0) {
+ ret = sodefunct(NULL, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
+
+ if (ret != 0) {
+ os_log_error(mptcp_log_handle, "%s - %lx: sodefunct failed with %d\n",
+ __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ret);
+ }
+ } else {
+ os_log_error(mptcp_log_handle, "%s - %lx: sosetdefunct failed with %d\n",
+ __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ret);
+ }
+ }
+
if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
(so->so_state & SS_ISCONNECTED)) {
mptcplog((LOG_DEBUG, "%s: cid %d fin %d\n",
mptcp_send_dfin(so);
}
- if (mp_so->so_flags & SOF_DEFUNCT) {
- errno_t ret;
-
- ret = sosetdefunct(NULL, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, TRUE);
- if (ret == 0) {
- ret = sodefunct(NULL, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
-
- if (ret != 0) {
- os_log_error(mptcp_log_handle, "%s - %lx: sodefunct failed with %d\n",
- __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ret);
- }
- } else {
- os_log_error(mptcp_log_handle, "%s - %lx: sosetdefunct failed with %d\n",
- __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ret);
- }
- } else {
- (void) soshutdownlock(so, SHUT_RD);
- (void) soshutdownlock(so, SHUT_WR);
- (void) sodisconnectlocked(so);
- }
+ (void) soshutdownlock(so, SHUT_RD);
+ (void) soshutdownlock(so, SHUT_WR);
+ (void) sodisconnectlocked(so);
}
/*
*/
error = 0;
} else {
+ /* We need to revert our change to mpts_rel_seq */
+ mpts->mpts_rel_seq -= tot_sent;
+
os_log_error(mptcp_log_handle, "%s - %lx: %u error %d len %d subflags %#x sostate %#x soerror %u hiwat %u lowat %u\n",
__func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_connid, error, tot_sent, so->so_flags, so->so_state, so->so_error, so->so_snd.sb_hiwat, so->so_snd.sb_lowat);
}
/* m is already fully covered by the next mbuf in the queue */
if (n->m_pkthdr.mp_dsn == m->m_pkthdr.mp_dsn &&
n->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_rlen) {
- mptcplog((LOG_DEBUG, "%s fully covered with len %u\n",
- __func__, n->m_pkthdr.mp_rlen),
- MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
+ os_log(mptcp_log_handle, "%s - %lx: dsn %u dlen %u rseq %u fully covered with len %u\n",
+ __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
+ (uint32_t)m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
+ m->m_pkthdr.mp_rseq, n->m_pkthdr.mp_rlen);
goto dont_queue;
}
if (m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen >= n->m_pkthdr.mp_dsn + n->m_pkthdr.mp_rlen) {
struct mbuf *tmp = n->m_nextpkt;
- mptcplog((LOG_DEBUG, "%s m is covering that guy dsn %u len %u dsn %u len %u\n",
- __func__, (uint32_t)m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
- (uint32_t)n->m_pkthdr.mp_dsn, n->m_pkthdr.mp_rlen),
- MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
+ os_log(mptcp_log_handle, "%s - %lx: m (dsn %u len %u) is covering existing mbuf (dsn %u len %u)\n",
+ __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
+ (uint32_t)m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
+ (uint32_t)n->m_pkthdr.mp_dsn, n->m_pkthdr.mp_rlen);
m->m_nextpkt = NULL;
if (prev == NULL) {
if (prev) {
/* m is already fully covered by the previous mbuf in the queue */
if (prev->m_pkthdr.mp_dsn + prev->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_dsn + m->m_pkthdr.len) {
- mptcplog((LOG_DEBUG, "%s prev covers us from %u with len %u\n",
- __func__, (uint32_t)prev->m_pkthdr.mp_dsn, prev->m_pkthdr.mp_rlen),
- MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
+ os_log(mptcp_log_handle, "%s - %lx: prev (dsn %u len %u) covers us (dsn %u len %u)\n",
+ __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
+ (uint32_t)prev->m_pkthdr.mp_dsn, prev->m_pkthdr.mp_rlen,
+ (uint32_t)m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen);
goto dont_queue;
}
}
m = sb->sb_mb;
while (m) {
struct mbuf *n = m->m_next, *orig = m;
+ bool set_reinject_flag = false;
mptcplog((LOG_DEBUG, "%s working on suna %u relseq %u iss %u len %u pktflags %#x\n",
__func__, tp->snd_una, m->m_pkthdr.mp_rseq, mpts->mpts_iss,
*/
mptcp_add_reinjectq(mpte, m);
+ set_reinject_flag = true;
orig->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;
next:
break;
}
- n->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;
+ if (set_reinject_flag) {
+ n->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;
+ }
n = n->m_next;
}
ifp = sotoinpcb(so)->inp_last_outifp;
if (ifnet_get_nat64prefix(ifp, nat64prefixes) == ENOENT) {
- mptcp_ask_for_nat64(ifp);
return;
}
-
for (j = 0; j < NAT64_MAX_NUM_PREFIXES; j++) {
int success;
success = mptcp_desynthesize_ipv6_addr(&mpte->__mpte_dst_v6.sin6_addr,
&nat64prefixes[j],
- &mpte->mpte_dst_v4_nat64.sin_addr);
+ &mpte->mpte_sub_dst_v4.sin_addr);
if (success) {
- mpte->mpte_dst_v4_nat64.sin_len = sizeof(mpte->mpte_dst_v4_nat64);
- mpte->mpte_dst_v4_nat64.sin_family = AF_INET;
- mpte->mpte_dst_v4_nat64.sin_port = mpte->__mpte_dst_v6.sin6_port;
+ mpte->mpte_sub_dst_v4.sin_len = sizeof(mpte->mpte_sub_dst_v4);
+ mpte->mpte_sub_dst_v4.sin_family = AF_INET;
+ mpte->mpte_sub_dst_v4.sin_port = mpte->__mpte_dst_v6.sin6_port;
break;
}
}
mptcp_notify_mpfail(so);
} else {
if (IFNET_IS_CELLULAR(inp->inp_last_outifp) &&
- mpte->mpte_svctype < MPTCP_SVCTYPE_AGGREGATE) {
+ mptcp_subflows_need_backup_flag(mpte)) {
tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO);
} else {
mpts->mpts_flags |= MPTSF_PREFERRED;
*/
if (IFNET_IS_CELLULAR(inp->inp_last_outifp) &&
!(tp->t_mpflags & TMPF_BACKUP_PATH) &&
- mpte->mpte_svctype < MPTCP_SVCTYPE_AGGREGATE) {
+ mptcp_subflows_need_backup_flag(mpte)) {
tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO);
mpts->mpts_flags &= ~MPTSF_PREFERRED;
} else {
/* Only handover- and urgency-mode are purely driven by Symptom's Wi-Fi status */
if (mpte->mpte_svctype != MPTCP_SVCTYPE_HANDOVER &&
+ mpte->mpte_svctype != MPTCP_SVCTYPE_PURE_HANDOVER &&
mpte->mpte_svctype != MPTCP_SVCTYPE_TARGET_BASED) {
goto next;
}
lck_mtx_unlock(&mtcbinfo.mppi_lock);
}
+struct mptcp_uuid_search_info {
+ uuid_t target_uuid;
+ proc_t found_proc;
+ boolean_t is_proc_found;
+};
+
+static int
+mptcp_find_proc_filter(proc_t p, void *arg)
+{
+ struct mptcp_uuid_search_info *info = (struct mptcp_uuid_search_info *)arg;
+ int found;
+
+ if (info->is_proc_found) {
+ return 0;
+ }
+
+ /*
+ * uuid_compare returns 0 if the UUIDs match, but the proc-filter
+ * expects a non-zero return for a match.
+ */
+ found = uuid_compare(p->p_uuid, info->target_uuid) == 0;
+ if (found) {
+ info->is_proc_found = true;
+ }
+
+ return found;
+}
+
+static int
+mptcp_find_proc_callout(proc_t p, void * arg)
+{
+ struct mptcp_uuid_search_info *info = (struct mptcp_uuid_search_info *)arg;
+
+ if (uuid_compare(p->p_uuid, info->target_uuid) == 0) {
+ info->found_proc = p;
+ return PROC_CLAIMED_DONE;
+ }
+
+ return PROC_RETURNED;
+}
+
+static proc_t
+mptcp_find_proc(const uuid_t uuid)
+{
+ struct mptcp_uuid_search_info info;
+
+ uuid_copy(info.target_uuid, uuid);
+ info.found_proc = PROC_NULL;
+ info.is_proc_found = false;
+
+ proc_iterate(PROC_ALLPROCLIST, mptcp_find_proc_callout, &info,
+ mptcp_find_proc_filter, &info);
+
+ return info.found_proc;
+}
+
void
mptcp_ask_symptoms(struct mptses *mpte)
{
struct mptcp_symptoms_ask_uuid ask;
struct socket *mp_so;
- struct proc *p;
+ struct proc *p = PROC_NULL;
int pid, prio, err;
if (mptcp_kern_skt_unit == 0) {
mp_so = mptetoso(mpte);
if (mp_so->so_flags & SOF_DELEGATED) {
- pid = mp_so->e_pid;
- } else {
- pid = mp_so->last_pid;
- }
+ if (mpte->mpte_epid != 0) {
+ p = proc_find(mpte->mpte_epid);
+ if (p != PROC_NULL) {
+ /* We found a pid, check its UUID */
+ if (uuid_compare(mp_so->e_uuid, p->p_uuid)) {
+ /* It's not the same - we need to look for the real proc */
+ proc_rele(p);
+ p = PROC_NULL;
+ }
+ }
+ }
- p = proc_find(pid);
- if (p == PROC_NULL) {
- os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for pid %u\n",
- __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), pid);
- return;
- }
+ if (p == PROC_NULL) {
+ p = mptcp_find_proc(mp_so->e_uuid);
+ if (p == PROC_NULL) {
+ uuid_string_t uuid_string;
+ uuid_unparse(mp_so->e_uuid, uuid_string);
- ask.cmd = MPTCP_SYMPTOMS_ASK_UUID;
+ os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for uuid %s\n",
+ __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), uuid_string);
- if (mp_so->so_flags & SOF_DELEGATED) {
+ return;
+ }
+ mpte->mpte_epid = proc_pid(p);
+ }
+
+ pid = mpte->mpte_epid;
uuid_copy(ask.uuid, mp_so->e_uuid);
} else {
+ pid = mp_so->last_pid;
+
+ p = proc_find(pid);
+ if (p == PROC_NULL) {
+ os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for pid %u\n",
+ __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), pid);
+ return;
+ }
+
uuid_copy(ask.uuid, mp_so->last_uuid);
}
+
+ ask.cmd = MPTCP_SYMPTOMS_ASK_UUID;
+
prio = proc_get_effective_task_policy(proc_task(p), TASK_POLICY_ROLE);
if (prio == TASK_BACKGROUND_APPLICATION || prio == TASK_NONUI_APPLICATION ||
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, allow_aggregate, CTLFLAG_RW | CTLFLAG_LOCKED,
&mptcp_developer_mode, 0, "Allow the Multipath aggregation mode");
+int mptcp_no_first_party = 0;
+SYSCTL_INT(_net_inet_mptcp, OID_AUTO, no_first_party, CTLFLAG_RW | CTLFLAG_LOCKED,
+ &mptcp_no_first_party, 0, "Do not do first-party app exemptions");
+
static unsigned long mptcp_expected_progress_headstart = 5000;
SYSCTL_ULONG(_net_inet_mptcp, OID_AUTO, expected_progress_headstart, CTLFLAG_RW | CTLFLAG_LOCKED,
&mptcp_expected_progress_headstart, "Headstart to give MPTCP before meeting the progress deadline");
{
struct mptses *mpte = mpsotompte(mp_so);
+ if (mptcp_no_first_party) {
+ return 0;
+ }
+
/* First, check for mptcp_extended without delegation */
if (soopt_cred_check(mp_so, PRIV_NET_RESTRICTED_MULTIPATH_EXTENDED, TRUE, FALSE) == 0) {
/*
if ((mp_so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0) {
memcpy(&mpte->mpte_u_dst, dst, dst->sa_len);
+
+ if (dst->sa_family == AF_INET) {
+ memcpy(&mpte->mpte_sub_dst_v4, dst, dst->sa_len);
+ } else {
+ memcpy(&mpte->mpte_sub_dst_v6, dst, dst->sa_len);
+ }
}
if (src) {
struct socket *, mp_so, struct mptcb *, mp_tp);
/* if we're not detached, go thru socket state checks */
- if (!(mp_so->so_flags & SOF_PCBCLEARING)) {
+ if (!(mp_so->so_flags & SOF_PCBCLEARING) && !(mp_so->so_flags & SOF_DEFUNCT)) {
if (!(mp_so->so_state & (SS_ISCONNECTED |
SS_ISCONNECTING))) {
error = ENOTCONN;
struct mptcb *mp_tp = mpte->mpte_mptcb;
struct socket *mp_so = mptetoso(mpte);
- if (mp_tp->mpt_state == MPTCPS_CLOSED) {
+ if (mp_tp->mpt_state == MPTCPS_CLOSED || mp_tp->mpt_state == MPTCPS_TERMINATE) {
mpte = mptcp_close(mpte, mp_tp);
} else if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) {
soisdisconnected(mp_so);
mptcp_close_fsm(mp_tp, MPCE_CLOSE);
/* Not everything has been acknowledged - don't close the subflows! */
- if (mp_tp->mpt_sndnxt + 1 != mp_tp->mpt_sndmax) {
+ if (mp_tp->mpt_state != MPTCPS_TERMINATE &&
+ mp_tp->mpt_sndnxt + 1 != mp_tp->mpt_sndmax) {
return mpte;
}
case PERSIST_TIMEOUT:
case TCP_ADAPTIVE_READ_TIMEOUT:
case TCP_ADAPTIVE_WRITE_TIMEOUT:
+ case TCP_FASTOPEN_FORCE_ENABLE:
/* eligible; record it */
break;
case TCP_NOTSENT_LOWAT:
case TCP_RXT_CONNDROPTIME:
case TCP_ADAPTIVE_READ_TIMEOUT:
case TCP_ADAPTIVE_WRITE_TIMEOUT:
+ case TCP_FASTOPEN_FORCE_ENABLE:
{
struct mptopt *mpo = mptcp_sopt_find(mpte, sopt);
return "ADAPTIVE_READ_TIMEOUT";
case TCP_ADAPTIVE_WRITE_TIMEOUT:
return "ADAPTIVE_WRITE_TIMEOUT";
+ case TCP_FASTOPEN_FORCE_ENABLE:
+ return "TCP_FASTOPEN_FORCE_ENABLE";
case MPTCP_SERVICE_TYPE:
return "MPTCP_SERVICE_TYPE";
case MPTCP_ALTERNATE_PORT:
#define __mpte_dst_v4 mpte_u_dst._mpte_dst_v4
#define __mpte_dst_v6 mpte_u_dst._mpte_dst_v6
- struct sockaddr_in mpte_dst_v4_nat64;
-
- struct sockaddr_in mpte_dst_unicast_v4;
- struct sockaddr_in6 mpte_dst_unicast_v6;
+ struct sockaddr_in mpte_sub_dst_v4;
+ struct sockaddr_in6 mpte_sub_dst_v6;
uint16_t mpte_alternate_port; /* Alternate port for subflow establishment (network-byte-order) */
return MIN(cwnd, sbspace(&so->so_snd));
}
+static inline bool
+mptcp_subflows_need_backup_flag(struct mptses *mpte)
+{
+ return mpte->mpte_svctype < MPTCP_SVCTYPE_AGGREGATE ||
+ mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER;
+}
/*
* MPTCP socket options
extern void mptcp_control_register(void);
extern int mptcp_is_wifi_unusable_for_session(struct mptses *mpte);
extern boolean_t symptoms_is_wifi_lossy(void);
-extern void mptcp_ask_for_nat64(struct ifnet *ifp);
extern void mptcp_session_necp_cb(void *, int, uint32_t, uint32_t, bool *);
extern struct sockaddr *mptcp_get_session_dst(struct mptses *mpte,
boolean_t has_v6, boolean_t has_v4);
if ((so->so_state & SS_PRIV) == 0) {
return EPERM;
}
+ if (proto > UINT8_MAX) {
+ return EINVAL;
+ }
error = soreserve(so, rip_sendspace, rip_recvspace);
if (error) {
#define MPTCP_SVCTYPE_INTERACTIVE 1
#define MPTCP_SVCTYPE_AGGREGATE 2
#define MPTCP_SVCTYPE_TARGET_BASED 3
-#define MPTCP_SVCTYPE_MAX 4
+#define MPTCP_SVCTYPE_PURE_HANDOVER 4
+#define MPTCP_SVCTYPE_MAX 5
+
/*
* Specify minimum time in seconds before which an established
* TCP connection will not be dropped when there is no response from the
#define TCPI_FLAG_STREAMING_ON 0x02 /* Streaming detection on */
struct tcp_conn_status {
- unsigned int probe_activated : 1;
- unsigned int write_probe_failed : 1;
- unsigned int read_probe_failed : 1;
- unsigned int conn_probe_failed : 1;
+ union {
+ struct {
+ unsigned int probe_activated : 1;
+ unsigned int write_probe_failed : 1;
+ unsigned int read_probe_failed : 1;
+ unsigned int conn_probe_failed : 1;
+ };
+ uint32_t pad_field;
+ };
};
/*
goto dropwithreset;
}
+ /* Now that we found the tcpcb, we can adjust the TCP timestamp */
+ if (to.to_flags & TOF_TS) {
+ to.to_tsecr -= tp->t_ts_offset;
+ }
+
TCP_LOG_TH_FLAGS(TCP_LOG_HDR, th, tp, false, ifp);
if (tp->t_state == TCPS_CLOSED) {
* be TH_NEEDSYN.
*/
if (tp->t_state == TCPS_ESTABLISHED &&
- (thflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK | TH_ECE | TH_CWR)) == TH_ACK &&
+ !(so->so_state & SS_CANTRCVMORE) &&
+ (thflags & TH_FLAGS) == TH_ACK &&
((tp->t_flags & TF_NEEDFIN) == 0) &&
((to.to_flags & TOF_TS) == 0 ||
TSTMP_GEQ(to.to_tsval, tp->ts_recent)) &&
so_recv_data_stat(so, m, 0);
m_adj(m, drop_hdrlen); /* delayed header drop */
- /*
- * If message delivery (SOF_ENABLE_MSGS) is enabled on
- * this socket, deliver the packet received as an
- * in-order message with sequence number attached to it.
- */
if (isipv6) {
memcpy(&saved_hdr, ip6, sizeof(struct ip6_hdr));
ip6 = (struct ip6_hdr *)&saved_hdr[0];
close_it = TRUE;
}
+ if (so->so_state & SS_CANTRCVMORE) {
+ TCP_LOG_DROP_PCB(TCP_LOG_HDR, th, tp, false, "SS_CANTRCVMORE");
+ close_it = TRUE;
+ }
+
if (close_it) {
tp = tcp_close(tp);
tcpstat.tcps_rcvafterclose++;
(m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN) &&
(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
m_adj(m, drop_hdrlen); /* delayed header drop */
+ /*
+ * 0-length DATA_FIN. The rlen is actually 0. We special-case the
+ * byte consumed by the dfin in mptcp_input and mptcp_reass_present
+ */
+ m->m_pkthdr.mp_rlen = 0;
mptcp_input(tptomptp(tp)->mpt_mpte, m);
tp->t_flags |= TF_ACKNOW;
} else {
bcopy((char *)cp + 6,
(char *)&to->to_tsecr, sizeof(to->to_tsecr));
NTOHL(to->to_tsecr);
+ to->to_tsecr -= tp->t_ts_offset;
/* Re-enable sending Timestamps if we received them */
if (!(tp->t_flags & TF_REQ_TSTMP)) {
tp->t_flags |= TF_REQ_TSTMP;
CTLFLAG_RW | CTLFLAG_LOCKED, int, tcp_ack_compression_rate, TCP_COMP_CHANGE_RATE,
"Rate at which we force sending new ACKs (in ms)");
+SYSCTL_SKMEM_TCP_INT(OID_AUTO, randomize_timestamps,
+ CTLFLAG_RW | CTLFLAG_LOCKED, int, tcp_randomize_timestamps, 1,
+ "Randomize TCP timestamps to prevent tracking (on: 1, off: 0)");
+
static int
sysctl_change_ecn_setting SYSCTL_HANDLER_ARGS
{
/* Form timestamp option as shown in appendix A of RFC 1323. */
*lp++ = htonl(TCPOPT_TSTAMP_HDR);
- *lp++ = htonl(tcp_now);
+ *lp++ = htonl(tcp_now + tp->t_ts_offset);
*lp = htonl(tp->ts_recent);
optlen += TCPOLEN_TSTAMP_APPA;
}
}
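
Taken together with the TSecr adjustments on the input path earlier in this diff, t_ts_offset is applied symmetrically: TSval carries tcp_now + t_ts_offset on the wire, and the echoed TSecr has the offset subtracted again before any RTT processing, with uint32_t wraparound making the pair lossless. A small stand-alone sketch (hypothetical function names, same unsigned arithmetic):

#include <assert.h>
#include <stdint.h>

/* What the peer sees in TSval for a given local clock value. */
static uint32_t
tsval_on_wire(uint32_t tcp_now, uint32_t t_ts_offset)
{
	return tcp_now + t_ts_offset;   /* may wrap; that is fine */
}

/* Undoing the offset on an echoed TSecr before RTT processing. */
static uint32_t
tsecr_local(uint32_t tsecr, uint32_t t_ts_offset)
{
	return tsecr - t_ts_offset;     /* wraps back to the original value */
}

int
main(void)
{
	uint32_t now = 0xfffffff0u, off = 0x12345678u;

	/* Offset applied on send and removed on receive, despite wraparound. */
	assert(tsecr_local(tsval_on_wire(now, off), off) == now);
	return 0;
}
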
/*
* Unless this is due to interface restriction policy,
- * treat EHOSTUNREACH/ENETDOWN as a soft error.
+ * treat EHOSTUNREACH/ENETDOWN/EADDRNOTAVAIL as a soft error.
*/
- if ((error == EHOSTUNREACH || error == ENETDOWN) &&
+ if ((error == EHOSTUNREACH || error == ENETDOWN || error == EADDRNOTAVAIL) &&
TCPS_HAVERCVDSYN(tp->t_state) &&
!inp_restricted_send(inp, inp->inp_last_outifp)) {
tp->t_softerror = error;
struct tcpcb *tp;
struct socket *so = inp->inp_socket;
int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
+ uint32_t random_32;
calculate_tcp_clock();
tp->t_twentry.tqe_next = NULL;
tp->t_twentry.tqe_prev = NULL;
+ read_frandom(&random_32, sizeof(random_32));
if (__probable(tcp_do_ack_compression)) {
- read_frandom(&tp->t_comp_gencnt, sizeof(tp->t_comp_gencnt));
+ tp->t_comp_gencnt = random_32;
if (tp->t_comp_gencnt <= TCP_ACK_COMPRESSION_DUMMY) {
tp->t_comp_gencnt = TCP_ACK_COMPRESSION_DUMMY + 1;
}
tp->t_comp_lastinc = tcp_now;
}
+ if (__probable(tcp_randomize_timestamps)) {
+ tp->t_ts_offset = random_32;
+ }
+
/*
* IPv4 TTL initialization is necessary for an IPv6 socket as well,
* because the socket may be bound to an IPv6 wildcard address,
(so->so_flags & SOF_MP_SUBFLOW)) {
struct mptses *mpte = tptomptp(tp)->mpt_mpte;
- if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
+ if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER ||
+ mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) {
mptcp_check_subflows_and_add(mpte);
}
}
struct sockaddr_in sin;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
- return EINVAL;
+ error = EINVAL;
+ goto out;
}
in6_sin6_2_sin(&sin, sin6p);
+ /*
+ * Must disallow TCP ``connections'' to multicast addresses.
+ */
+ if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
+ error = EAFNOSUPPORT;
+ goto out;
+ }
inp->inp_vflag |= INP_IPV4;
inp->inp_vflag &= ~INP_IPV6;
if ((error = tcp_connect(tp, (struct sockaddr *)&sin, p)) != 0) {
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_timer.h>
+#if !KERNEL
+#include <TargetConditionals.h>
+#endif
#if defined(__LP64__)
#define _TCPCB_PTR(x) u_int32_t
uint32_t t_comp_lastinc; /* Last time the gen-count was changed - should change every TCP_COMP_CHANGE_RATE ms */
#define TCP_COMP_CHANGE_RATE 5 /* Intervals at which we change the gencnt. Means that worst-case we send one ACK every TCP_COMP_CHANGE_RATE ms */
+ uint32_t t_ts_offset; /* Randomized timestamp offset to hide on-the-wire timestamp */
+
uuid_t t_fsw_uuid;
uuid_t t_flow_uuid;
};
u_quad_t xt_alignment_hack;
};
-#if XNU_TARGET_OS_OSX || !(TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR)
+#if XNU_TARGET_OS_OSX || KERNEL || !(TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR)
struct xtcpcb64 {
u_int32_t xt_len;
u_quad_t xt_alignment_hack;
};
-#endif /* XNU_TARGET_OS_OSX || !(TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR) */
+#endif /* XNU_TARGET_OS_OSX || KERNEL || !(TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR) */
#ifdef PRIVATE
extern uint32_t tcp_autorcvbuf_max;
extern int tcp_recv_bg;
extern int tcp_do_ack_compression;
+extern int tcp_randomize_timestamps;
/*
* Dummy value used for when there is no flow and we want to ensure that compression
* can happen.
struct inpcb *inp;
inp = sotoinpcb(so);
- if (inp == NULL
-#if NECP
- || (necp_socket_should_use_flow_divert(inp))
-#endif /* NECP */
- ) {
- return inp == NULL ? EINVAL : EPROTOTYPE;
+ if (inp == NULL) {
+ return EINVAL;
}
if (inp->inp_faddr.s_addr == INADDR_ANY) {
return ENOTCONN;
extern struct inpcbhead ripcb;
extern int icmp6errppslim;
+extern int icmp6errppslim_random_incr;
extern int icmp6rappslim;
static int icmp6errpps_count = 0;
static int icmp6rapps_count = 0;
if (!icmp6_initialized) {
icmp6_initialized = 1;
mld_init();
+ if (icmp6errppslim >= 0 &&
+ icmp6errppslim_random_incr > 0 &&
+ icmp6errppslim <= INT32_MAX - (icmp6errppslim_random_incr + 1)) {
+ icmp6errppslim += (random() % icmp6errppslim_random_incr) + 1;
+ }
}
}
}
} else if (!ppsratecheck(&icmp6errppslim_last, &icmp6errpps_count,
icmp6errppslim)) {
- /* The packet is subject to rate limit */
- ret++;
+ /*
+ * Add some randomness so that ICMPv6 errors are still generated
+ * past the icmp6errppslim limit, with a probability that decreases
+ * as icmp6errpps_count grows.
+ */
+ if (icmp6errpps_count > 0 && icmp6errppslim > 0 &&
+ icmp6errpps_count > icmp6errppslim &&
+ (random() % (icmp6errpps_count - icmp6errppslim)) != 0) {
+ /* The packet is subject to rate limit */
+ ret++;
+ }
}
return ret;
/* ICMPV6 parameters */
int icmp6_rediraccept = 1; /* accept and process redirects */
int icmp6_redirtimeout = 10 * 60; /* 10 minutes */
-int icmp6errppslim = 500; /* 500 packets per second */
+uint32_t icmp6errppslim = 500; /* 500 packets per second */
+uint32_t icmp6errppslim_random_incr = 500; /* We further randomize icmp6errppslim
+ * by this amount during ICMPv6 initialization */
int icmp6rappslim = 10; /* 10 packets per second */
-int icmp6_nodeinfo = 3; /* enable/disable NI response */
+int icmp6_nodeinfo = 0; /* enable/disable NI response */
/* UDP on IP6 parameters */
int udp6_sendspace = 9216; /* really max datagram size */
nodeinfo, CTLFLAG_RW | CTLFLAG_LOCKED, &icmp6_nodeinfo, 0, "");
SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ERRPPSLIMIT,
errppslimit, CTLFLAG_RW | CTLFLAG_LOCKED, &icmp6errppslim, 0, "");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ERRPPSLIMIT_RANDOM_INCR,
+ errppslimit_random_incr, CTLFLAG_RW | CTLFLAG_LOCKED, &icmp6errppslim_random_incr, 0, "");
SYSCTL_INT(_net_inet6_icmp6, OID_AUTO,
rappslimit, CTLFLAG_RW | CTLFLAG_LOCKED, &icmp6rappslim, 0, "");
SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DEBUG,
#include <libkern/OSAtomic.h>
#include "gss_krb5_mech.h"
-lck_grp_t *gss_krb5_mech_grp;
+LCK_GRP_DECLARE(gss_krb5_mech_grp, "gss_krb5_mech");
typedef struct crypt_walker_ctx {
size_t length;
}
return;
}
- gss_krb5_mech_grp = lck_grp_alloc_init("gss_krb5_mech", LCK_GRP_ATTR_NULL);
gss_krb5_mech_initted = GSS_KRB5_INITIALIZED;
}
if (ikey) {
if (!(ctx->flags & CRYPTO_KS_ALLOCED)) {
- lck_mtx_lock(ctx->lock);
+ lck_mtx_lock(&ctx->lock);
if (!(ctx->flags & CRYPTO_KS_ALLOCED)) {
cc_key_schedule_create(ctx);
}
ctx->flags |= CRYPTO_KS_ALLOCED;
- lck_mtx_unlock(ctx->lock);
+ lck_mtx_unlock(&ctx->lock);
}
key2use = ctx->ks.ikey[kdx];
} else {
if (ikey) {
if (!(ctx->flags & CRYPTO_KS_ALLOCED)) {
- lck_mtx_lock(ctx->lock);
+ lck_mtx_lock(&ctx->lock);
if (!(ctx->flags & CRYPTO_KS_ALLOCED)) {
cc_key_schedule_create(ctx);
}
ctx->flags |= CRYPTO_KS_ALLOCED;
- lck_mtx_unlock(ctx->lock);
+ lck_mtx_unlock(&ctx->lock);
}
key2use = ctx->ks.ikey[kdx];
} else {
int error;
if (!(ctx->flags & CRYPTO_KS_ALLOCED)) {
- lck_mtx_lock(ctx->lock);
+ lck_mtx_lock(&ctx->lock);
if (!(ctx->flags & CRYPTO_KS_ALLOCED)) {
cc_key_schedule_create(ctx);
}
ctx->flags |= CRYPTO_KS_ALLOCED;
- lck_mtx_unlock(ctx->lock);
+ lck_mtx_unlock(&ctx->lock);
}
if (!ks) {
ks = encrypt ? ctx->ks.enc : ctx->ks.dec;
void
gss_crypto_ctx_free(crypto_ctx_t ctx)
{
+ lck_mtx_destroy(&ctx->lock, &gss_krb5_mech_grp);
+
ctx->ks.ikey[GSS_SND] = NULL;
if (ctx->ks.ikey[GSS_RCV] && ctx->key != ctx->ks.ikey[GSS_RCV]) {
cc_clear(ctx->keylen, ctx->ks.ikey[GSS_RCV]);
return ENOTSUP;
}
- ctx->lock = lck_mtx_alloc_init(gss_krb5_mech_grp, LCK_ATTR_NULL);
+ lck_mtx_init(&ctx->lock, &gss_krb5_mech_grp, LCK_ATTR_NULL);
return 0;
}
uint32_t etype;
uint32_t flags;
size_t mpad; /* Message padding */
- lck_mtx_t *lock;
+ lck_mtx_t lock;
lucid_context_t gss_ctx; /* Back pointer to lucid context */
void *key; /* Points to session key from lucid context */
const struct ccdigest_info *di;
extern uint32_t nfsrv_user_stat_node_count; /* current count of user stat nodes */
extern uint32_t nfsrv_user_stat_max_idle_sec; /* idle seconds (node no longer considered active) */
extern uint32_t nfsrv_user_stat_max_nodes; /* active user list size limit */
-extern lck_grp_t *nfsrv_active_user_mutex_group;
+extern lck_grp_t nfsrv_active_user_mutex_group;
/* An active user node represented in the kernel */
struct nfs_user_stat_node {
#define NFSRVFMODHASH(vp) (((uintptr_t) vp) & nfsrv_fmod_hash)
extern LIST_HEAD(nfsrv_fmod_hashhead, nfsrv_fmod) * nfsrv_fmod_hashtbl;
extern u_long nfsrv_fmod_hash;
-extern lck_mtx_t *nfsrv_fmod_mutex;
+extern lck_mtx_t nfsrv_fmod_mutex;
extern int nfsrv_fmod_pending, nfsrv_fsevents_enabled;
#endif
*/
TAILQ_HEAD(nfs_reqqhead, nfsreq);
extern struct nfs_reqqhead nfs_reqq;
-extern lck_grp_t *nfs_request_grp;
+extern lck_grp_t nfs_request_grp;
#define R_XID32(x) ((x) & 0xffffffff)
nfsrv_sockwait, nfsrv_sockwork;
/* lock groups for nfsrv_sock's */
-extern lck_grp_t *nfsrv_slp_rwlock_group;
-extern lck_grp_t *nfsrv_slp_mutex_group;
+extern lck_grp_t nfsrv_slp_rwlock_group;
+extern lck_grp_t nfsrv_slp_mutex_group;
/*
* One of these structures is allocated for each nfsd.
vfs_context_t, mbuf_t *);
/* mutex for nfs server */
-extern lck_mtx_t *nfsd_mutex;
+extern lck_mtx_t nfsd_mutex;
extern int nfsd_thread_count, nfsd_thread_max;
/* request list mutex */
-extern lck_mtx_t *nfs_request_mutex;
+extern lck_mtx_t nfs_request_mutex;
extern int nfs_request_timer_on;
/* mutex for nfs client globals */
-extern lck_mtx_t *nfs_global_mutex;
+extern lck_mtx_t nfs_global_mutex;
#if CONFIG_NFS4
/* NFSv4 callback globals */
void nfs_mbuf_init(void);
-void nfs_nhinit(void);
void nfs_nhinit_finish(void);
u_long nfs_hash(u_char *, int);
static uint8_t en0addr[6];
static uint8_t en0addr_set = 0;
- lck_mtx_lock(nfs_global_mutex);
+ lck_mtx_lock(&nfs_global_mutex);
if (!en0addr_set) {
ifnet_t interface = NULL;
error = ifnet_find_by_name("en0", &interface);
ifnet_release(interface);
}
}
- lck_mtx_unlock(nfs_global_mutex);
+ lck_mtx_unlock(&nfs_global_mutex);
MALLOC(ncip, struct nfs_client_id *, sizeof(struct nfs_client_id), M_TEMP, M_WAITOK);
if (!ncip) {
}
/* make sure the ID is unique, and add it to the sorted list */
- lck_mtx_lock(nfs_global_mutex);
+ lck_mtx_lock(&nfs_global_mutex);
TAILQ_FOREACH(ncip2, &nfsclientids, nci_link) {
if (ncip->nci_idlen > ncip2->nci_idlen) {
continue;
TAILQ_INSERT_TAIL(&nfsclientids, ncip, nci_link);
}
nmp->nm_longid = ncip;
- lck_mtx_unlock(nfs_global_mutex);
+ lck_mtx_unlock(&nfs_global_mutex);
return 0;
}
interval = 1;
}
lck_mtx_unlock(&nmp->nm_lock);
- nfs_interval_timer_start(nmp->nm_renew_timer, interval * 1000);
+
+ lck_mtx_lock(&nmp->nm_timer_lock);
+ if (nmp->nm_renew_timer) {
+ nfs_interval_timer_start(nmp->nm_renew_timer, interval * 1000);
+ }
+ lck_mtx_unlock(&nmp->nm_timer_lock);
}
/*
return NULL;
}
bzero(newnoop, sizeof(*newnoop));
- lck_mtx_init(&newnoop->noo_lock, nfs_open_grp, LCK_ATTR_NULL);
+ lck_mtx_init(&newnoop->noo_lock, &nfs_open_grp, LCK_ATTR_NULL);
newnoop->noo_mount = nmp;
kauth_cred_ref(cred);
newnoop->noo_cred = cred;
if (noop->noo_cred) {
kauth_cred_unref(&noop->noo_cred);
}
- lck_mtx_destroy(&noop->noo_lock, nfs_open_grp);
+ lck_mtx_destroy(&noop->noo_lock, &nfs_open_grp);
FREE(noop, M_TEMP);
}
return ENOMEM;
}
bzero(newnofp, sizeof(*newnofp));
- lck_mtx_init(&newnofp->nof_lock, nfs_open_grp, LCK_ATTR_NULL);
+ lck_mtx_init(&newnofp->nof_lock, &nfs_open_grp, LCK_ATTR_NULL);
newnofp->nof_owner = noop;
nfs_open_owner_ref(noop);
newnofp->nof_np = np;
TAILQ_REMOVE(&nofp->nof_owner->noo_opens, nofp, nof_oolink);
lck_mtx_unlock(&nofp->nof_owner->noo_lock);
nfs_open_owner_rele(nofp->nof_owner);
- lck_mtx_destroy(&nofp->nof_lock, nfs_open_grp);
+ lck_mtx_destroy(&nofp->nof_lock, &nfs_open_grp);
FREE(nofp, M_TEMP);
}
return NULL;
}
bzero(newnlop, sizeof(*newnlop));
- lck_mtx_init(&newnlop->nlo_lock, nfs_open_grp, LCK_ATTR_NULL);
+ lck_mtx_init(&newnlop->nlo_lock, &nfs_open_grp, LCK_ATTR_NULL);
newnlop->nlo_pid = pid;
newnlop->nlo_pid_start = p->p_start;
newnlop->nlo_name = OSAddAtomic(1, &nfs_lock_owner_seqnum);
nfs_open_owner_rele(nlop->nlo_open_owner);
nlop->nlo_open_owner = NULL;
}
- lck_mtx_destroy(&nlop->nlo_lock, nfs_open_grp);
+ lck_mtx_destroy(&nlop->nlo_lock, &nfs_open_grp);
FREE(nlop, M_TEMP);
}
error = EIO;
}
if (!error) {
+ if (busy) {
+ nfs_open_state_clear_busy(np);
+ busy = 0;
+ }
error = nmp->nm_funcs->nf_setlock_rpc(np, nofp, newnflp, 0, 0, vfs_context_thread(ctx), vfs_context_ucred(ctx));
+ if (!busy && !nfs_open_state_set_busy(np, vfs_context_thread(ctx))) {
+ busy = 1;
+ }
}
if (!error || ((error != NFSERR_DENIED) && (error != NFSERR_GRACE))) {
break;
* again if another object gets created with the same filehandle
* before this vnode gets reclaimed
*/
- lck_mtx_lock(nfs_node_hash_mutex);
+ lck_mtx_lock(&nfs_node_hash_mutex);
if (np->n_hflag & NHHASHED) {
LIST_REMOVE(np, n_hash);
np->n_hflag &= ~NHHASHED;
FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
}
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
}
FREE(dul, M_TEMP);
return error;
int nfs_buf_timer_on = 0;
thread_t nfsbufdelwrithd = NULL;
-ZONE_DECLARE(nfsbuf_zone, "NFS bio", sizeof(struct nfsbuf), ZC_NONE);
+static ZONE_DECLARE(nfsbuf_zone, "NFS bio", sizeof(struct nfsbuf), ZC_NONE);
-lck_grp_t *nfs_buf_lck_grp;
-lck_mtx_t *nfs_buf_mutex;
+static LCK_GRP_DECLARE(nfs_buf_lck_grp, "nfs buf");
+LCK_MTX_DECLARE(nfs_buf_mutex, &nfs_buf_lck_grp);
#define NFSBUF_FREE_PERIOD 30 /* seconds */
#define NFSBUF_LRU_STALE 120
void
nfs_nbinit(void)
{
- nfs_buf_lck_grp = lck_grp_alloc_init("nfs_buf", LCK_GRP_ATTR_NULL);
- nfs_buf_mutex = lck_mtx_alloc_init(nfs_buf_lck_grp, LCK_ATTR_NULL);
-
nfsbufcnt = nfsbufmetacnt =
nfsbuffreecnt = nfsbuffreemetacnt = nfsbufdelwricnt = 0;
nfsbufmin = 128;
{
nfs_buf_freeup(1);
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
if (nfsbufcnt <= nfsbufmin) {
nfs_buf_timer_on = 0;
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
return;
}
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
nfs_interval_timer_start(nfs_buf_timer_call,
NFSBUF_FREE_PERIOD * 1000);
TAILQ_INIT(&nfsbuffreeup);
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
microuptime(&now);
FSDBG(320, nfsbufcnt, nfsbuffreecnt, nfsbuffreemetacnt, 0);
NFSBUFCNTCHK();
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
while ((fbp = TAILQ_FIRST(&nfsbuffreeup))) {
TAILQ_REMOVE(&nfsbuffreeup, fbp, nb_free);
nfs_buf_is_incore(nfsnode_t np, daddr64_t blkno)
{
boolean_t rv;
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
if (nfs_buf_incore(np, blkno)) {
rv = TRUE;
} else {
rv = FALSE;
}
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
return rv;
}
return ENXIO;
}
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
bp = nfs_buf_incore(VTONFS(vp), (daddr64_t)(offset / nmp->nm_biosize));
if (!bp) {
goto out;
}
}
out:
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
return error;
}
TAILQ_INSERT_TAIL(&nfsbufdelwri, bp, nb_free);
nfsbufdelwricnt++;
nfs_buf_drop(bp);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
nfs_flushcommits(np, 1);
} else {
SET(bp->nb_flags, NB_ASYNC);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
nfs_buf_write(bp);
}
i++;
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
}
}
struct timespec ts = { .tv_sec = 30, .tv_nsec = 0 };
int error = 0;
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
while (!error) {
nfs_buf_delwri_service();
- error = msleep(&nfsbufdelwrithd, nfs_buf_mutex, 0, "nfsbufdelwri", &ts);
+ error = msleep(&nfsbufdelwrithd, &nfs_buf_mutex, 0, "nfsbufdelwri", &ts);
}
nfsbufdelwrithd = NULL;
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
thread_terminate(nfsbufdelwrithd);
}
return;
}
if (!locked) {
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
}
/* wake up the delayed write service thread */
if (nfsbufdelwrithd) {
nfs_buf_delwri_service();
}
if (!locked) {
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
}
}
}
loop:
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
/* wait for any buffer invalidation/flushing to complete */
while (np->n_bflag & NBINVALINPROG) {
np->n_bflag |= NBINVALWANT;
ts.tv_sec = 2;
ts.tv_nsec = 0;
- msleep(&np->n_bflag, nfs_buf_mutex, slpflag, "nfs_buf_get_invalwait", &ts);
+ msleep(&np->n_bflag, &nfs_buf_mutex, slpflag, "nfs_buf_get_invalwait", &ts);
if ((error = nfs_sigintr(VTONMP(vp), NULL, thd, 0))) {
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
FSDBG_BOT(541, np, blkno, 0, error);
return error;
}
/* if busy, set wanted and wait */
if (ISSET(bp->nb_lflags, NBL_BUSY)) {
if (flags & NBLK_NOWAIT) {
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
FSDBG_BOT(541, np, blkno, bp, 0xbcbcbcbc);
return 0;
}
ts.tv_sec = 2;
ts.tv_nsec = 0;
- msleep(bp, nfs_buf_mutex, slpflag | (PRIBIO + 1) | PDROP,
+ msleep(bp, &nfs_buf_mutex, slpflag | (PRIBIO + 1) | PDROP,
"nfsbufget", (slpflag == PCATCH) ? NULL : &ts);
slpflag = 0;
FSDBG_BOT(543, np, blkno, bp, bp->nb_flags);
}
if (flags & NBLK_ONLYVALID) {
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
FSDBG_BOT(541, np, blkno, 0, 0x0000cace);
return 0;
}
nfs_buf_delwri_push(1);
nfsneedbuffer = 1;
- msleep(&nfsneedbuffer, nfs_buf_mutex, PCATCH | PDROP, "nfsbufget", NULL);
+ msleep(&nfsneedbuffer, &nfs_buf_mutex, PCATCH | PDROP, "nfsbufget", NULL);
FSDBG_BOT(546, np, blkno, nfsbufcnt, nfsbufmax);
if ((error = nfs_sigintr(VTONMP(vp), NULL, thd, 0))) {
FSDBG_BOT(541, np, blkno, 0, error);
buffer_setup:
/* unlock hash */
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
switch (operation) {
case NBLK_META:
if (!bp->nb_data) {
/* Ack! couldn't allocate the data buffer! */
/* clean up buffer and return error */
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
LIST_REMOVE(bp, nb_vnbufs);
bp->nb_vnbufs.le_next = NFSNOLIST;
bp->nb_np = NULL;
}
TAILQ_INSERT_HEAD(&nfsbuffree, bp, nb_free);
nfsbuffreecnt++;
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
FSDBG_BOT(541, np, blkno, 0xb00, ENOMEM);
return ENOMEM;
}
/* unable to create upl */
/* vm object must no longer exist */
/* clean up buffer and return error */
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
LIST_REMOVE(bp, nb_vnbufs);
bp->nb_vnbufs.le_next = NFSNOLIST;
bp->nb_np = NULL;
}
TAILQ_INSERT_HEAD(&nfsbuffree, bp, nb_free);
nfsbuffreecnt++;
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
FSDBG_BOT(541, np, blkno, 0x2bc, EIO);
return EIO;
}
bp->nb_pagelist = NULL;
}
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
wakeup_needbuffer = wakeup_buffer = wakeup_nbdwrite = 0;
FSDBG_BOT(548, bp, NBOFF(bp), bp->nb_flags, bp->nb_data);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
if (wakeup_needbuffer) {
wakeup(&nfsneedbuffer);
{
FSDBG_TOP(549, bp, NBOFF(bp), bp->nb_flags, bp->nb_error);
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
while (!ISSET(bp->nb_flags, NB_DONE)) {
- msleep(bp, nfs_buf_mutex, PRIBIO + 1, "nfs_buf_iowait", NULL);
+ msleep(bp, &nfs_buf_mutex, PRIBIO + 1, "nfs_buf_iowait", NULL);
}
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
FSDBG_BOT(549, bp, NBOFF(bp), bp->nb_flags, bp->nb_error);
SET(bp->nb_flags, NB_DONE); /* note that it's done */
nfs_buf_release(bp, 1);
} else { /* or just wakeup the buffer */
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
SET(bp->nb_flags, NB_DONE); /* note that it's done */
CLR(bp->nb_lflags, NBL_WANTED);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
wakeup(bp);
}
if (!ISSET(bp->nb_flags, NB_DELWRI)) {
SET(bp->nb_flags, NB_DELWRI);
/* move to dirty list */
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
nfs_nbdwrite++;
NFSBUFCNTCHK();
if (bp->nb_vnbufs.le_next != NFSNOLIST) {
LIST_REMOVE(bp, nb_vnbufs);
}
LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
}
/*
/* the hz value is 100; which leads to 10ms */
ts.tv_nsec = (slptimeo % 100) * 10 * NSEC_PER_USEC * 1000;
- error = msleep(bp, nfs_buf_mutex, slpflag | (PRIBIO + 1),
+ error = msleep(bp, &nfs_buf_mutex, slpflag | (PRIBIO + 1),
"nfs_buf_acquire", &ts);
if (error) {
return error;
while (np->n_bufiterflags & NBI_ITER) {
np->n_bufiterflags |= NBI_ITERWANT;
- msleep(&np->n_bufiterflags, nfs_buf_mutex, 0, "nfs_buf_iterprepare", NULL);
+ msleep(&np->n_bufiterflags, &nfs_buf_mutex, 0, "nfs_buf_iterprepare", NULL);
}
if (LIST_EMPTY(listheadp)) {
LIST_INIT(iterheadp);
SET(bp->nb_flags, NB_ERROR);
if (ISSET(bp->nb_flags, NB_MULTASYNCRPC)) {
nrpcs = (length + nmrsize - 1) / nmrsize;
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
bp->nb_rpcs -= nrpcs;
if (bp->nb_rpcs == 0) {
/* No RPCs left, so the buffer's done */
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
nfs_buf_iodone(bp);
} else {
/* wait for the last RPC to mark it done */
while (bp->nb_rpcs > 0) {
- msleep(&bp->nb_rpcs, nfs_buf_mutex, 0,
+ msleep(&bp->nb_rpcs, &nfs_buf_mutex, 0,
"nfs_buf_read_rpc_cancel", NULL);
}
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
}
} else {
nfs_buf_iodone(bp);
multasyncrpc = ISSET(bp->nb_flags, NB_MULTASYNCRPC);
if (multasyncrpc) {
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
}
bp->nb_rpcs--;
finished = (bp->nb_rpcs == 0);
if (multasyncrpc) {
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
}
if (finished) {
CLR(bp->nb_flags, (NB_READ | NB_DONE | NB_ERROR | NB_DELWRI));
if (ISSET(oldflags, NB_DELWRI)) {
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
nfs_nbdwrite--;
NFSBUFCNTCHK();
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
wakeup(&nfs_nbdwrite);
}
/* move to clean list */
if (ISSET(oldflags, (NB_ASYNC | NB_DELWRI))) {
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
if (bp->nb_vnbufs.le_next != NFSNOLIST) {
LIST_REMOVE(bp, nb_vnbufs);
}
LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
}
nfs_node_lock_force(np);
np->n_numoutput++;
error = nfs_buf_iowait(bp);
/* move to clean list */
if (oldflags & NB_DELWRI) {
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
if (bp->nb_vnbufs.le_next != NFSNOLIST) {
LIST_REMOVE(bp, nb_vnbufs);
}
LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
}
FSDBG_BOT(553, bp, NBOFF(bp), bp->nb_flags, error);
nfs_buf_release(bp, 1);
CLR(bp->nb_flags, NB_INVAL);
if (!ISSET(bp->nb_flags, NB_DELWRI)) {
SET(bp->nb_flags, NB_DELWRI);
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
nfs_nbdwrite++;
NFSBUFCNTCHK();
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
}
/*
* Since for the NB_ASYNC case, we've reassigned the buffer to the
*/
if (ISSET(bp->nb_flags, NB_ASYNC)) {
/* move to dirty list */
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
if (bp->nb_vnbufs.le_next != NFSNOLIST) {
LIST_REMOVE(bp, nb_vnbufs);
}
LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
}
} else {
/* either there's an error or we don't need to commit */
SET(bp->nb_flags, NB_ERROR);
if (ISSET(bp->nb_flags, NB_MULTASYNCRPC)) {
nrpcs = (length + nmwsize - 1) / nmwsize;
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
bp->nb_rpcs -= nrpcs;
if (bp->nb_rpcs == 0) {
/* No RPCs left, so the buffer's done */
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
nfs_buf_write_finish(bp, thd, cred);
} else {
/* wait for the last RPC to mark it done */
while (bp->nb_rpcs > 0) {
- msleep(&bp->nb_rpcs, nfs_buf_mutex, 0,
+ msleep(&bp->nb_rpcs, &nfs_buf_mutex, 0,
"nfs_buf_write_rpc_cancel", NULL);
}
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
}
} else {
nfs_buf_write_finish(bp, thd, cred);
*/
multasyncrpc = ISSET(bp->nb_flags, NB_MULTASYNCRPC);
if (multasyncrpc) {
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
}
bp->nb_rpcs--;
finished = (bp->nb_rpcs == 0);
if (multasyncrpc) {
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
}
if (finished) {
if (nowait) {
flags |= NBI_NOWAIT;
}
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
wverf = nmp->nm_verf;
if (!nfs_buf_iterprepare(np, &blist, flags)) {
while ((bp = LIST_FIRST(&blist))) {
}
nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
}
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
if (LIST_EMPTY(&commitlist)) {
error = ENOBUFS;
if (retv) {
/* move back to dirty list */
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
nfs_buf_release(bp, 1);
continue;
}
nfs_node_unlock(np);
vnode_startwrite(NFSTOV(np));
if (ISSET(bp->nb_flags, NB_DELWRI)) {
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
nfs_nbdwrite--;
NFSBUFCNTCHK();
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
wakeup(&nfs_nbdwrite);
}
CLR(bp->nb_flags, (NB_READ | NB_DONE | NB_ERROR | NB_DELWRI));
}
/* move to clean list */
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
bp->nb_dirtyoff = bp->nb_dirtyend = 0;
nfs_node_unlock(np);
}
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
while (np->n_bflag & NBFLUSHINPROG) {
np->n_bflag |= NBFLUSHWANT;
- error = msleep(&np->n_bflag, nfs_buf_mutex, slpflag, "nfs_flush", NULL);
+ error = msleep(&np->n_bflag, &nfs_buf_mutex, slpflag, "nfs_flush", NULL);
if ((error && (error != EWOULDBLOCK)) ||
((error = nfs_sigintr(NFSTONMP(np), NULL, thd, 0)))) {
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
goto out;
}
}
again:
FSDBG(518, LIST_FIRST(&np->n_dirtyblkhd), np->n_flag, 0, 0);
if (!NFSTONMP(np)) {
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
error = ENXIO;
goto done;
}
nfs_buf_refrele(bp);
}
nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
error = error2;
goto done;
}
continue;
}
nfs_buf_remfree(bp);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
if (ISSET(bp->nb_flags, NB_ERROR)) {
nfs_node_lock_force(np);
np->n_error = bp->nb_error ? bp->nb_error : EIO;
np->n_flag |= NWRITEERR;
nfs_node_unlock(np);
nfs_buf_release(bp, 1);
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
continue;
}
SET(bp->nb_flags, NB_ASYNC);
SET(bp->nb_flags, NB_STABLE);
}
nfs_buf_write(bp);
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
}
nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
}
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
if (waitfor == MNT_WAIT || waitfor == MNT_DWAIT) {
while ((error = vnode_waitforwrites(NFSTOV(np), 0, slpflag, slptimeo, "nfsflush"))) {
np->n_flag |= NMODIFIED;
nfs_node_unlock(np);
}
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
goto again;
}
np->n_flag |= NMODIFIED;
nfs_node_unlock(np);
}
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
if (!LIST_EMPTY(&np->n_dirtyblkhd)) {
goto again;
}
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
nfs_node_lock_force(np);
/*
* OK, it looks like there are no dirty blocks. If we have no
}
nfs_node_unlock(np);
done:
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
flags = np->n_bflag;
np->n_bflag &= ~(NBFLUSHINPROG | NBFLUSHWANT);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
if (flags & NBFLUSHWANT) {
wakeup(&np->n_bflag);
}
}
}
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
for (;;) {
list = NBI_CLEAN;
if (nfs_buf_iterprepare(np, &blist, list)) {
FSDBG(554, np, bp, -1, error);
nfs_buf_refrele(bp);
nfs_buf_itercomplete(np, &blist, list);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
return error;
}
}
nfs_buf_refrele(bp);
FSDBG(554, np, bp, NBOFF(bp), bp->nb_flags);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
if ((flags & V_SAVE) && UBCINFOEXISTS(NFSTOV(np)) && bp->nb_np &&
(NBOFF(bp) < (off_t)np->n_size)) {
/* extra paranoia: make sure we're not */
* be stuck in this loop forever because
* the buffer will continue to stay dirty.
*/
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
nfs_buf_itercomplete(np, &blist, list);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
return error;
}
error = 0;
}
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
continue;
}
}
SET(bp->nb_flags, NB_INVAL);
// hold off on FREEUPs until we're done here
nfs_buf_release(bp, 0);
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
}
nfs_buf_itercomplete(np, &blist, list);
}
if (!LIST_EMPTY(&(np)->n_dirtyblkhd) || !LIST_EMPTY(&(np)->n_cleanblkhd)) {
panic("nfs_vinvalbuf: flush/inval failed");
}
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
nfs_node_lock_force(np);
if (!(flags & V_SAVE)) {
np->n_flag &= ~NMODIFIED;
FSDBG_TOP(554, np, flags, intrflg, 0);
- /*
- * If the mount is gone no sense to try and write anything.
- * and hang trying to do IO.
- */
- if (nfs_mount_gone(nmp)) {
- flags &= ~V_SAVE;
- ubcflags &= ~UBC_PUSHALL;
- }
-
if (nmp && !NMFLAG(nmp, INTR)) {
intrflg = 0;
}
}
/* First wait for any other process doing a flush to complete. */
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
while (np->n_bflag & NBINVALINPROG) {
np->n_bflag |= NBINVALWANT;
- msleep(&np->n_bflag, nfs_buf_mutex, slpflag, "nfs_vinvalbuf", &ts);
+ msleep(&np->n_bflag, &nfs_buf_mutex, slpflag, "nfs_vinvalbuf", &ts);
if ((error = nfs_sigintr(VTONMP(vp), NULL, thd, 0))) {
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
return error;
}
if (np->n_bflag & NBINVALINPROG) {
}
}
np->n_bflag |= NBINVALINPROG;
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
/* Now, flush as required. */
again:
+ /* If the mount is gone, there's no sense trying to write anything and hanging on the I/O. */
+ if (nfs_mount_gone(nmp)) {
+ flags &= ~V_SAVE;
+ }
+
error = nfs_vinvalbuf_internal(np, flags, thd, cred, slpflag, 0);
while (error) {
FSDBG(554, np, 0, 0, error);
error = nfs_vinvalbuf_internal(np, flags, thd, cred, 0, slptimeo);
}
+ /* If the mount is gone, there's no sense trying to write anything and hanging on the I/O. */
+ if (nfs_mount_gone(nmp)) {
+ ubcflags &= ~UBC_PUSHALL;
+ }
+
/* get the pages out of vm also */
if (UBCINFOEXISTS(vp) && (size = ubc_getsize(vp))) {
if ((error = ubc_msync(vp, 0, size, NULL, ubcflags))) {
}
}
done:
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
nflags = np->n_bflag;
np->n_bflag &= ~(NBINVALINPROG | NBINVALWANT);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
if (nflags & NBINVALWANT) {
wakeup(&np->n_bflag);
}
struct nfsbuflists blist;
int error = 0;
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
if (!nfs_buf_iterprepare(np, &blist, NBI_CLEAN)) {
while ((bp = LIST_FIRST(&blist))) {
LIST_REMOVE(bp, nb_vnbufs);
if (error != EAGAIN) {
nfs_buf_refrele(bp);
nfs_buf_itercomplete(np, &blist, NBI_CLEAN);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
return;
}
}
if (error != EAGAIN) {
nfs_buf_refrele(bp);
nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
return;
}
}
}
nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
}
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
}
return;
}
- lck_mtx_lock(nfsiod_mutex);
+ lck_mtx_lock(&nfsiod_mutex);
niod = nmp->nm_niod;
/* grab an nfsiod if we don't have one already */
* We may try a couple times if other callers
* get the new threads before we do.
*/
- lck_mtx_unlock(nfsiod_mutex);
+ lck_mtx_unlock(&nfsiod_mutex);
started++;
if (!nfsiod_start()) {
goto again;
}
- lck_mtx_lock(nfsiod_mutex);
+ lck_mtx_lock(&nfsiod_mutex);
}
}
if (!nmp->nm_niod) {
if (niod) { /* give it the nfsiod we just grabbed */
nmp->nm_niod = niod;
- lck_mtx_unlock(nfsiod_mutex);
+ lck_mtx_unlock(&nfsiod_mutex);
wakeup(niod);
} else if (nfsiod_thread_count > 0) {
/* just queue it up on nfsiod mounts queue if needed */
if (nmp->nm_iodlink.tqe_next == NFSNOLIST) {
TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
}
- lck_mtx_unlock(nfsiod_mutex);
+ lck_mtx_unlock(&nfsiod_mutex);
} else {
printf("nfs_asyncio(): no nfsiods? %d %d (%d)\n", nfsiod_thread_count, NFSIOD_MAX, started);
- lck_mtx_unlock(nfsiod_mutex);
+ lck_mtx_unlock(&nfsiod_mutex);
/* we have no other option but to be persistent */
started = 0;
goto again;
}
} else {
- lck_mtx_unlock(nfsiod_mutex);
+ lck_mtx_unlock(&nfsiod_mutex);
}
FSDBG_BOT(552, nmp, 0, 0, 0);
#if CONFIG_NFS_SERVER
u_long nfs_gss_svc_ctx_hash;
struct nfs_gss_svc_ctx_hashhead *nfs_gss_svc_ctx_hashtbl;
-lck_mtx_t *nfs_gss_svc_ctx_mutex;
-lck_grp_t *nfs_gss_svc_grp;
+static LCK_GRP_DECLARE(nfs_gss_svc_grp, "rpcsec_gss_svc");
+static LCK_MTX_DECLARE(nfs_gss_svc_ctx_mutex, &nfs_gss_svc_grp);
uint32_t nfsrv_gss_context_ttl = GSS_CTX_EXPIRE;
#define GSS_SVC_CTX_TTL ((uint64_t)max(2*GSS_CTX_PEND, nfsrv_gss_context_ttl) * NSEC_PER_SEC)
#endif /* CONFIG_NFS_SERVER */
#if CONFIG_NFS_CLIENT
-lck_grp_t *nfs_gss_clnt_grp;
+LCK_GRP_DECLARE(nfs_gss_clnt_grp, "rpcsec_gss_clnt");
#endif /* CONFIG_NFS_CLIENT */
#define KRB5_MAX_MIC_SIZE 128
void
nfs_gss_init(void)
{
-#if CONFIG_NFS_CLIENT
- nfs_gss_clnt_grp = lck_grp_alloc_init("rpcsec_gss_clnt", LCK_GRP_ATTR_NULL);
-#endif /* CONFIG_NFS_CLIENT */
-
#if CONFIG_NFS_SERVER
- nfs_gss_svc_grp = lck_grp_alloc_init("rpcsec_gss_svc", LCK_GRP_ATTR_NULL);
-
nfs_gss_svc_ctx_hashtbl = hashinit(SVC_CTX_HASHSZ, M_TEMP, &nfs_gss_svc_ctx_hash);
- nfs_gss_svc_ctx_mutex = lck_mtx_alloc_init(nfs_gss_svc_grp, LCK_ATTR_NULL);
nfs_gss_svc_ctx_timer_call = thread_call_allocate(nfs_gss_svc_ctx_timer, NULL);
#endif /* CONFIG_NFS_SERVER */
lck_mtx_lock(&nmp->nm_lock);
NFS_GSS_DBG("Enter\n");
TAILQ_FOREACH(cp, &nmp->nm_gsscl, gss_clnt_entries) {
- lck_mtx_lock(cp->gss_clnt_mtx);
+ lck_mtx_lock(&cp->gss_clnt_mtx);
printf("context %d/%d: refcnt = %d, flags = %x\n",
kauth_cred_getasid(cp->gss_clnt_cred),
kauth_cred_getauid(cp->gss_clnt_cred),
cp->gss_clnt_refcnt, cp->gss_clnt_flags);
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
}
NFS_GSS_DBG("Exit\n");
lck_mtx_unlock(&nmp->nm_lock);
microuptime(&now);
lck_mtx_lock(&nmp->nm_lock);
TAILQ_FOREACH_SAFE(cp, &nmp->nm_gsscl, gss_clnt_entries, tcp) {
- lck_mtx_lock(cp->gss_clnt_mtx);
+ lck_mtx_lock(&cp->gss_clnt_mtx);
if (cp->gss_clnt_flags & GSS_CTX_DESTROY) {
NFS_GSS_DBG("Found destroyed context %s refcnt = %d continuing\n",
NFS_GSS_CTX(req, cp),
cp->gss_clnt_refcnt);
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
continue;
}
if (nfs_gss_clnt_ctx_cred_match(cp->gss_clnt_cred, req->r_cred)) {
bcmp(cp->gss_clnt_principal, principal, plen) != 0) {
cp->gss_clnt_flags |= (GSS_CTX_INVAL | GSS_CTX_DESTROY);
cp->gss_clnt_refcnt++;
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
NFS_GSS_DBG("Marking %s for deletion because %s does not match\n",
NFS_GSS_CTX(req, cp), principal);
NFS_GSS_DBG("len = (%zu,%zu), nt = (%d,%d)\n", cp->gss_clnt_prinlen, plen,
if (cp->gss_clnt_nctime + GSS_NEG_CACHE_TO >= now.tv_sec || cp->gss_clnt_nctime == 0) {
NFS_GSS_DBG("Context %s (refcnt = %d) not expired returning EAUTH nctime = %ld now = %ld\n",
NFS_GSS_CTX(req, cp), cp->gss_clnt_refcnt, cp->gss_clnt_nctime, now.tv_sec);
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
lck_mtx_unlock(&nmp->nm_lock);
NFS_ZFREE(nfs_req_zone, treq);
return NFSERR_EAUTH;
NFS_GSS_DBG("Context %s has expired but we still have %d references\n",
NFS_GSS_CTX(req, cp), cp->gss_clnt_refcnt);
error = nfs_gss_clnt_ctx_copy(cp, &ncp);
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
if (error) {
lck_mtx_unlock(&nmp->nm_lock);
NFS_ZFREE(nfs_req_zone, treq);
if (cp->gss_clnt_nctime) {
nmp->nm_ncentries--;
}
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries);
break;
}
/* Found a valid context to return */
cp->gss_clnt_refcnt++;
req->r_gss_ctx = cp;
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
lck_mtx_unlock(&nmp->nm_lock);
NFS_ZFREE(nfs_req_zone, treq);
return 0;
}
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
}
if (!cp && nfs_root_steals_ctx && principal == NULL && kauth_cred_getuid(req->r_cred) == 0) {
}
cp->gss_clnt_cred = req->r_cred;
kauth_cred_ref(cp->gss_clnt_cred);
- cp->gss_clnt_mtx = lck_mtx_alloc_init(nfs_gss_clnt_grp, LCK_ATTR_NULL);
+ lck_mtx_init(&cp->gss_clnt_mtx, &nfs_gss_clnt_grp, LCK_ATTR_NULL);
cp->gss_clnt_ptime = now.tv_sec - GSS_PRINT_DELAY;
if (principal) {
MALLOC(cp->gss_clnt_principal, uint8_t *, plen + 1, M_TEMP, M_WAITOK | M_ZERO);
* doing the context setup. Wait until the context thread
* is null.
*/
- lck_mtx_lock(cp->gss_clnt_mtx);
+ lck_mtx_lock(&cp->gss_clnt_mtx);
if (cp->gss_clnt_thread && cp->gss_clnt_thread != current_thread()) {
cp->gss_clnt_flags |= GSS_NEEDCTX;
- msleep(cp, cp->gss_clnt_mtx, slpflag | PDROP, "ctxwait", NULL);
+ msleep(cp, &cp->gss_clnt_mtx, slpflag | PDROP, "ctxwait", NULL);
slpflag &= ~PCATCH;
if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0))) {
return error;
nfs_gss_clnt_ctx_unref(req);
goto retry;
}
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
if (cp->gss_clnt_flags & GSS_CTX_COMPLETE) {
/*
* we allocate a new sequence number and allow this request
* to proceed.
*/
- lck_mtx_lock(cp->gss_clnt_mtx);
+ lck_mtx_lock(&cp->gss_clnt_mtx);
while (win_getbit(cp->gss_clnt_seqbits,
((cp->gss_clnt_seqnum - cp->gss_clnt_seqwin) + 1) % cp->gss_clnt_seqwin)) {
cp->gss_clnt_flags |= GSS_NEEDSEQ;
- msleep(cp, cp->gss_clnt_mtx, slpflag | PDROP, "seqwin", NULL);
+ msleep(cp, &cp->gss_clnt_mtx, slpflag | PDROP, "seqwin", NULL);
slpflag &= ~PCATCH;
if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0))) {
return error;
}
- lck_mtx_lock(cp->gss_clnt_mtx);
+ lck_mtx_lock(&cp->gss_clnt_mtx);
if (cp->gss_clnt_flags & GSS_CTX_INVAL) {
/* Renewed while we were waiting */
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
nfs_gss_clnt_ctx_unref(req);
goto retry;
}
}
seqnum = ++cp->gss_clnt_seqnum;
win_setbit(cp->gss_clnt_seqbits, seqnum % cp->gss_clnt_seqwin);
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
MALLOC(gsp, struct gss_seq *, sizeof(*gsp), M_TEMP, M_WAITOK | M_ZERO);
if (gsp == NULL) {
/*
* The context is apparently established successfully
*/
- lck_mtx_lock(cp->gss_clnt_mtx);
+ lck_mtx_lock(&cp->gss_clnt_mtx);
cp->gss_clnt_flags |= GSS_CTX_COMPLETE;
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
cp->gss_clnt_proc = RPCSEC_GSS_DATA;
network_seqnum = htonl(cp->gss_clnt_seqwin);
* It will be removed when the reference count
* drops to zero.
*/
- lck_mtx_lock(cp->gss_clnt_mtx);
+ lck_mtx_lock(&cp->gss_clnt_mtx);
if (error) {
cp->gss_clnt_flags |= GSS_CTX_INVAL;
}
cp->gss_clnt_flags &= ~GSS_NEEDCTX;
wakeup(cp);
}
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
NFS_GSS_DBG("Returning error = %d\n", error);
return error;
/*
* Give up on this context
*/
- lck_mtx_lock(cp->gss_clnt_mtx);
+ lck_mtx_lock(&cp->gss_clnt_mtx);
cp->gss_clnt_flags |= GSS_CTX_INVAL;
/*
cp->gss_clnt_flags &= ~GSS_NEEDCTX;
wakeup(cp);
}
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
return error;
}
* sequence number window to indicate it's done.
* We do this even if the request timed out.
*/
- lck_mtx_lock(cp->gss_clnt_mtx);
+ lck_mtx_lock(&cp->gss_clnt_mtx);
gsp = SLIST_FIRST(&req->r_gss_seqlist);
if (gsp && gsp->gss_seqnum > (cp->gss_clnt_seqnum - cp->gss_clnt_seqwin)) {
win_resetbit(cp->gss_clnt_seqbits,
cp->gss_clnt_flags &= ~GSS_NEEDSEQ;
wakeup(cp);
}
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
}
/*
{
req->r_gss_ctx = cp;
- lck_mtx_lock(cp->gss_clnt_mtx);
+ lck_mtx_lock(&cp->gss_clnt_mtx);
cp->gss_clnt_refcnt++;
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
}
/*
req->r_gss_ctx = NULL;
- lck_mtx_lock(cp->gss_clnt_mtx);
+ lck_mtx_lock(&cp->gss_clnt_mtx);
if (--cp->gss_clnt_refcnt < 0) {
panic("Over release of gss context!\n");
}
cp->gss_clnt_nctime = now.tv_sec;
neg_cache = 1;
}
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
if (destroy) {
NFS_GSS_DBG("Destroying context %s\n", NFS_GSS_CTX(req, cp));
if (nmp) {
continue;
}
/* Not referenced, remove it. */
- lck_mtx_lock(cp->gss_clnt_mtx);
+ lck_mtx_lock(&cp->gss_clnt_mtx);
if (cp->gss_clnt_refcnt == 0) {
cp->gss_clnt_flags |= GSS_CTX_DESTROY;
destroy = 1;
}
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
if (destroy) {
TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries);
nmp->nm_ncentries++;
return ENOMEM;
}
bzero(dcp, sizeof(struct nfs_gss_clnt_ctx));
- dcp->gss_clnt_mtx = lck_mtx_alloc_init(nfs_gss_clnt_grp, LCK_ATTR_NULL);
+ lck_mtx_init(&dcp->gss_clnt_mtx, &nfs_gss_clnt_grp, LCK_ATTR_NULL);
dcp->gss_clnt_cred = scp->gss_clnt_cred;
kauth_cred_ref(dcp->gss_clnt_cred);
dcp->gss_clnt_prinlen = scp->gss_clnt_prinlen;
host_release_special_port(cp->gss_clnt_mport);
cp->gss_clnt_mport = IPC_PORT_NULL;
- if (cp->gss_clnt_mtx) {
- lck_mtx_destroy(cp->gss_clnt_mtx, nfs_gss_clnt_grp);
- cp->gss_clnt_mtx = (lck_mtx_t *)NULL;
- }
+ lck_mtx_destroy(&cp->gss_clnt_mtx, &nfs_gss_clnt_grp);
+
if (IS_VALID_CRED(cp->gss_clnt_cred)) {
kauth_cred_unref(&cp->gss_clnt_cred);
}
}
nmp = req->r_nmp;
- lck_mtx_lock(cp->gss_clnt_mtx);
+ lck_mtx_lock(&cp->gss_clnt_mtx);
if (cp->gss_clnt_flags & GSS_CTX_INVAL) {
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
nfs_gss_clnt_ctx_unref(req);
return 0; // already being renewed
}
cp->gss_clnt_flags &= ~GSS_NEEDSEQ;
wakeup(cp);
}
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
if (cp->gss_clnt_proc == RPCSEC_GSS_DESTROY) {
return EACCES; /* Destroying a context is best effort. Don't renew. */
while ((cp = TAILQ_FIRST(&nmp->nm_gsscl))) {
TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries);
cp->gss_clnt_entries.tqe_next = NFSNOLIST;
- lck_mtx_lock(cp->gss_clnt_mtx);
+ lck_mtx_lock(&cp->gss_clnt_mtx);
if (cp->gss_clnt_flags & GSS_CTX_DESTROY) {
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
continue;
}
cp->gss_clnt_refcnt++;
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
req->r_gss_ctx = cp;
lck_mtx_unlock(&nmp->nm_lock);
* the reference to remove it if its
* refcount is zero.
*/
- lck_mtx_lock(cp->gss_clnt_mtx);
+ lck_mtx_lock(&cp->gss_clnt_mtx);
cp->gss_clnt_flags |= (GSS_CTX_INVAL | GSS_CTX_DESTROY);
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
nfs_gss_clnt_ctx_unref(req);
lck_mtx_lock(&nmp->nm_lock);
}
NFS_GSS_CLNT_CTX_DUMP(nmp);
lck_mtx_lock(&nmp->nm_lock);
TAILQ_FOREACH_SAFE(cp, &nmp->nm_gsscl, gss_clnt_entries, tcp) {
- lck_mtx_lock(cp->gss_clnt_mtx);
+ lck_mtx_lock(&cp->gss_clnt_mtx);
if (nfs_gss_clnt_ctx_cred_match(cp->gss_clnt_cred, cred)) {
if (cp->gss_clnt_flags & GSS_CTX_DESTROY) {
NFS_GSS_DBG("Found destroyed context %d/%d. refcnt = %d continuing\n",
kauth_cred_getasid(cp->gss_clnt_cred),
kauth_cred_getauid(cp->gss_clnt_cred),
cp->gss_clnt_refcnt);
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
continue;
}
cp->gss_clnt_refcnt++;
cp->gss_clnt_flags |= (GSS_CTX_INVAL | GSS_CTX_DESTROY);
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
req->r_gss_ctx = cp;
lck_mtx_unlock(&nmp->nm_lock);
/*
NFS_ZFREE(nfs_req_zone, req);
return 0;
}
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
}
lck_mtx_unlock(&nmp->nm_lock);
req->r_nmp = nmp;
lck_mtx_lock(&nmp->nm_lock);
TAILQ_FOREACH(cp, &nmp->nm_gsscl, gss_clnt_entries) {
- lck_mtx_lock(cp->gss_clnt_mtx);
+ lck_mtx_lock(&cp->gss_clnt_mtx);
if (cp->gss_clnt_flags & GSS_CTX_DESTROY) {
NFS_GSS_DBG("Found destroyed context %s refcnt = %d continuing\n",
NFS_GSS_CTX(req, cp),
cp->gss_clnt_refcnt);
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
continue;
}
if (nfs_gss_clnt_ctx_cred_match(cp->gss_clnt_cred, cred)) {
cp->gss_clnt_refcnt++;
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
goto out;
}
- lck_mtx_unlock(cp->gss_clnt_mtx);
+ lck_mtx_unlock(&cp->gss_clnt_mtx);
}
out:
*/
clock_interval_to_deadline(GSS_CTX_PEND, NSEC_PER_SEC, &timenow);
- lck_mtx_lock(nfs_gss_svc_ctx_mutex);
+ lck_mtx_lock(&nfs_gss_svc_ctx_mutex);
LIST_FOREACH(cp, head, gss_svc_entries) {
if (cp->gss_svc_handle == handle) {
cp = NULL;
break;
}
- lck_mtx_lock(cp->gss_svc_mtx);
+ lck_mtx_lock(&cp->gss_svc_mtx);
cp->gss_svc_refcnt++;
- lck_mtx_unlock(cp->gss_svc_mtx);
+ lck_mtx_unlock(&cp->gss_svc_mtx);
break;
}
}
- lck_mtx_unlock(nfs_gss_svc_ctx_mutex);
+ lck_mtx_unlock(&nfs_gss_svc_ctx_mutex);
return cp;
}
struct nfs_gss_svc_ctx_hashhead *head;
struct nfs_gss_svc_ctx *p;
- lck_mtx_lock(nfs_gss_svc_ctx_mutex);
+ lck_mtx_lock(&nfs_gss_svc_ctx_mutex);
/*
* Give the client a random handle so that if we reboot
min(GSS_TIMER_PERIOD, max(GSS_CTX_TTL_MIN, nfsrv_gss_context_ttl)) * MSECS_PER_SEC);
}
- lck_mtx_unlock(nfs_gss_svc_ctx_mutex);
+ lck_mtx_unlock(&nfs_gss_svc_ctx_mutex);
}
/*
int contexts = 0;
int i;
- lck_mtx_lock(nfs_gss_svc_ctx_mutex);
+ lck_mtx_lock(&nfs_gss_svc_ctx_mutex);
clock_get_uptime(&timenow);
NFS_GSS_DBG("is running\n");
if (cp->gss_svc_seqbits) {
FREE(cp->gss_svc_seqbits, M_TEMP);
}
- lck_mtx_destroy(cp->gss_svc_mtx, nfs_gss_svc_grp);
+ lck_mtx_destroy(&cp->gss_svc_mtx, &nfs_gss_svc_grp);
FREE(cp, M_TEMP);
contexts--;
}
min(GSS_TIMER_PERIOD, max(GSS_CTX_TTL_MIN, nfsrv_gss_context_ttl)) * MSECS_PER_SEC);
}
- lck_mtx_unlock(nfs_gss_svc_ctx_mutex);
+ lck_mtx_unlock(&nfs_gss_svc_ctx_mutex);
}
/*
error = ENOMEM;
goto nfsmout;
}
- cp->gss_svc_mtx = lck_mtx_alloc_init(nfs_gss_svc_grp, LCK_ATTR_NULL);
+ lck_mtx_init(&cp->gss_svc_mtx, &nfs_gss_svc_grp, LCK_ATTR_NULL);
cp->gss_svc_refcnt = 1;
} else {
/*
}
if (error) {
if (proc == RPCSEC_GSS_INIT) {
- lck_mtx_destroy(cp->gss_svc_mtx, nfs_gss_svc_grp);
+ lck_mtx_destroy(&cp->gss_svc_mtx, &nfs_gss_svc_grp);
FREE(cp, M_TEMP);
cp = NULL;
}
cp = nfs_gss_svc_ctx_find(cp->gss_svc_handle);
if (cp != NULL) {
cp->gss_svc_handle = 0; // so it can't be found
- lck_mtx_lock(cp->gss_svc_mtx);
+ lck_mtx_lock(&cp->gss_svc_mtx);
clock_interval_to_deadline(GSS_CTX_PEND, NSEC_PER_SEC,
&cp->gss_svc_incarnation);
- lck_mtx_unlock(cp->gss_svc_mtx);
+ lck_mtx_unlock(&cp->gss_svc_mtx);
}
break;
default:
if (cp->gss_svc_token != NULL) {
FREE(cp->gss_svc_token, M_TEMP);
}
- lck_mtx_destroy(cp->gss_svc_mtx, nfs_gss_svc_grp);
+ lck_mtx_destroy(&cp->gss_svc_mtx, &nfs_gss_svc_grp);
FREE(cp, M_TEMP);
}
uint32_t win = cp->gss_svc_seqwin;
uint32_t i;
- lck_mtx_lock(cp->gss_svc_mtx);
+ lck_mtx_lock(&cp->gss_svc_mtx);
/*
* If greater than the window upper bound,
}
win_setbit(bits, seq % win);
cp->gss_svc_seqmax = seq;
- lck_mtx_unlock(cp->gss_svc_mtx);
+ lck_mtx_unlock(&cp->gss_svc_mtx);
return 1;
}
* Invalid if below the lower bound of the window
*/
if (seq <= cp->gss_svc_seqmax - win) {
- lck_mtx_unlock(cp->gss_svc_mtx);
+ lck_mtx_unlock(&cp->gss_svc_mtx);
return 0;
}
* In the window, invalid if the bit is already set
*/
if (win_getbit(bits, seq % win)) {
- lck_mtx_unlock(cp->gss_svc_mtx);
+ lck_mtx_unlock(&cp->gss_svc_mtx);
return 0;
}
win_setbit(bits, seq % win);
- lck_mtx_unlock(cp->gss_svc_mtx);
+ lck_mtx_unlock(&cp->gss_svc_mtx);
return 1;
}
void
nfs_gss_svc_ctx_deref(struct nfs_gss_svc_ctx *cp)
{
- lck_mtx_lock(cp->gss_svc_mtx);
+ lck_mtx_lock(&cp->gss_svc_mtx);
if (cp->gss_svc_refcnt > 0) {
cp->gss_svc_refcnt--;
} else {
printf("nfs_gss_ctx_deref: zero refcount\n");
}
- lck_mtx_unlock(cp->gss_svc_mtx);
+ lck_mtx_unlock(&cp->gss_svc_mtx);
}
/*
struct nfs_gss_svc_ctx *cp, *ncp;
int i;
- lck_mtx_lock(nfs_gss_svc_ctx_mutex);
+ lck_mtx_lock(&nfs_gss_svc_ctx_mutex);
/*
* Run through all the buckets
if (cp->gss_svc_seqbits) {
FREE(cp->gss_svc_seqbits, M_TEMP);
}
- lck_mtx_destroy(cp->gss_svc_mtx, nfs_gss_svc_grp);
+ lck_mtx_destroy(&cp->gss_svc_mtx, &nfs_gss_svc_grp);
FREE(cp, M_TEMP);
}
}
- lck_mtx_unlock(nfs_gss_svc_ctx_mutex);
+ lck_mtx_unlock(&nfs_gss_svc_ctx_mutex);
}
#endif /* CONFIG_NFS_SERVER */
* The client's RPCSEC_GSS context information
*/
struct nfs_gss_clnt_ctx {
- lck_mtx_t *gss_clnt_mtx;
+ lck_mtx_t gss_clnt_mtx;
thread_t gss_clnt_thread; // Thread creating context
TAILQ_ENTRY(nfs_gss_clnt_ctx) gss_clnt_entries;
uint32_t gss_clnt_flags; // Flag bits - see below
* The server's RPCSEC_GSS context information
*/
struct nfs_gss_svc_ctx {
- lck_mtx_t *gss_svc_mtx;
+ lck_mtx_t gss_svc_mtx;
LIST_ENTRY(nfs_gss_svc_ctx) gss_svc_entries;
uint32_t gss_svc_handle; // Identifies server context to client
uint32_t gss_svc_refcnt; // Reference count
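/*
 * Illustrative sketch (not part of the patch), assuming the usual xnu lock headers:
 * with the mutex embedded in the context structure instead of held by pointer, the
 * context code initializes it in place when the context is created and destroys it
 * in place when the context is torn down; there is no separate
 * lck_mtx_alloc_init()/lck_mtx_free() and no NULL check before destroy.
 * All names below are hypothetical.
 */
struct example_ctx {
	lck_mtx_t  ec_mtx;                    /* was: lck_mtx_t *ec_mtx */
	uint32_t   ec_refcnt;
};

static void
example_ctx_setup(struct example_ctx *ec, lck_grp_t *grp)
{
	lck_mtx_init(&ec->ec_mtx, grp, LCK_ATTR_NULL);   /* in-place init, no allocation */
	ec->ec_refcnt = 1;
}

static void
example_ctx_teardown(struct example_ctx *ec, lck_grp_t *grp)
{
	lck_mtx_destroy(&ec->ec_mtx, grp);    /* in-place destroy; nothing to free or NULL out */
}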
* kept sorted by transaction ID (xid).
*/
static uint64_t nfs_lockxid = 0;
-static LOCKD_MSG_QUEUE nfs_pendlockq;
+static LOCKD_MSG_QUEUE nfs_pendlockq = TAILQ_HEAD_INITIALIZER(nfs_pendlockq);
/* list of mounts that are (potentially) making lockd requests */
-TAILQ_HEAD(nfs_lockd_mount_list, nfsmount) nfs_lockd_mount_list;
+TAILQ_HEAD(nfs_lockd_mount_list, nfsmount) nfs_lockd_mount_list =
+ TAILQ_HEAD_INITIALIZER(nfs_lockd_mount_list);
-static lck_grp_t *nfs_lock_lck_grp;
-static lck_mtx_t *nfs_lock_mutex;
+static LCK_GRP_DECLARE(nfs_lock_lck_grp, "nfs_lock");
+static LCK_MTX_DECLARE(nfs_lock_mutex, &nfs_lock_lck_grp);
void nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *);
void nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *);
uint64_t nfs_lockxid_get(void);
int nfs_lockd_send_request(LOCKD_MSG *, int);
-/*
- * initialize global nfs lock state
- */
-void
-nfs_lockinit(void)
-{
- TAILQ_INIT(&nfs_pendlockq);
- TAILQ_INIT(&nfs_lockd_mount_list);
-
- nfs_lock_lck_grp = lck_grp_alloc_init("nfs_lock", LCK_GRP_ATTR_NULL);
- nfs_lock_mutex = lck_mtx_alloc_init(nfs_lock_lck_grp, LCK_ATTR_NULL);
-}
-
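/*
 * Illustrative sketch (not part of the patch), assuming the usual xnu lock and queue
 * headers: the conversion pattern used throughout this change. A lock group and mutex
 * that were allocated at runtime (lck_grp_alloc_init()/lck_mtx_alloc_init() inside an
 * init routine) become statically declared objects, queue heads get
 * TAILQ_HEAD_INITIALIZER, the init routine can be deleted, and call sites pass the
 * lock by address. All names below are hypothetical.
 */
struct example_entry {
	TAILQ_ENTRY(example_entry) ee_link;
};

static TAILQ_HEAD(, example_entry) example_queue =
    TAILQ_HEAD_INITIALIZER(example_queue);                 /* was: TAILQ_INIT() in an init routine */
static LCK_GRP_DECLARE(example_lck_grp, "example");        /* was: lck_grp_alloc_init() */
static LCK_MTX_DECLARE(example_mutex, &example_lck_grp);   /* was: lck_mtx_alloc_init() */

static void
example_enqueue(struct example_entry *ee)
{
	lck_mtx_lock(&example_mutex);                      /* call sites now take the mutex's address */
	TAILQ_INSERT_TAIL(&example_queue, ee, ee_link);
	lck_mtx_unlock(&example_mutex);
}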
/*
* Register a mount as (potentially) making lockd requests.
*/
void
nfs_lockd_mount_register(struct nfsmount *nmp)
{
- lck_mtx_lock(nfs_lock_mutex);
+ lck_mtx_lock(&nfs_lock_mutex);
TAILQ_INSERT_HEAD(&nfs_lockd_mount_list, nmp, nm_ldlink);
nfs_lockd_mounts++;
- lck_mtx_unlock(nfs_lock_mutex);
+ lck_mtx_unlock(&nfs_lock_mutex);
}
/*
mach_port_t lockd_port = IPC_PORT_NULL;
kern_return_t kr;
- lck_mtx_lock(nfs_lock_mutex);
+ lck_mtx_lock(&nfs_lock_mutex);
if (nmp->nm_ldlink.tqe_next == NFSNOLIST) {
- lck_mtx_unlock(nfs_lock_mutex);
+ lck_mtx_unlock(&nfs_lock_mutex);
return;
}
nfs_lockd_request_sent = 0;
}
- lck_mtx_unlock(nfs_lock_mutex);
+ lck_mtx_unlock(&nfs_lock_mutex);
if (!send_shutdown) {
return;
interruptable = NMFLAG(nmp, INTR);
lck_mtx_unlock(&nmp->nm_lock);
- lck_mtx_lock(nfs_lock_mutex);
+ lck_mtx_lock(&nfs_lock_mutex);
/* allocate unique xid */
msg->lm_xid = nfs_lockxid_get();
nfs_lockd_request_sent = 1;
/* need to drop nfs_lock_mutex while calling nfs_lockd_send_request() */
- lck_mtx_unlock(nfs_lock_mutex);
+ lck_mtx_unlock(&nfs_lock_mutex);
error = nfs_lockd_send_request(msg, interruptable);
- lck_mtx_lock(nfs_lock_mutex);
+ lck_mtx_lock(&nfs_lock_mutex);
if (error && error != EAGAIN) {
break;
}
while (now.tv_sec < endtime) {
error = error2 = 0;
if (!msgreq->lmr_answered) {
- error = msleep(msgreq, nfs_lock_mutex, slpflag | PUSER, "lockd", &ts);
+ error = msleep(msgreq, &nfs_lock_mutex, slpflag | PUSER, "lockd", &ts);
slpflag = 0;
}
if (msgreq->lmr_answered) {
* for this mount.
*/
nfs_lockdmsg_dequeue(msgreq);
- lck_mtx_unlock(nfs_lock_mutex);
+ lck_mtx_unlock(&nfs_lock_mutex);
lck_mtx_lock(&nmp->nm_lock);
if (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED) {
nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED;
nfs_lockdmsg_dequeue(msgreq);
- lck_mtx_unlock(nfs_lock_mutex);
+ lck_mtx_unlock(&nfs_lock_mutex);
return error;
}
return EINVAL;
}
- lck_mtx_lock(nfs_lock_mutex);
+ lck_mtx_lock(&nfs_lock_mutex);
/* try to find the lockd message by transaction id (cookie) */
msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid);
}
}
if (!msgreq) {
- lck_mtx_unlock(nfs_lock_mutex);
+ lck_mtx_unlock(&nfs_lock_mutex);
return EPIPE;
}
}
msgreq->lmr_answered = 1;
- lck_mtx_unlock(nfs_lock_mutex);
+ lck_mtx_unlock(&nfs_lock_mutex);
wakeup(msgreq);
return 0;
argp += headsize;
saddr = (struct sockaddr *)&ln.ln_addr[0];
- lck_mtx_lock(nfs_lock_mutex);
+ lck_mtx_lock(&nfs_lock_mutex);
for (i = 0; i < ln.ln_addrcount; i++) {
error = copyin(argp, &ln.ln_addr[0], sizeof(ln.ln_addr[0]));
}
}
- lck_mtx_unlock(nfs_lock_mutex);
+ lck_mtx_unlock(&nfs_lock_mutex);
return error;
}
#ifdef KERNEL
-void nfs_lockinit(void);
void nfs_lockd_mount_register(struct nfsmount *);
void nfs_lockd_mount_unregister(struct nfsmount *);
int nfs3_lockd_request(nfsnode_t, int, LOCKD_MSG_REQUEST *, int, thread_t);
static LIST_HEAD(nfsnodehashhead, nfsnode) * nfsnodehashtbl;
static u_long nfsnodehash;
-static lck_grp_t *nfs_node_hash_lck_grp;
-static lck_grp_t *nfs_node_lck_grp;
-static lck_grp_t *nfs_data_lck_grp;
-lck_mtx_t *nfs_node_hash_mutex;
+static LCK_GRP_DECLARE(nfs_node_hash_lck_grp, "nfs_node_hash");
+static LCK_GRP_DECLARE(nfs_node_lck_grp, "nfs_node");
+static LCK_GRP_DECLARE(nfs_data_lck_grp, "nfs_data");
+LCK_MTX_DECLARE(nfs_node_hash_mutex, &nfs_node_hash_lck_grp);
ZONE_DECLARE(nfsnode_zone, "NFS node",
sizeof(struct nfsnode), ZC_ZFREE_CLEARMEM);
#define NFS_NODE_DBG(...) NFS_DBG(NFS_FAC_NODE, 7, ## __VA_ARGS__)
-/*
- * Initialize hash links for nfsnodes
- * and build nfsnode free list.
- */
-void
-nfs_nhinit(void)
-{
- nfs_node_hash_lck_grp = lck_grp_alloc_init("nfs_node_hash", LCK_GRP_ATTR_NULL);
- nfs_node_hash_mutex = lck_mtx_alloc_init(nfs_node_hash_lck_grp, LCK_ATTR_NULL);
- nfs_node_lck_grp = lck_grp_alloc_init("nfs_node", LCK_GRP_ATTR_NULL);
- nfs_data_lck_grp = lck_grp_alloc_init("nfs_data", LCK_GRP_ATTR_NULL);
-}
-
void
nfs_nhinit_finish(void)
{
- lck_mtx_lock(nfs_node_hash_mutex);
+ lck_mtx_lock(&nfs_node_hash_mutex);
if (!nfsnodehashtbl) {
nfsnodehashtbl = hashinit(desiredvnodes, M_NFSNODE, &nfsnodehash);
}
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
}
/*
cn_namelen = cnp ? cnp->cn_namelen : 0;
nfshash = nfs_hash(fhp, fhsize);
loop:
- lck_mtx_lock(nfs_node_hash_mutex);
+ lck_mtx_lock(&nfs_node_hash_mutex);
nhpp = NFSNOHASH(nfshash);
for (np = nhpp->lh_first; np != 0; np = np->n_hash.le_next) {
mp2 = (np->n_hflag & NHINIT) ? np->n_mount : NFSTOMP(np);
if ((np->n_hflag & NHINIT) || ((np->n_hflag & NHLOCKED) && !(flags & NG_NOCREATE))) {
np->n_hflag |= NHLOCKWANT;
FSDBG(263, dnp, np, np->n_flag, 0xcace2222);
- msleep(np, nfs_node_hash_mutex, PDROP | PINOD, "nfs_nget", NULL);
+ msleep(np, &nfs_node_hash_mutex, PDROP | PINOD, "nfs_nget", NULL);
FSDBG(263, dnp, np, np->n_flag, 0xcace3333);
goto loop;
}
vp = NFSTOV(np);
vid = vnode_vid(vp);
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
if ((error = vnode_getwithvid(vp, vid))) {
/*
* If vnode is being reclaimed or has already
FSDBG(263, mp, dnp, npp, 0xaaaaaaaa);
if (flags & NG_NOCREATE) {
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
*npp = 0;
FSDBG_BOT(263, dnp, *npp, 0x80000001, ENOENT);
return ENOENT;
if (fhsize > NFS_SMALLFH) {
MALLOC(np->n_fhp, u_char *, fhsize, M_NFSBIGFH, M_WAITOK);
if (!np->n_fhp) {
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
NFS_ZFREE(nfsnode_zone, np);
*npp = 0;
FSDBG_BOT(263, dnp, *npp, 0x80000002, ENOMEM);
FSDBG(266, 0, np, np->n_flag, np->n_hflag);
/* lock the new nfsnode */
- lck_mtx_init(&np->n_lock, nfs_node_lck_grp, LCK_ATTR_NULL);
- lck_rw_init(&np->n_datalock, nfs_data_lck_grp, LCK_ATTR_NULL);
- lck_mtx_init(&np->n_openlock, nfs_open_grp, LCK_ATTR_NULL);
+ lck_mtx_init(&np->n_lock, &nfs_node_lck_grp, LCK_ATTR_NULL);
+ lck_rw_init(&np->n_datalock, &nfs_data_lck_grp, LCK_ATTR_NULL);
+ lck_mtx_init(&np->n_openlock, &nfs_open_grp, LCK_ATTR_NULL);
lck_mtx_lock(&np->n_lock);
/* release lock on hash table */
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
/* do initial loading of attributes */
NACLINVALIDATE(np);
if (error) {
FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
nfs_node_unlock(np);
- lck_mtx_lock(nfs_node_hash_mutex);
+ lck_mtx_lock(&nfs_node_hash_mutex);
LIST_REMOVE(np, n_hash);
np->n_hflag &= ~(NHHASHED | NHINIT | NHLOCKED);
if (np->n_hflag & NHLOCKWANT) {
np->n_hflag &= ~NHLOCKWANT;
wakeup(np);
}
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
if (np->n_parent) {
if (!vnode_get(np->n_parent)) {
vnode_rele(np->n_parent);
}
np->n_parent = NULL;
}
- lck_mtx_destroy(&np->n_lock, nfs_node_lck_grp);
- lck_rw_destroy(&np->n_datalock, nfs_data_lck_grp);
- lck_mtx_destroy(&np->n_openlock, nfs_open_grp);
+ lck_mtx_destroy(&np->n_lock, &nfs_node_lck_grp);
+ lck_rw_destroy(&np->n_datalock, &nfs_data_lck_grp);
+ lck_mtx_destroy(&np->n_openlock, &nfs_open_grp);
if (np->n_fhsize > NFS_SMALLFH) {
FREE(np->n_fhp, M_NFSBIGFH);
}
if (error) {
FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
nfs_node_unlock(np);
- lck_mtx_lock(nfs_node_hash_mutex);
+ lck_mtx_lock(&nfs_node_hash_mutex);
LIST_REMOVE(np, n_hash);
np->n_hflag &= ~(NHHASHED | NHINIT | NHLOCKED);
if (np->n_hflag & NHLOCKWANT) {
np->n_hflag &= ~NHLOCKWANT;
wakeup(np);
}
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
if (np->n_parent) {
if (!vnode_get(np->n_parent)) {
vnode_rele(np->n_parent);
}
np->n_parent = NULL;
}
- lck_mtx_destroy(&np->n_lock, nfs_node_lck_grp);
- lck_rw_destroy(&np->n_datalock, nfs_data_lck_grp);
- lck_mtx_destroy(&np->n_openlock, nfs_open_grp);
+ lck_mtx_destroy(&np->n_lock, &nfs_node_lck_grp);
+ lck_rw_destroy(&np->n_datalock, &nfs_data_lck_grp);
+ lck_mtx_destroy(&np->n_openlock, &nfs_open_grp);
if (np->n_fhsize > NFS_SMALLFH) {
FREE(np->n_fhp, M_NFSBIGFH);
}
/* node is now initialized */
/* check if anyone's waiting on this node */
- lck_mtx_lock(nfs_node_hash_mutex);
+ lck_mtx_lock(&nfs_node_hash_mutex);
np->n_hflag &= ~(NHINIT | NHLOCKED);
if (np->n_hflag & NHLOCKWANT) {
np->n_hflag &= ~NHLOCKWANT;
wakeup(np);
}
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
*npp = np;
ubc_setsize(vp, 0);
}
- /* mark this node and the directory busy while we do the remove */
- busyerror = nfs_node_set_busy2(nsp->nsr_dnp, np, vfs_context_thread(ctx));
+ if (!vfs_isforce(nmp->nm_mountp)) {
+ /* mark this node and the directory busy while we do the remove */
+ busyerror = nfs_node_set_busy2(nsp->nsr_dnp, np, vfs_context_thread(ctx));
+ } else {
+ /* in a forced unmount we can't trust nsp->nsr_dnp, so mark only this np busy */
+ busyerror = nfs_node_set_busy(np, vfs_context_thread(ctx));
+ }
/* lock the node while we remove the silly file */
- lck_mtx_lock(nfs_node_hash_mutex);
+ lck_mtx_lock(&nfs_node_hash_mutex);
while (np->n_hflag & NHLOCKED) {
np->n_hflag |= NHLOCKWANT;
- msleep(np, nfs_node_hash_mutex, PINOD, "nfs_inactive", NULL);
+ msleep(np, &nfs_node_hash_mutex, PINOD, "nfs_inactive", NULL);
}
np->n_hflag |= NHLOCKED;
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
- /* purge the name cache to deter others from finding it */
- bzero(&cn, sizeof(cn));
- cn.cn_nameptr = nsp->nsr_name;
- cn.cn_namelen = nsp->nsr_namlen;
- nfs_name_cache_purge(nsp->nsr_dnp, np, &cn, ctx);
+ if (!vfs_isforce(nmp->nm_mountp)) {
+ /* purge the name cache to deter others from finding it */
+ bzero(&cn, sizeof(cn));
+ cn.cn_nameptr = nsp->nsr_name;
+ cn.cn_namelen = nsp->nsr_namlen;
+ nfs_name_cache_purge(nsp->nsr_dnp, np, &cn, ctx);
+ }
FSDBG(264, np, np->n_size, np->n_vattr.nva_size, 0xf00d00f1);
- /* now remove the silly file */
- nfs_removeit(nsp);
+ if (!vfs_isforce(nmp->nm_mountp)) {
+ /* now remove the silly file */
+ nfs_removeit(nsp);
+ }
/* clear all flags other than these */
nfs_node_lock_force(np);
nfs_node_unlock(np);
if (!busyerror) {
- nfs_node_clear_busy2(nsp->nsr_dnp, np);
+ if (!vfs_isforce(nmp->nm_mountp)) {
+ nfs_node_clear_busy2(nsp->nsr_dnp, np);
+ } else {
+ nfs_node_clear_busy(np);
+ }
}
if (unhash && vnode_isinuse(vp, 0)) {
ubc_setsize(vp, np->n_size);
}
- lck_mtx_lock(nfs_node_hash_mutex);
+ lck_mtx_lock(&nfs_node_hash_mutex);
if (unhash) {
/*
* remove nfsnode from hash now so we can't accidentally find it
np->n_hflag &= ~NHLOCKWANT;
wakeup(np);
}
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
/* cleanup sillyrename info */
if (nsp->nsr_cred != NOCRED) {
kauth_cred_unref(&nsp->nsr_cred);
}
- vnode_rele(NFSTOV(nsp->nsr_dnp));
+ if (!vfs_isforce(nmp->nm_mountp)) {
+ /* in case of a forced unmount, usecounts are ignored anyway */
+ vnode_rele(NFSTOV(nsp->nsr_dnp));
+ }
FREE(nsp, M_TEMP);
FSDBG_BOT(264, vp, np, np->n_flag, 0);
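/*
 * Illustrative sketch (not part of the patch): the shape of the forced-unmount
 * handling above. On a forced unmount the parent directory node referenced by the
 * sillyrename record can no longer be trusted, so only the node itself is marked
 * busy and the directory-dependent steps (name cache purge, the remove, the parent
 * vnode release) are skipped. Names and the helper below are hypothetical.
 */
static void
example_sillyrename_cleanup(struct nfsmount *nmp, nfsnode_t dnp, nfsnode_t np, thread_t thd)
{
	int busyerror;

	if (!vfs_isforce(nmp->nm_mountp)) {
		busyerror = nfs_node_set_busy2(dnp, np, thd);   /* normal path: busy both nodes */
	} else {
		busyerror = nfs_node_set_busy(np, thd);         /* forced unmount: only this node */
	}

	/* ... directory-dependent work happens only in the !vfs_isforce() case ... */

	if (!busyerror) {
		if (!vfs_isforce(nmp->nm_mountp)) {
			nfs_node_clear_busy2(dnp, np);
		} else {
			nfs_node_clear_busy(np);
		}
	}
}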
out_free:
lck_mtx_unlock(&nmp->nm_lock);
}
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
if (!force && (!LIST_EMPTY(&np->n_dirtyblkhd) || !LIST_EMPTY(&np->n_cleanblkhd))) {
NP(np, "nfs_reclaim: dropping %s buffers", (!LIST_EMPTY(&np->n_dirtyblkhd) ? "dirty" : "clean"));
}
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
nfs_vinvalbuf(vp, V_IGNORE_WRITEERR, ap->a_context, 0);
- lck_mtx_lock(nfs_node_hash_mutex);
+ lck_mtx_lock(&nfs_node_hash_mutex);
if ((vnode_vtype(vp) != VDIR) && np->n_sillyrename) {
if (!force) {
np->n_hflag &= ~NHHASHED;
FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
}
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
/*
* Free up any directory cookie structures and large file handle
np->n_parent = NULL;
}
- lck_mtx_destroy(&np->n_lock, nfs_node_lck_grp);
- lck_rw_destroy(&np->n_datalock, nfs_data_lck_grp);
- lck_mtx_destroy(&np->n_openlock, nfs_open_grp);
+ lck_mtx_destroy(&np->n_lock, &nfs_node_lck_grp);
+ lck_rw_destroy(&np->n_datalock, &nfs_data_lck_grp);
+ lck_mtx_destroy(&np->n_openlock, &nfs_open_grp);
FSDBG_BOT(265, vp, np, np->n_flag, 0xd1ed1e);
NFS_ZFREE(nfsnode_zone, np);
u_long ncnt = 0;
microuptime(&now);
#endif
- lck_mtx_lock(nfs_node_hash_mutex);
+ lck_mtx_lock(&nfs_node_hash_mutex);
for (i = 0; i <= nfsnodehash; i++) {
LIST_FOREACH(np, &nfsnodehashtbl[i], n_hash) {
#ifdef DODEBUG
}
}
out:
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
#ifdef DODEBUG
microuptime(&then);
timersub(&then, &now, &diff);
int nfsd_thread_count = 0;
int nfsd_thread_max = 0;
-lck_grp_t *nfsd_lck_grp;
-lck_mtx_t *nfsd_mutex;
+static LCK_GRP_DECLARE(nfsd_lck_grp, "nfsd");
+LCK_MTX_DECLARE(nfsd_mutex, &nfsd_lck_grp);
struct nfsd_head nfsd_head, nfsd_queue;
-lck_grp_t *nfsrv_slp_rwlock_group;
-lck_grp_t *nfsrv_slp_mutex_group;
+LCK_GRP_DECLARE(nfsrv_slp_rwlock_group, "nfsrv-slp-rwlock");
+LCK_GRP_DECLARE(nfsrv_slp_mutex_group, "nfsrv-slp-mutex");
struct nfsrv_sockhead nfsrv_socklist, nfsrv_sockwg,
nfsrv_sockwait, nfsrv_sockwork;
struct nfsrv_sock *nfsrv_udpsock = NULL;
struct nfsrv_export_hashhead *nfsrv_export_hashtbl = NULL;
int nfsrv_export_hash_size = NFSRVEXPHASHSZ;
u_long nfsrv_export_hash;
-lck_grp_t *nfsrv_export_rwlock_group;
-lck_rw_t nfsrv_export_rwlock;
+static LCK_GRP_DECLARE(nfsrv_export_rwlock_group, "nfsrv-export-rwlock");
+LCK_RW_DECLARE(nfsrv_export_rwlock, &nfsrv_export_rwlock_group);
#if CONFIG_FSE
/* NFS server file modification event generator */
struct nfsrv_fmod_hashhead *nfsrv_fmod_hashtbl;
u_long nfsrv_fmod_hash;
-lck_grp_t *nfsrv_fmod_grp;
-lck_mtx_t *nfsrv_fmod_mutex;
+static LCK_GRP_DECLARE(nfsrv_fmod_grp, "nfsrv_fmod");
+LCK_MTX_DECLARE(nfsrv_fmod_mutex, &nfsrv_fmod_grp);
static int nfsrv_fmod_timer_on = 0;
int nfsrv_fsevents_enabled = 1;
#endif
uint32_t nfsrv_user_stat_node_count = 0;
uint32_t nfsrv_user_stat_max_idle_sec = NFSRV_USER_STAT_DEF_IDLE_SEC;
uint32_t nfsrv_user_stat_max_nodes = NFSRV_USER_STAT_DEF_MAX_NODES;
-lck_grp_t *nfsrv_active_user_mutex_group;
+LCK_GRP_DECLARE(nfsrv_active_user_mutex_group, "nfs-active-user-mutex");
int nfsrv_wg_delay = NFSRV_WGATHERDELAY * 1000;
int nfsrv_wg_delay_v3 = 0;
printf("struct nfsrv_sock bloated (> %dbytes)\n", NFS_SVCALLOC);
}
- /* init nfsd mutex */
- nfsd_lck_grp = lck_grp_alloc_init("nfsd", LCK_GRP_ATTR_NULL);
- nfsd_mutex = lck_mtx_alloc_init(nfsd_lck_grp, LCK_ATTR_NULL);
-
- /* init slp rwlock */
- nfsrv_slp_rwlock_group = lck_grp_alloc_init("nfsrv-slp-rwlock", LCK_GRP_ATTR_NULL);
- nfsrv_slp_mutex_group = lck_grp_alloc_init("nfsrv-slp-mutex", LCK_GRP_ATTR_NULL);
-
/* init export data structures */
LIST_INIT(&nfsrv_exports);
- nfsrv_export_rwlock_group = lck_grp_alloc_init("nfsrv-export-rwlock", LCK_GRP_ATTR_NULL);
- lck_rw_init(&nfsrv_export_rwlock, nfsrv_export_rwlock_group, LCK_ATTR_NULL);
-
- /* init active user list mutex structures */
- nfsrv_active_user_mutex_group = lck_grp_alloc_init("nfs-active-user-mutex", LCK_GRP_ATTR_NULL);
-
- /* init nfs server request cache mutex */
- nfsrv_reqcache_lck_grp = lck_grp_alloc_init("nfsrv_reqcache", LCK_GRP_ATTR_NULL);
- nfsrv_reqcache_mutex = lck_mtx_alloc_init(nfsrv_reqcache_lck_grp, LCK_ATTR_NULL);
#if CONFIG_FSE
/* init NFS server file modified event generation */
nfsrv_fmod_hashtbl = hashinit(NFSRVFMODHASHSZ, M_TEMP, &nfsrv_fmod_hash);
- nfsrv_fmod_grp = lck_grp_alloc_init("nfsrv_fmod", LCK_GRP_ATTR_NULL);
- nfsrv_fmod_mutex = lck_mtx_alloc_init(nfsrv_fmod_grp, LCK_ATTR_NULL);
#endif
/* initialize NFS server timer callouts */
int i, fmod_fire;
LIST_INIT(&firehead);
- lck_mtx_lock(nfsrv_fmod_mutex);
+ lck_mtx_lock(&nfsrv_fmod_mutex);
again:
clock_get_uptime(&timenow);
clock_interval_to_deadline(nfsrv_fmod_pendtime, 1000 * 1000,
}
if (fmod_fire) {
- lck_mtx_unlock(nfsrv_fmod_mutex);
+ lck_mtx_unlock(&nfsrv_fmod_mutex);
/*
* Fire off the content modified fsevent for each
* entry and free it.
LIST_REMOVE(fp, fm_link);
FREE(fp, M_TEMP);
}
- lck_mtx_lock(nfsrv_fmod_mutex);
+ lck_mtx_lock(&nfsrv_fmod_mutex);
nfsrv_fmod_pending -= fmod_fire;
goto again;
}
nfs_interval_timer_start(nfsrv_fmod_timer_call, interval);
}
- lck_mtx_unlock(nfsrv_fmod_mutex);
+ lck_mtx_unlock(&nfsrv_fmod_mutex);
}
/*
struct nfsrv_fmod *fp;
struct nfsrv_fmod_hashhead *head;
- lck_mtx_lock(nfsrv_fmod_mutex);
+ lck_mtx_lock(&nfsrv_fmod_mutex);
/*
* Compute the time in the future when the
LIST_REMOVE(fp, fm_link);
LIST_INSERT_HEAD(head, fp, fm_link);
}
- lck_mtx_unlock(nfsrv_fmod_mutex);
+ lck_mtx_unlock(&nfsrv_fmod_mutex);
return;
}
}
nfsrv_fmod_pendtime);
}
done:
- lck_mtx_unlock(nfsrv_fmod_mutex);
+ lck_mtx_unlock(&nfsrv_fmod_mutex);
return;
}
#endif /* CONFIG_FSE */
*
* Add/Remove the socket in the nfsrv_sockwg queue as needed.
*/
- lck_mtx_lock(nfsd_mutex);
+ lck_mtx_lock(&nfsd_mutex);
if (slp->ns_wgtime) {
if (slp->ns_wgq.tqe_next == SLPNOLIST) {
TAILQ_INSERT_HEAD(&nfsrv_sockwg, slp, ns_wgq);
TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
slp->ns_wgq.tqe_next = SLPNOLIST;
}
- lck_mtx_unlock(nfsd_mutex);
+ lck_mtx_unlock(&nfsd_mutex);
return 0;
}
cur_usec = now.tv_sec * 1000000 + now.tv_usec;
next_usec = cur_usec + (NFSRV_WGATHERDELAY * 1000);
- lck_mtx_lock(nfsd_mutex);
+ lck_mtx_lock(&nfsd_mutex);
TAILQ_FOREACH(slp, &nfsrv_sockwg, ns_wgq) {
if (slp->ns_wgtime) {
writes_pending++;
if (writes_pending == 0) {
nfsrv_wg_timer_on = 0;
- lck_mtx_unlock(nfsd_mutex);
+ lck_mtx_unlock(&nfsd_mutex);
return;
}
- lck_mtx_unlock(nfsd_mutex);
+ lck_mtx_unlock(&nfsd_mutex);
/*
* Return the number of msec to wait again
#include <sys/tprintf.h>
#include <libkern/OSAtomic.h>
+#include <sys/reboot.h>
#include <sys/time.h>
#include <kern/clock.h>
#include <kern/task.h>
#define NFS_SOCK_DBG(...) NFS_DBG(NFS_FAC_SOCK, 7, ## __VA_ARGS__)
#define NFS_SOCK_DUMP_MBUF(msg, mb) if (NFS_IS_DBG(NFS_FAC_SOCK, 15)) nfs_dump_mbuf(__func__, __LINE__, (msg), (mb))
+#ifndef SUN_LEN
+#define SUN_LEN(su) \
+ (sizeof(*(su)) - sizeof((su)->sun_path) + strnlen((su)->sun_path, sizeof((su)->sun_path)))
+#endif /* SUN_LEN */
+
/* XXX */
boolean_t current_thread_aborted(void);
kern_return_t thread_terminate(thread_t);
switch (sa->sa_family) {
case AF_INET:
+ if (sa->sa_len != sizeof(struct sockaddr_in)) {
+ return EINVAL;
+ }
+ sinaddr = &((struct sockaddr_in*)sa)->sin_addr;
+ if (inet_ntop(sa->sa_family, sinaddr, naddr, sizeof(naddr)) != naddr) {
+ strlcpy(naddr, "<unknown>", sizeof(naddr));
+ }
+ break;
case AF_INET6:
- if (sa->sa_family == AF_INET) {
- sinaddr = &((struct sockaddr_in*)sa)->sin_addr;
- } else {
- sinaddr = &((struct sockaddr_in6*)sa)->sin6_addr;
+ if (sa->sa_len != sizeof(struct sockaddr_in6)) {
+ return EINVAL;
}
+ sinaddr = &((struct sockaddr_in6*)sa)->sin6_addr;
if (inet_ntop(sa->sa_family, sinaddr, naddr, sizeof(naddr)) != naddr) {
strlcpy(naddr, "<unknown>", sizeof(naddr));
}
break;
case AF_LOCAL:
+ if (sa->sa_len != sizeof(struct sockaddr_un) && sa->sa_len != SUN_LEN((struct sockaddr_un *)sa)) {
+ return EINVAL;
+ }
strlcpy(naddr, ((struct sockaddr_un *)sa)->sun_path, sizeof(naddr));
break;
default:
}
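/*
 * Illustrative sketch (not part of the patch), assuming <sys/socket.h>, <sys/un.h>
 * and <netinet/in.h>: the shape of the added validation. Before an address is
 * formatted, the caller-supplied sa_len is checked against the size implied by the
 * address family; AF_LOCAL accepts either a full struct sockaddr_un or the
 * SUN_LEN()-computed length of the embedded path. Names are hypothetical.
 */
static int
example_format_sockaddr(struct sockaddr *sa, char *buf, size_t buflen)
{
	void *addr;

	switch (sa->sa_family) {
	case AF_INET:
		if (sa->sa_len != sizeof(struct sockaddr_in)) {
			return EINVAL;          /* reject truncated or oversized addresses */
		}
		addr = &((struct sockaddr_in *)sa)->sin_addr;
		break;
	case AF_INET6:
		if (sa->sa_len != sizeof(struct sockaddr_in6)) {
			return EINVAL;
		}
		addr = &((struct sockaddr_in6 *)sa)->sin6_addr;
		break;
	case AF_LOCAL:
		if (sa->sa_len != sizeof(struct sockaddr_un) &&
		    sa->sa_len != SUN_LEN((struct sockaddr_un *)sa)) {
			return EINVAL;
		}
		strlcpy(buf, ((struct sockaddr_un *)sa)->sun_path, buflen);
		return 0;
	default:
		return EAFNOSUPPORT;
	}
	if (inet_ntop(sa->sa_family, addr, buf, buflen) != buf) {
		strlcpy(buf, "<unknown>", buflen);
	}
	return 0;
}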
return ENOMEM;
}
- lck_mtx_init(&nso->nso_lock, nfs_request_grp, LCK_ATTR_NULL);
+ lck_mtx_init(&nso->nso_lock, &nfs_request_grp, LCK_ATTR_NULL);
nso->nso_sotype = sotype;
if (nso->nso_sotype == SOCK_STREAM) {
nfs_rpc_record_state_init(&nso->nso_rrs);
if (nso->nso_sotype == SOCK_STREAM) {
nfs_rpc_record_state_cleanup(&nso->nso_rrs);
}
- lck_mtx_destroy(&nso->nso_lock, nfs_request_grp);
+ lck_mtx_destroy(&nso->nso_lock, &nfs_request_grp);
if (nso->nso_saddr) {
FREE(nso->nso_saddr, M_SONAME);
}
* as needing a resend. (Though nfs_need_reconnect() probably
* marked them all already.)
*/
- lck_mtx_lock(nfs_request_mutex);
+ lck_mtx_lock(&nfs_request_mutex);
TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
if (rq->r_nmp == nmp) {
lck_mtx_lock(&rq->r_mtx);
lck_mtx_unlock(&rq->r_mtx);
}
}
- lck_mtx_unlock(nfs_request_mutex);
+ lck_mtx_unlock(&nfs_request_mutex);
return 0;
}
* Loop through outstanding request list and
* mark all requests as needing a resend.
*/
- lck_mtx_lock(nfs_request_mutex);
+ lck_mtx_lock(&nfs_request_mutex);
TAILQ_FOREACH(rq, &nfs_reqq, r_chain) {
if (rq->r_nmp == nmp) {
lck_mtx_lock(&rq->r_mtx);
lck_mtx_unlock(&rq->r_mtx);
}
}
- lck_mtx_unlock(nfs_request_mutex);
+ lck_mtx_unlock(&nfs_request_mutex);
}
int error, on = 1;
in_port_t port;
- lck_mtx_lock(nfs_global_mutex);
+ lck_mtx_lock(&nfs_global_mutex);
if (nfs4_cb_id == 0) {
TAILQ_INIT(&nfs4_cb_mounts);
TAILQ_INIT(&nfs4_cb_socks);
TAILQ_INSERT_HEAD(&nfs4_cb_mounts, nmp, nm_cblink);
if (nfs4_cb_so) {
- lck_mtx_unlock(nfs_global_mutex);
+ lck_mtx_unlock(&nfs_global_mutex);
return;
}
fail:
if (error) {
nfs4_cb_so = nfs4_cb_so6 = NULL;
- lck_mtx_unlock(nfs_global_mutex);
+ lck_mtx_unlock(&nfs_global_mutex);
if (so) {
sock_shutdown(so, SHUT_RDWR);
sock_close(so);
sock_close(so6);
}
} else {
- lck_mtx_unlock(nfs_global_mutex);
+ lck_mtx_unlock(&nfs_global_mutex);
}
}
struct nfs4_cb_sock_list cb_socks;
struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
- lck_mtx_lock(nfs_global_mutex);
+ lck_mtx_lock(&nfs_global_mutex);
if (nmp->nm_cbid == 0) {
- lck_mtx_unlock(nfs_global_mutex);
+ lck_mtx_unlock(&nfs_global_mutex);
return;
}
TAILQ_REMOVE(&nfs4_cb_mounts, nmp, nm_cblink);
/* wait for any callbacks in progress to complete */
while (nmp->nm_cbrefs) {
- msleep(&nmp->nm_cbrefs, nfs_global_mutex, PSOCK, "cbshutwait", &ts);
+ msleep(&nmp->nm_cbrefs, &nfs_global_mutex, PSOCK, "cbshutwait", &ts);
}
nmp->nm_cbid = 0;
if (--nfs4_cb_so_usecount) {
- lck_mtx_unlock(nfs_global_mutex);
+ lck_mtx_unlock(&nfs_global_mutex);
return;
}
so = nfs4_cb_so;
nfs4_cb_so = nfs4_cb_so6 = NULL;
TAILQ_INIT(&cb_socks);
TAILQ_CONCAT(&cb_socks, &nfs4_cb_socks, ncbs_link);
- lck_mtx_unlock(nfs_global_mutex);
+ lck_mtx_unlock(&nfs_global_mutex);
if (so) {
sock_shutdown(so, SHUT_RDWR);
sock_close(so);
struct timeval now;
loop:
- lck_mtx_lock(nfs_global_mutex);
+ lck_mtx_lock(&nfs_global_mutex);
if (TAILQ_EMPTY(&nfs4_cb_socks)) {
nfs4_callback_timer_on = 0;
- lck_mtx_unlock(nfs_global_mutex);
+ lck_mtx_unlock(&nfs_global_mutex);
return;
}
microuptime(&now);
continue;
}
TAILQ_REMOVE(&nfs4_cb_socks, ncbsp, ncbs_link);
- lck_mtx_unlock(nfs_global_mutex);
+ lck_mtx_unlock(&nfs_global_mutex);
sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR);
sock_close(ncbsp->ncbs_so);
nfs_rpc_record_state_cleanup(&ncbsp->ncbs_rrs);
nfs4_callback_timer_on = 1;
nfs_interval_timer_start(nfs4_callback_timer_call,
NFS4_CB_TIMER_PERIOD * 1000);
- lck_mtx_unlock(nfs_global_mutex);
+ lck_mtx_unlock(&nfs_global_mutex);
}
/*
microuptime(&now);
ncbsp->ncbs_stamp = now.tv_sec;
- lck_mtx_lock(nfs_global_mutex);
+ lck_mtx_lock(&nfs_global_mutex);
/* add it to the list */
TAILQ_INSERT_HEAD(&nfs4_cb_socks, ncbsp, ncbs_link);
nfs_interval_timer_start(nfs4_callback_timer_call, 500);
}
- lck_mtx_unlock(nfs_global_mutex);
+ lck_mtx_unlock(&nfs_global_mutex);
}
/*
mbuf_t m;
int error = 0, recv = 1;
- lck_mtx_lock(nfs_global_mutex);
+ lck_mtx_lock(&nfs_global_mutex);
while (ncbsp->ncbs_flags & NCBSOCK_UPCALL) {
/* wait if upcall is already in progress */
ncbsp->ncbs_flags |= NCBSOCK_UPCALLWANT;
- msleep(ncbsp, nfs_global_mutex, PSOCK, "cbupcall", &ts);
+ msleep(ncbsp, &nfs_global_mutex, PSOCK, "cbupcall", &ts);
}
ncbsp->ncbs_flags |= NCBSOCK_UPCALL;
- lck_mtx_unlock(nfs_global_mutex);
+ lck_mtx_unlock(&nfs_global_mutex);
/* loop while we make error-free progress */
while (!error && recv) {
ncbsp->ncbs_stamp = now.tv_sec;
}
- lck_mtx_lock(nfs_global_mutex);
+ lck_mtx_lock(&nfs_global_mutex);
ncbsp->ncbs_flags &= ~NCBSOCK_UPCALL;
- lck_mtx_unlock(nfs_global_mutex);
+ lck_mtx_unlock(&nfs_global_mutex);
wakeup(ncbsp);
}
goto nfsmout;
}
/* match the callback ID to a registered mount */
- lck_mtx_lock(nfs_global_mutex);
+ lck_mtx_lock(&nfs_global_mutex);
TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) {
if (nmp->nm_cbid != cbid) {
continue;
if (nmp) {
nmp->nm_cbrefs++;
}
- lck_mtx_unlock(nfs_global_mutex);
+ lck_mtx_unlock(&nfs_global_mutex);
if (!nmp) {
/* if no mount match, just drop socket. */
error = EPERM;
nfsm_chain_null(&nmrep);
/* drop the callback reference on the mount */
- lck_mtx_lock(nfs_global_mutex);
+ lck_mtx_lock(&nfs_global_mutex);
nmp->nm_cbrefs--;
if (!nmp->nm_cbid) {
wakeup(&nmp->nm_cbrefs);
}
- lck_mtx_unlock(nfs_global_mutex);
+ lck_mtx_unlock(&nfs_global_mutex);
break;
}
* Loop through the request list to match up the reply
* Iff no match, just drop it.
*/
- lck_mtx_lock(nfs_request_mutex);
+ lck_mtx_lock(&nfs_request_mutex);
TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
if (req->r_nmrep.nmc_mhead || (rxid != R_XID32(req->r_xid))) {
continue;
}
#endif /* CONFIG_NFS_GSS */
lck_mtx_unlock(&req->r_mtx);
- lck_mtx_unlock(nfs_request_mutex);
+ lck_mtx_unlock(&nfs_request_mutex);
/* if it's an async RPC with a callback, queue it up */
if (asyncioq) {
nfs_asyncio_finish(req);
if (!req) {
/* not matched to a request, so drop it. */
- lck_mtx_unlock(nfs_request_mutex);
+ lck_mtx_unlock(&nfs_request_mutex);
OSAddAtomic64(1, &nfsstats.rpcunexpected);
mbuf_freem(mrep);
}
panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum);
}
- lck_mtx_init(&req->r_mtx, nfs_request_grp, LCK_ATTR_NULL);
+ lck_mtx_init(&req->r_mtx, &nfs_request_grp, LCK_ATTR_NULL);
req->r_nmp = nmp;
nmp->nm_ref++;
req->r_np = np;
* Still on an async I/O queue?
* %%% But which one, we may be on a local iod.
*/
- lck_mtx_lock(nfsiod_mutex);
+ lck_mtx_lock(&nfsiod_mutex);
if (nmp && req->r_achain.tqe_next != NFSREQNOLIST) {
TAILQ_REMOVE(&nmp->nm_iodq, req, r_achain);
req->r_achain.tqe_next = NFSREQNOLIST;
}
- lck_mtx_unlock(nfsiod_mutex);
+ lck_mtx_unlock(&nfsiod_mutex);
}
lck_mtx_lock(&req->r_mtx);
if (nmp) {
nfs_mount_rele(nmp);
}
- lck_mtx_destroy(&req->r_mtx, nfs_request_grp);
+ lck_mtx_destroy(&req->r_mtx, &nfs_request_grp);
if (req->r_flags & R_ALLOCATED) {
NFS_ZFREE(nfs_req_zone, req);
}
req->r_flags |= R_SENDING;
lck_mtx_unlock(&req->r_mtx);
- lck_mtx_lock(nfs_request_mutex);
+ lck_mtx_lock(&nfs_request_mutex);
nmp = req->r_nmp;
if (nfs_mount_gone(nmp)) {
- lck_mtx_unlock(nfs_request_mutex);
+ lck_mtx_unlock(&nfs_request_mutex);
return ENXIO;
}
nfs_interval_timer_start(nfs_request_timer_call,
NFS_REQUESTDELAY);
}
- lck_mtx_unlock(nfs_request_mutex);
+ lck_mtx_unlock(&nfs_request_mutex);
/* Send the request... */
return nfs_send(req, wait);
void
nfs_reqdequeue(struct nfsreq *req)
{
- lck_mtx_lock(nfs_request_mutex);
+ lck_mtx_lock(&nfs_request_mutex);
while (req->r_lflags & RL_BUSY) {
req->r_lflags |= RL_WAITING;
- msleep(&req->r_lflags, nfs_request_mutex, PSOCK, "reqdeq", NULL);
+ msleep(&req->r_lflags, &nfs_request_mutex, PSOCK, "reqdeq", NULL);
}
if (req->r_lflags & RL_QUEUED) {
TAILQ_REMOVE(&nfs_reqq, req, r_chain);
req->r_lflags &= ~RL_QUEUED;
}
- lck_mtx_unlock(nfs_request_mutex);
+ lck_mtx_unlock(&nfs_request_mutex);
}
/*
TAILQ_INIT(&nfs_mount_poke_queue);
restart:
- lck_mtx_lock(nfs_request_mutex);
+ lck_mtx_lock(&nfs_request_mutex);
req = TAILQ_FIRST(&nfs_reqq);
if (req == NULL) { /* no requests - turn timer off */
nfs_request_timer_on = 0;
- lck_mtx_unlock(nfs_request_mutex);
+ lck_mtx_unlock(&nfs_request_mutex);
return;
}
TAILQ_REMOVE(&nfs_mount_poke_queue, nmp, nm_pokeq);
}
/* Release our lock state, so we can become a zombie */
- lck_mtx_unlock(nfs_request_mutex);
+ lck_mtx_unlock(&nfs_request_mutex);
/*
* Note nfs_mount_make zombie(nmp) must be
* work we release nm_lock in
* nfs_make_mount_zombie with out acquiring any
* other locks. (Later, in nfs_mount_zombie we
- * will acquire nfs_request_mutex, r_mtx,
+ * will acquire &nfs_request_mutex, r_mtx,
* nm_lock in that order). So we should not be
* introducing deadlock here. We take a reference
* on the mount so that its still there when we
lck_mtx_unlock(&req->r_mtx);
}
- lck_mtx_unlock(nfs_request_mutex);
+ lck_mtx_unlock(&nfs_request_mutex);
/* poke any sockets */
while ((nmp = TAILQ_FIRST(&nfs_mount_poke_queue))) {
* This is used to determine if we need to bail on a mount.
* ETIMEDOUT is returned if there has been a soft timeout.
* EINTR is returned if there is a signal pending that is not being ignored
+ * ESHUTDOWN is return if the system is in shutdown.
* and the mount is interruptable, or if we are a thread that is in the process
* of cancellation (also SIGKILL posted).
*/
return ENXIO;
}
+ if (get_system_inshutdown()) {
+ NFS_SOCK_DBG("Shutdown in progress\n");
+ return ESHUTDOWN;
+ }
+
if (req && (req->r_flags & R_SOFTTERM)) {
return ETIMEDOUT; /* request has been terminated. */
}
int wake = (slp->ns_flag & SLP_WORKTODO);
lck_rw_done(&slp->ns_rwlock);
if (wake && nfsd_thread_count) {
- lck_mtx_lock(nfsd_mutex);
+ lck_mtx_lock(&nfsd_mutex);
nfsrv_wakenfsd(slp);
- lck_mtx_unlock(nfsd_mutex);
+ lck_mtx_unlock(&nfsd_mutex);
}
}
}
TAILQ_HEAD(nfsrv_reqcache_lru, nfsrvcache) nfsrv_reqcache_lruhead;
u_long nfsrv_reqcache_hash;
-lck_grp_t *nfsrv_reqcache_lck_grp;
-lck_mtx_t *nfsrv_reqcache_mutex;
+static LCK_GRP_DECLARE(nfsrv_reqcache_lck_grp, "nfsrv_reqcache");
+LCK_MTX_DECLARE(nfsrv_reqcache_mutex, &nfsrv_reqcache_lck_grp);
/*
* Static array that defines which nfs rpc's are nonidempotent
return;
}
- lck_mtx_lock(nfsrv_reqcache_mutex);
+ lck_mtx_lock(&nfsrv_reqcache_mutex);
/* init nfs server request cache hash table */
nfsrv_reqcache_hashtbl = hashinit(nfsrv_reqcache_size, M_NFSD, &nfsrv_reqcache_hash);
TAILQ_INIT(&nfsrv_reqcache_lruhead);
- lck_mtx_unlock(nfsrv_reqcache_mutex);
+ lck_mtx_unlock(&nfsrv_reqcache_mutex);
}
/*
if (!nd->nd_nam2) {
return RC_DOIT;
}
- lck_mtx_lock(nfsrv_reqcache_mutex);
+ lck_mtx_lock(&nfsrv_reqcache_mutex);
loop:
for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != 0;
rp = rp->rc_hash.le_next) {
netaddr_match(rp->rc_family, &rp->rc_haddr, nd->nd_nam)) {
if ((rp->rc_flag & RC_LOCKED) != 0) {
rp->rc_flag |= RC_WANTED;
- msleep(rp, nfsrv_reqcache_mutex, PZERO - 1, "nfsrc", NULL);
+ msleep(rp, &nfsrv_reqcache_mutex, PZERO - 1, "nfsrc", NULL);
goto loop;
}
rp->rc_flag |= RC_LOCKED;
rp->rc_flag &= ~RC_WANTED;
wakeup(rp);
}
- lck_mtx_unlock(nfsrv_reqcache_mutex);
+ lck_mtx_unlock(&nfsrv_reqcache_mutex);
return ret;
}
}
if (!rp) {
/* no entry to reuse? */
/* OK, we just won't be able to cache this request */
- lck_mtx_unlock(nfsrv_reqcache_mutex);
+ lck_mtx_unlock(&nfsrv_reqcache_mutex);
return RC_DOIT;
}
while ((rp->rc_flag & RC_LOCKED) != 0) {
rp->rc_flag |= RC_WANTED;
- msleep(rp, nfsrv_reqcache_mutex, PZERO - 1, "nfsrc", NULL);
+ msleep(rp, &nfsrv_reqcache_mutex, PZERO - 1, "nfsrc", NULL);
rp = nfsrv_reqcache_lruhead.tqh_first;
}
rp->rc_flag |= RC_LOCKED;
rp->rc_flag &= ~RC_WANTED;
wakeup(rp);
}
- lck_mtx_unlock(nfsrv_reqcache_mutex);
+ lck_mtx_unlock(&nfsrv_reqcache_mutex);
return RC_DOIT;
}
if (!nd->nd_nam2) {
return;
}
- lck_mtx_lock(nfsrv_reqcache_mutex);
+ lck_mtx_lock(&nfsrv_reqcache_mutex);
loop:
for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != 0;
rp = rp->rc_hash.le_next) {
netaddr_match(rp->rc_family, &rp->rc_haddr, nd->nd_nam)) {
if ((rp->rc_flag & RC_LOCKED) != 0) {
rp->rc_flag |= RC_WANTED;
- msleep(rp, nfsrv_reqcache_mutex, PZERO - 1, "nfsrc", NULL);
+ msleep(rp, &nfsrv_reqcache_mutex, PZERO - 1, "nfsrc", NULL);
goto loop;
}
rp->rc_flag |= RC_LOCKED;
rp->rc_flag &= ~RC_WANTED;
wakeup(rp);
}
- lck_mtx_unlock(nfsrv_reqcache_mutex);
+ lck_mtx_unlock(&nfsrv_reqcache_mutex);
return;
}
}
- lck_mtx_unlock(nfsrv_reqcache_mutex);
+ lck_mtx_unlock(&nfsrv_reqcache_mutex);
}
/*
{
struct nfsrvcache *rp, *nextrp;
- lck_mtx_lock(nfsrv_reqcache_mutex);
+ lck_mtx_lock(&nfsrv_reqcache_mutex);
for (rp = nfsrv_reqcache_lruhead.tqh_first; rp != 0; rp = nextrp) {
nextrp = rp->rc_lru.tqe_next;
LIST_REMOVE(rp, rc_hash);
}
nfsrv_reqcache_count = 0;
FREE(nfsrv_reqcache_hashtbl, M_TEMP);
- lck_mtx_unlock(nfsrv_reqcache_mutex);
+ lck_mtx_unlock(&nfsrv_reqcache_mutex);
}
#endif /* CONFIG_NFS_SERVER */
{
struct timeval tv;
- lck_mtx_lock(nfs_request_mutex);
+ lck_mtx_lock(&nfs_request_mutex);
if (!nfs_xid) {
/*
* Derive initial xid from system time.
nfs_xid++;
}
*xidp = nfs_xid + (nfs_xidwrap << 32);
- lck_mtx_unlock(nfs_request_mutex);
+ lck_mtx_unlock(&nfs_request_mutex);
}
/*
struct radix_node *rn;
struct sockaddr *saddr, *smask;
struct domain *dom;
- size_t i;
+ size_t i, ss_minsize;
int error;
unsigned int net;
user_addr_t uaddr;
kauth_cred_t cred;
uaddr = unxa->nxa_nets;
+ ss_minsize = sizeof(((struct sockaddr_storage *)0)->ss_len) + sizeof(((struct sockaddr_storage *)0)->ss_family);
for (net = 0; net < unxa->nxa_netcount; net++, uaddr += sizeof(nxna)) {
error = copyin(uaddr, &nxna, sizeof(nxna));
if (error) {
}
if (nxna.nxna_addr.ss_len > sizeof(struct sockaddr_storage) ||
+ (nxna.nxna_addr.ss_len != 0 && nxna.nxna_addr.ss_len < ss_minsize) ||
nxna.nxna_mask.ss_len > sizeof(struct sockaddr_storage) ||
+ (nxna.nxna_mask.ss_len != 0 && nxna.nxna_mask.ss_len < ss_minsize) ||
nxna.nxna_addr.ss_family > AF_MAX ||
nxna.nxna_mask.ss_family > AF_MAX) {
return EINVAL;
struct radix_node *rn;
struct nfsrv_free_netopt_arg fna;
struct nfs_netopt *nno;
+ size_t ss_minsize;
user_addr_t uaddr;
unsigned int net;
int i, error;
/* delete only the exports specified */
uaddr = unxa->nxa_nets;
+ ss_minsize = sizeof(((struct sockaddr_storage *)0)->ss_len) + sizeof(((struct sockaddr_storage *)0)->ss_family);
for (net = 0; net < unxa->nxa_netcount; net++, uaddr += sizeof(nxna)) {
error = copyin(uaddr, &nxna, sizeof(nxna));
if (error) {
continue;
}
+ if (nxna.nxna_addr.ss_len > sizeof(struct sockaddr_storage) ||
+ (nxna.nxna_addr.ss_len != 0 && nxna.nxna_addr.ss_len < ss_minsize) ||
+ nxna.nxna_addr.ss_family > AF_MAX) {
+ printf("nfsrv_free_addrlist: invalid socket address (%u)\n", net);
+ continue;
+ }
+
+ if (nxna.nxna_mask.ss_len > sizeof(struct sockaddr_storage) ||
+ (nxna.nxna_mask.ss_len != 0 && nxna.nxna_mask.ss_len < ss_minsize) ||
+ nxna.nxna_mask.ss_family > AF_MAX) {
+ printf("nfsrv_free_addrlist: invalid socket mask (%u)\n", net);
+ continue;
+ }
+
if ((rnh = nx->nx_rtable[nxna.nxna_addr.ss_family]) == 0) {
/* AF not initialized? */
if (!(unxa->nxa_flags & NXA_ADD)) {
void enablequotas(struct mount *mp, vfs_context_t ctx); // XXX
+#define DATA_VOLUME_MP "/System/Volumes/Data" // PLATFORM_DATA_VOLUME_MOUNT_POINT
+
int
nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx)
{
int error = 0;
- size_t pathlen;
+ size_t pathlen, nxfs_pathlen;
struct nfs_exportfs *nxfs, *nxfs2, *nxfs3;
struct nfs_export *nx, *nx2, *nx3;
struct nfs_filehandle nfh;
struct nameidata mnd, xnd;
vnode_t mvp = NULL, xvp = NULL;
mount_t mp = NULL;
- char path[MAXPATHLEN];
+ char path[MAXPATHLEN], *nxfs_path;
char fl_pathbuff[MAXPATHLEN];
int fl_pathbuff_len = MAXPATHLEN;
int expisroot;
+ size_t datavol_len = strlen(DATA_VOLUME_MP);
if (unxa->nxa_flags == NXA_CHECK) {
/* just check if the path is an NFS-exportable file system */
}
if (nxfs) {
/* verify exported FS path matches given path */
- if (strncmp(path, nxfs->nxfs_path, MAXPATHLEN)) {
+ if (strncmp(path, nxfs->nxfs_path, MAXPATHLEN) &&
+ (strncmp(path, DATA_VOLUME_MP, datavol_len) || strncmp(path + datavol_len, nxfs->nxfs_path, MAXPATHLEN - datavol_len))) {
error = EEXIST;
goto unlock_out;
}
}
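With the data volume firmlinked at /System/Volumes/Data, the same export can legitimately be named either by its canonical mount path or by the data-volume-prefixed form, so the comparison above now accepts both. The same test as a standalone predicate (a sketch; the helper name is made up):

#include <string.h>
#include <sys/param.h>          /* MAXPATHLEN */

#define DATA_VOLUME_MP "/System/Volumes/Data"

/* Hypothetical helper: does 'path' refer to the same exported file system
 * as 'nxfs_path', allowing for the /System/Volumes/Data prefix? */
static int
nfs_export_path_matches(const char *path, const char *nxfs_path)
{
	size_t datavol_len = strlen(DATA_VOLUME_MP);

	if (strncmp(path, nxfs_path, MAXPATHLEN) == 0) {
		return 1;       /* exact match */
	}
	if (strncmp(path, DATA_VOLUME_MP, datavol_len) == 0 &&
	    strncmp(path + datavol_len, nxfs_path, MAXPATHLEN - datavol_len) == 0) {
		return 1;       /* "/System/Volumes/Data" + nxfs_path */
	}
	return 0;
}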
bzero(nxfs, sizeof(struct nfs_exportfs));
nxfs->nxfs_id = unxa->nxa_fsid;
- MALLOC(nxfs->nxfs_path, char*, pathlen, M_TEMP, M_WAITOK);
+ if (mp) {
+ nxfs_path = mp->mnt_vfsstat.f_mntonname;
+ nxfs_pathlen = sizeof(mp->mnt_vfsstat.f_mntonname);
+ } else {
+ nxfs_path = path;
+ nxfs_pathlen = pathlen;
+ }
+ MALLOC(nxfs->nxfs_path, char*, nxfs_pathlen, M_TEMP, M_WAITOK);
if (!nxfs->nxfs_path) {
FREE(nxfs, M_TEMP);
error = ENOMEM;
goto out;
}
- bcopy(path, nxfs->nxfs_path, pathlen);
+ bcopy(nxfs_path, nxfs->nxfs_path, nxfs_pathlen);
/* insert into list in reverse-sorted order */
nxfs3 = NULL;
LIST_FOREACH(nxfs2, &nfsrv_exports, nxfs_next) {
}
ulist->node_count = 0;
- lck_mtx_init(&ulist->user_mutex, nfsrv_active_user_mutex_group, LCK_ATTR_NULL);
+ lck_mtx_init(&ulist->user_mutex, &nfsrv_active_user_mutex_group, LCK_ATTR_NULL);
}
/* Free all nodes in an active user list */
}
ulist->node_count = 0;
- lck_mtx_destroy(&ulist->user_mutex, nfsrv_active_user_mutex_group);
+ lck_mtx_destroy(&ulist->user_mutex, &nfsrv_active_user_mutex_group);
}
/* Reclaim old expired user nodes from active user lists. */
nfsiod_terminate(struct nfsiod *niod)
{
nfsiod_thread_count--;
- lck_mtx_unlock(nfsiod_mutex);
+ lck_mtx_unlock(&nfsiod_mutex);
if (niod) {
FREE(niod, M_TEMP);
} else {
MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK);
if (!niod) {
- lck_mtx_lock(nfsiod_mutex);
+ lck_mtx_lock(&nfsiod_mutex);
nfsiod_thread_count--;
wakeup(current_thread());
- lck_mtx_unlock(nfsiod_mutex);
+ lck_mtx_unlock(&nfsiod_mutex);
thread_terminate(current_thread());
/*NOTREACHED*/
}
bzero(niod, sizeof(*niod));
- lck_mtx_lock(nfsiod_mutex);
+ lck_mtx_lock(&nfsiod_mutex);
TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
wakeup(current_thread());
- error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE * hz, nfsiod_continue);
+ error = msleep0(niod, &nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE * hz, nfsiod_continue);
/* shouldn't return... so we have an error */
/* remove an old nfsiod struct and terminate */
- lck_mtx_lock(nfsiod_mutex);
+ lck_mtx_lock(&nfsiod_mutex);
if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) {
TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
}
{
thread_t thd = THREAD_NULL;
- lck_mtx_lock(nfsiod_mutex);
+ lck_mtx_lock(&nfsiod_mutex);
if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) {
- lck_mtx_unlock(nfsiod_mutex);
+ lck_mtx_unlock(&nfsiod_mutex);
return EBUSY;
}
nfsiod_thread_count++;
if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) {
- lck_mtx_unlock(nfsiod_mutex);
+ lck_mtx_unlock(&nfsiod_mutex);
return EBUSY;
}
/* wait for the thread to complete startup */
- msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL);
+ msleep(thd, &nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL);
thread_deallocate(thd);
return 0;
}
struct nfs_reqqhead iodq;
int morework;
- lck_mtx_lock(nfsiod_mutex);
+ lck_mtx_lock(&nfsiod_mutex);
niod = TAILQ_FIRST(&nfsiodwork);
if (!niod) {
/* there's no work queued up */
req->r_flags |= R_IOD;
lck_mtx_unlock(&req->r_mtx);
}
- lck_mtx_unlock(nfsiod_mutex);
+ lck_mtx_unlock(&nfsiod_mutex);
/* process the queue */
TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
}
/* now check if there's more/other work to be done */
- lck_mtx_lock(nfsiod_mutex);
+ lck_mtx_lock(&nfsiod_mutex);
morework = !TAILQ_EMPTY(&nmp->nm_iodq);
if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) {
/*
/* queue ourselves back up - if there aren't too many threads running */
if (nfsiod_thread_count <= NFSIOD_MAX) {
TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
- error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE * hz, nfsiod_continue);
+ error = msleep0(niod, &nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE * hz, nfsiod_continue);
/* shouldn't return... so we have an error */
/* remove an old nfsiod struct and terminate */
- lck_mtx_lock(nfsiod_mutex);
+ lck_mtx_lock(&nfsiod_mutex);
if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) {
TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
}
return ENOMEM;
}
bzero((caddr_t)slp, sizeof(struct nfsrv_sock));
- lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
- lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL);
+ lck_rw_init(&slp->ns_rwlock, &nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
+ lck_mtx_init(&slp->ns_wgmutex, &nfsrv_slp_mutex_group, LCK_ATTR_NULL);
- lck_mtx_lock(nfsd_mutex);
+ lck_mtx_lock(&nfsd_mutex);
if (soprotocol == IPPROTO_UDP) {
if (sodomain == AF_INET) {
/* There should be only one UDP/IPv4 socket */
if (nfsrv_udpsock) {
- lck_mtx_unlock(nfsd_mutex);
+ lck_mtx_unlock(&nfsd_mutex);
nfsrv_slpfree(slp);
mbuf_freem(mynam);
return EEXIST;
if (sodomain == AF_INET6) {
/* There should be only one UDP/IPv6 socket */
if (nfsrv_udp6sock) {
- lck_mtx_unlock(nfsd_mutex);
+ lck_mtx_unlock(&nfsd_mutex);
nfsrv_slpfree(slp);
mbuf_freem(mynam);
return EEXIST;
slp->ns_flag = SLP_VALID | SLP_NEEDQ;
nfsrv_wakenfsd(slp);
- lck_mtx_unlock(nfsd_mutex);
+ lck_mtx_unlock(&nfsd_mutex);
return 0;
}
return ENOMEM;
}
bzero(nfsd, sizeof(struct nfsd));
- lck_mtx_lock(nfsd_mutex);
+ lck_mtx_lock(&nfsd_mutex);
if (nfsd_thread_count++ == 0) {
nfsrv_initcache(); /* Init the server request cache */
}
TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
- lck_mtx_unlock(nfsd_mutex);
+ lck_mtx_unlock(&nfsd_mutex);
context.vc_thread = current_thread();
} else {
/* need to find work to do */
error = 0;
- lck_mtx_lock(nfsd_mutex);
+ lck_mtx_lock(&nfsd_mutex);
while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
if (nfsd_thread_count > nfsd_thread_max) {
/*
}
nfsd->nfsd_flag |= NFSD_WAITING;
TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
- error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
+ error = msleep(nfsd, &nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
if (error) {
if (nfsd->nfsd_flag & NFSD_WAITING) {
TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
slp->ns_flag |= SLP_WORKQ;
lck_rw_done(&slp->ns_rwlock);
}
- lck_mtx_unlock(nfsd_mutex);
+ lck_mtx_unlock(&nfsd_mutex);
if (!slp) {
continue;
}
}
NFS_ZFREE(nfsrv_descript_zone, nd);
nfsrv_slpderef(slp);
- lck_mtx_lock(nfsd_mutex);
+ lck_mtx_lock(&nfsd_mutex);
goto done;
}
break;
nfsrv_slpderef(slp);
}
}
- lck_mtx_lock(nfsd_mutex);
+ lck_mtx_lock(&nfsd_mutex);
done:
TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
FREE(nfsd, M_NFSD);
if (--nfsd_thread_count == 0) {
nfsrv_cleanup();
}
- lck_mtx_unlock(nfsd_mutex);
+ lck_mtx_unlock(&nfsd_mutex);
return error;
}
}
LIST_INIT(&slp->ns_tq);
- lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group);
- lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group);
+ lck_rw_destroy(&slp->ns_rwlock, &nfsrv_slp_rwlock_group);
+ lck_mtx_destroy(&slp->ns_wgmutex, &nfsrv_slp_mutex_group);
FREE(slp, M_NFSSVC);
}
void
nfsrv_slpderef(struct nfsrv_sock *slp)
{
- lck_mtx_lock(nfsd_mutex);
+ lck_mtx_lock(&nfsd_mutex);
nfsrv_slpderef_locked(slp);
- lck_mtx_unlock(nfsd_mutex);
+ lck_mtx_unlock(&nfsd_mutex);
}
/*
time_t time_to_wait = nfsrv_sock_idle_timeout;
microuptime(&now);
- lck_mtx_lock(nfsd_mutex);
+ lck_mtx_lock(&nfsd_mutex);
/* Turn off the timer if we're supposed to and get out */
if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT) {
}
if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) {
nfsrv_idlesock_timer_on = 0;
- lck_mtx_unlock(nfsd_mutex);
+ lck_mtx_unlock(&nfsd_mutex);
return;
}
nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
/* Remember when the next timer will fire for nfssvc_addsock. */
nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
- lck_mtx_unlock(nfsd_mutex);
+ lck_mtx_unlock(&nfsd_mutex);
}
/*
/*
* Flush pending file write fsevents
*/
- lck_mtx_lock(nfsrv_fmod_mutex);
+ lck_mtx_lock(&nfsrv_fmod_mutex);
for (i = 0; i < NFSRVFMODHASHSZ; i++) {
for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
/*
}
}
nfsrv_fmod_pending = 0;
- lck_mtx_unlock(nfsrv_fmod_mutex);
+ lck_mtx_unlock(&nfsrv_fmod_mutex);
#endif
nfsrv_uc_cleanup(); /* Stop nfs socket up-call threads */
TAILQ_HEAD(nfsrv_uc_q, nfsrv_uc_arg);
static struct nfsrv_uc_queue {
- lck_mtx_t *ucq_lock;
+ lck_mtx_t ucq_lock;
struct nfsrv_uc_q ucq_queue[1];
thread_t ucq_thd;
uint32_t ucq_flags;
} nfsrv_uc_queue_tbl[NFS_UC_HASH_SZ];
#define NFS_UC_QUEUE_SLEEPING 0x0001
-static lck_grp_t *nfsrv_uc_group;
-static lck_mtx_t *nfsrv_uc_shutdown_lock;
+static LCK_GRP_DECLARE(nfsrv_uc_group, "nfs_upcall_locks");
+static LCK_MTX_DECLARE(nfsrv_uc_shutdown_lock, &nfsrv_uc_group);
static volatile int nfsrv_uc_shutdown = 0;
static int32_t nfsrv_uc_thread_count;
DPRINT("nfsrv_uc_thread %d started\n", qi);
while (!nfsrv_uc_shutdown) {
- lck_mtx_lock(myqueue->ucq_lock);
+ lck_mtx_lock(&myqueue->ucq_lock);
while (!nfsrv_uc_shutdown && TAILQ_EMPTY(myqueue->ucq_queue)) {
myqueue->ucq_flags |= NFS_UC_QUEUE_SLEEPING;
- error = msleep(myqueue, myqueue->ucq_lock, PSOCK, "nfsd_upcall_handler", NULL);
+ error = msleep(myqueue, &myqueue->ucq_lock, PSOCK, "nfsd_upcall_handler", NULL);
myqueue->ucq_flags &= ~NFS_UC_QUEUE_SLEEPING;
if (error) {
printf("nfsrv_uc_thread received error %d\n", error);
}
}
if (nfsrv_uc_shutdown) {
- lck_mtx_unlock(myqueue->ucq_lock);
+ lck_mtx_unlock(&myqueue->ucq_lock);
break;
}
ep->nua_flags &= ~NFS_UC_QUEUED;
- lck_mtx_unlock(myqueue->ucq_lock);
+ lck_mtx_unlock(&myqueue->ucq_lock);
#ifdef NFS_UC_Q_DEBUG
OSDecrementAtomic(&nfsrv_uc_queue_count);
nfsrv_rcv(ep->nua_so, (void *)ep->nua_slp, ep->nua_waitflag);
}
- lck_mtx_lock(nfsrv_uc_shutdown_lock);
+ lck_mtx_lock(&nfsrv_uc_shutdown_lock);
nfsrv_uc_thread_count--;
wakeup(&nfsrv_uc_thread_count);
- lck_mtx_unlock(nfsrv_uc_shutdown_lock);
+ lck_mtx_unlock(&nfsrv_uc_shutdown_lock);
thread_terminate(current_thread());
}
return;
}
/* If we're queued we might race with nfsrv_uc_thread */
- lck_mtx_lock(myqueue->ucq_lock);
+ lck_mtx_lock(&myqueue->ucq_lock);
if (ap->nua_flags & NFS_UC_QUEUED) {
printf("nfsrv_uc_dequeue remove %p\n", ap);
TAILQ_REMOVE(myqueue->ucq_queue, ap, nua_svcq);
}
FREE(slp->ns_ua, M_TEMP);
slp->ns_ua = NULL;
- lck_mtx_unlock(myqueue->ucq_lock);
+ lck_mtx_unlock(&myqueue->ucq_lock);
}
/*
void
nfsrv_uc_init(void)
{
- int i;
-
- nfsrv_uc_group = lck_grp_alloc_init("nfs_upcall_locks", LCK_GRP_ATTR_NULL);
- for (i = 0; i < NFS_UC_HASH_SZ; i++) {
+ for (int i = 0; i < NFS_UC_HASH_SZ; i++) {
TAILQ_INIT(nfsrv_uc_queue_tbl[i].ucq_queue);
- nfsrv_uc_queue_tbl[i].ucq_lock = lck_mtx_alloc_init(nfsrv_uc_group, LCK_ATTR_NULL);
+ lck_mtx_init(&nfsrv_uc_queue_tbl[i].ucq_lock, &nfsrv_uc_group, LCK_ATTR_NULL);
nfsrv_uc_queue_tbl[i].ucq_thd = THREAD_NULL;
nfsrv_uc_queue_tbl[i].ucq_flags = 0;
}
- nfsrv_uc_shutdown_lock = lck_mtx_alloc_init(nfsrv_uc_group, LCK_ATTR_NULL);
}
/*
DPRINT("nfsrv_uc_start\n");
/* Wait until previous shutdown finishes */
- lck_mtx_lock(nfsrv_uc_shutdown_lock);
+ lck_mtx_lock(&nfsrv_uc_shutdown_lock);
while (nfsrv_uc_shutdown || nfsrv_uc_thread_count > 0) {
- msleep(&nfsrv_uc_thread_count, nfsrv_uc_shutdown_lock, PSOCK, "nfsd_upcall_shutdown_wait", NULL);
+ msleep(&nfsrv_uc_thread_count, &nfsrv_uc_shutdown_lock, PSOCK, "nfsd_upcall_shutdown_wait", NULL);
}
/* Start up-call threads */
nfsrv_uc_queue_count = 0ULL;
nfsrv_uc_queue_max_seen = 0ULL;
#endif
- lck_mtx_unlock(nfsrv_uc_shutdown_lock);
+ lck_mtx_unlock(&nfsrv_uc_shutdown_lock);
}
/*
/* Signal up-call threads to stop */
nfsrv_uc_shutdown = 1;
for (i = 0; i < thread_count; i++) {
- lck_mtx_lock(nfsrv_uc_queue_tbl[i].ucq_lock);
+ lck_mtx_lock(&nfsrv_uc_queue_tbl[i].ucq_lock);
wakeup(&nfsrv_uc_queue_tbl[i]);
- lck_mtx_unlock(nfsrv_uc_queue_tbl[i].ucq_lock);
+ lck_mtx_unlock(&nfsrv_uc_queue_tbl[i].ucq_lock);
}
/* Wait until they are done shutting down */
- lck_mtx_lock(nfsrv_uc_shutdown_lock);
+ lck_mtx_lock(&nfsrv_uc_shutdown_lock);
while (nfsrv_uc_thread_count > 0) {
- msleep(&nfsrv_uc_thread_count, nfsrv_uc_shutdown_lock, PSOCK, "nfsd_upcall_shutdown_stop", NULL);
+ msleep(&nfsrv_uc_thread_count, &nfsrv_uc_shutdown_lock, PSOCK, "nfsd_upcall_shutdown_stop", NULL);
}
/* Deallocate old threads */
/* Enable restarting */
nfsrv_uc_shutdown = 0;
- lck_mtx_unlock(nfsrv_uc_shutdown_lock);
+ lck_mtx_unlock(&nfsrv_uc_shutdown_lock);
}
/*
for (i = 0; i < NFS_UC_HASH_SZ; i++) {
struct nfsrv_uc_queue *queue = &nfsrv_uc_queue_tbl[i];
- lck_mtx_lock(queue->ucq_lock);
+ lck_mtx_lock(&queue->ucq_lock);
while (!TAILQ_EMPTY(queue->ucq_queue)) {
struct nfsrv_uc_arg *ep = TAILQ_FIRST(queue->ucq_queue);
TAILQ_REMOVE(queue->ucq_queue, ep, nua_svcq);
ep->nua_flags &= ~NFS_UC_QUEUED;
}
- lck_mtx_unlock(queue->ucq_lock);
+ lck_mtx_unlock(&queue->ucq_lock);
}
nfsrv_uc_stop();
int qi = uap->nua_qi;
struct nfsrv_uc_queue *myqueue = &nfsrv_uc_queue_tbl[qi];
- lck_mtx_lock(myqueue->ucq_lock);
+ lck_mtx_lock(&myqueue->ucq_lock);
DPRINT("nfsrv_uc_proxy called for %p (%p)\n", uap, uap->nua_slp);
DPRINT("\tUp-call queued on %d for wakeup of %p\n", qi, myqueue);
if (uap == NULL || uap->nua_flags & NFS_UC_QUEUED) {
- lck_mtx_unlock(myqueue->ucq_lock);
+ lck_mtx_unlock(&myqueue->ucq_lock);
return; /* Already queued or freed */
}
}
}
#endif
- lck_mtx_unlock(myqueue->ucq_lock);
+ lck_mtx_unlock(&myqueue->ucq_lock);
}
sizeof(struct nfsmount), ZC_ZFREE_CLEARMEM);
int nfs_ticks;
-static lck_grp_t *nfs_global_grp, *nfs_mount_grp;
-lck_mtx_t *nfs_global_mutex;
+static LCK_GRP_DECLARE(nfs_global_grp, "nfs_global");
+static LCK_GRP_DECLARE(nfs_mount_grp, "nfs_mount");
+LCK_MTX_DECLARE(nfs_global_mutex, &nfs_global_grp);
uint32_t nfs_fs_attr_bitmap[NFS_ATTR_BITMAP_LEN];
uint32_t nfs_object_attr_bitmap[NFS_ATTR_BITMAP_LEN];
uint32_t nfs_getattr_bitmap[NFS_ATTR_BITMAP_LEN];
/* NFS requests */
struct nfs_reqqhead nfs_reqq;
-lck_grp_t *nfs_request_grp;
-lck_mtx_t *nfs_request_mutex;
+LCK_GRP_DECLARE(nfs_request_grp, "nfs_request");
+LCK_MTX_DECLARE(nfs_request_mutex, &nfs_request_grp);
thread_call_t nfs_request_timer_call;
int nfs_request_timer_on;
u_int64_t nfs_xid = 0;
thread_call_t nfs_buf_timer_call;
/* NFSv4 */
-lck_grp_t *nfs_open_grp;
+LCK_GRP_DECLARE(nfs_open_grp, "nfs_open");
uint32_t nfs_open_owner_seqnum = 0;
uint32_t nfs_lock_owner_seqnum = 0;
thread_call_t nfs4_callback_timer_call;
char nfs4_default_domain[MAXPATHLEN];
/* nfsiod */
-lck_grp_t *nfsiod_lck_grp;
-lck_mtx_t *nfsiod_mutex;
+static LCK_GRP_DECLARE(nfsiod_lck_grp, "nfsiod");
+LCK_MTX_DECLARE(nfsiod_mutex, &nfsiod_lck_grp);
struct nfsiodlist nfsiodfree, nfsiodwork;
struct nfsiodmountlist nfsiodmounts;
int nfsiod_thread_count = 0;
TAILQ_INIT(&nfsiodfree);
TAILQ_INIT(&nfsiodwork);
TAILQ_INIT(&nfsiodmounts);
- nfsiod_lck_grp = lck_grp_alloc_init("nfsiod", LCK_GRP_ATTR_NULL);
- nfsiod_mutex = lck_mtx_alloc_init(nfsiod_lck_grp, LCK_ATTR_NULL);
-
- /* init lock groups, etc. */
- nfs_mount_grp = lck_grp_alloc_init("nfs_mount", LCK_GRP_ATTR_NULL);
- nfs_open_grp = lck_grp_alloc_init("nfs_open", LCK_GRP_ATTR_NULL);
- nfs_global_grp = lck_grp_alloc_init("nfs_global", LCK_GRP_ATTR_NULL);
-
- nfs_global_mutex = lck_mtx_alloc_init(nfs_global_grp, LCK_ATTR_NULL);
-
- /* init request list mutex */
- nfs_request_grp = lck_grp_alloc_init("nfs_request", LCK_GRP_ATTR_NULL);
- nfs_request_mutex = lck_mtx_alloc_init(nfs_request_grp, LCK_ATTR_NULL);
/* initialize NFS request list */
TAILQ_INIT(&nfs_reqq);
nfs_nbinit(); /* Init the nfsbuf table */
- nfs_nhinit(); /* Init the nfsnode table */
- nfs_lockinit(); /* Init the nfs lock state */
#if CONFIG_NFS_GSS
nfs_gss_init(); /* Init RPCSEC_GSS security */
#endif
/* convert address to universal address string */
if (ss.ss_family == AF_INET) {
- sinaddr = &((struct sockaddr_in*)&ss)->sin_addr;
+ if (ss.ss_len != sizeof(struct sockaddr_in)) {
+ error = EINVAL;
+ } else {
+ sinaddr = &((struct sockaddr_in*)&ss)->sin_addr;
+ }
} else if (ss.ss_family == AF_INET6) {
- sinaddr = &((struct sockaddr_in6*)&ss)->sin6_addr;
+ if (ss.ss_len != sizeof(struct sockaddr_in6)) {
+ error = EINVAL;
+ } else {
+ sinaddr = &((struct sockaddr_in6*)&ss)->sin6_addr;
+ }
} else {
sinaddr = NULL;
}
+ nfsmout_if(error);
+
if (!sinaddr || (inet_ntop(ss.ss_family, sinaddr, uaddr, sizeof(uaddr)) != uaddr)) {
error = EINVAL;
goto nfsmout;
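The added ss_len checks make sure the sockaddr is exactly the size its family implies before its address field is handed to inet_ntop(). A user-space flavored sketch of the same conversion (the helper name is hypothetical):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <stddef.h>

/* Hypothetical helper: convert a sockaddr_storage to a printable address,
 * but only when ss_len matches the exact size for the family. */
static const char *
ss_to_uaddr(const struct sockaddr_storage *ss, char *buf, socklen_t buflen)
{
	const void *addr = NULL;

	if (ss->ss_family == AF_INET && ss->ss_len == sizeof(struct sockaddr_in)) {
		addr = &((const struct sockaddr_in *)ss)->sin_addr;
	} else if (ss->ss_family == AF_INET6 && ss->ss_len == sizeof(struct sockaddr_in6)) {
		addr = &((const struct sockaddr_in6 *)ss)->sin6_addr;
	}
	return addr ? inet_ntop(ss->ss_family, addr, buf, buflen) : NULL;
}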
*npp = NULL;
fh.fh_len = dirfh.fh_len = 0;
+ lck_mtx_init(&nmp->nm_timer_lock, &nfs_mount_grp, LCK_ATTR_NULL);
TAILQ_INIT(&nmp->nm_open_owners);
TAILQ_INIT(&nmp->nm_delegations);
TAILQ_INIT(&nmp->nm_dreturnq);
}
/* set up lease renew timer */
- nmp->nm_renew_timer = thread_call_allocate(nfs4_renew_timer, nmp);
+ nmp->nm_renew_timer = thread_call_allocate_with_options(nfs4_renew_timer, nmp, THREAD_CALL_PRIORITY_HIGH, THREAD_CALL_OPTIONS_ONCE);
interval = nmp->nm_fsattr.nfsa_lease / 2;
if (interval < 1) {
interval = 1;
} else {
/* allocate an NFS mount structure for this mount */
nmp = zalloc_flags(nfsmnt_zone, Z_WAITOK | Z_ZERO);
- lck_mtx_init(&nmp->nm_lock, nfs_mount_grp, LCK_ATTR_NULL);
+ lck_mtx_init(&nmp->nm_lock, &nfs_mount_grp, LCK_ATTR_NULL);
TAILQ_INIT(&nmp->nm_resendq);
TAILQ_INIT(&nmp->nm_iodq);
TAILQ_INIT(&nmp->nm_gsscl);
vfs_unmountbyfsid(&hinfo.fsid, 0, vfs_context_kernel());
}
- lck_mtx_lock(nfs_global_mutex);
+ lck_mtx_lock(&nfs_global_mutex);
if (!hinfo.mountcount) {
/* no more ephemeral mounts - don't need timer */
nfs_ephemeral_mount_harvester_on = 0;
thread_call_enter_delayed(nfs_ephemeral_mount_harvester_timer, deadline);
nfs_ephemeral_mount_harvester_on = 1;
}
- lck_mtx_unlock(nfs_global_mutex);
+ lck_mtx_unlock(&nfs_global_mutex);
/* thread done */
thread_terminate(current_thread());
{
uint64_t deadline;
- lck_mtx_lock(nfs_global_mutex);
+ lck_mtx_lock(&nfs_global_mutex);
if (nfs_ephemeral_mount_harvester_on) {
- lck_mtx_unlock(nfs_global_mutex);
+ lck_mtx_unlock(&nfs_global_mutex);
return;
}
if (nfs_ephemeral_mount_harvester_timer == NULL) {
clock_interval_to_deadline(NFS_EPHEMERAL_MOUNT_HARVEST_INTERVAL, NSEC_PER_SEC, &deadline);
thread_call_enter_delayed(nfs_ephemeral_mount_harvester_timer, deadline);
nfs_ephemeral_mount_harvester_on = 1;
- lck_mtx_unlock(nfs_global_mutex);
+ lck_mtx_unlock(&nfs_global_mutex);
}
#endif
int error, port = 0;
if (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED) {
- bcopy(sa, &ss, sa->sa_len);
+ if (sa->sa_len > sizeof(ss)) {
+ return EINVAL;
+ }
+ bcopy(sa, &ss, MIN(sa->sa_len, sizeof(ss)));
error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss, NULL, RPCPROG_STAT, RPCMNT_VER1, NM_OMFLAG(nmp, MNTUDP) ? SOCK_DGRAM : sotype, timeo);
if (!error) {
if (ss.ss_family == AF_INET) {
/* cancel any renew timer */
if ((nmp->nm_vers >= NFS_VER4) && nmp->nm_renew_timer) {
+ lck_mtx_lock(&nmp->nm_timer_lock);
thread_call_cancel(nmp->nm_renew_timer);
thread_call_free(nmp->nm_renew_timer);
nmp->nm_renew_timer = NULL;
+ lck_mtx_unlock(&nmp->nm_timer_lock);
}
+
#endif
lck_mtx_unlock(&nmp->nm_lock);
#if CONFIG_NFS4
if ((nmp->nm_vers >= NFS_VER4) && nmp->nm_longid) {
/* remove/deallocate the client ID data */
- lck_mtx_lock(nfs_global_mutex);
+ lck_mtx_lock(&nfs_global_mutex);
TAILQ_REMOVE(&nfsclientids, nmp->nm_longid, nci_link);
if (nmp->nm_longid->nci_id) {
FREE(nmp->nm_longid->nci_id, M_TEMP);
}
FREE(nmp->nm_longid, M_TEMP);
nmp->nm_longid = NULL;
- lck_mtx_unlock(nfs_global_mutex);
+ lck_mtx_unlock(&nfs_global_mutex);
}
#endif
/*
* and removed from the resend queue.
*/
TAILQ_INIT(&resendq);
- lck_mtx_lock(nfs_request_mutex);
+ lck_mtx_lock(&nfs_request_mutex);
TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
if (req->r_nmp == nmp) {
lck_mtx_lock(&req->r_mtx);
lck_mtx_unlock(&req->r_mtx);
}
}
- lck_mtx_unlock(nfs_request_mutex);
+ lck_mtx_unlock(&nfs_request_mutex);
/* Since we've drop the request mutex we can now safely unreference the request */
TAILQ_FOREACH_SAFE(req, &resendq, r_rchain, treq) {
* local iod queue for processing.
*/
TAILQ_INIT(&iodq);
- lck_mtx_lock(nfs_request_mutex);
- lck_mtx_lock(nfsiod_mutex);
+ lck_mtx_lock(&nfs_request_mutex);
+ lck_mtx_lock(&nfsiod_mutex);
TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
if (req->r_nmp == nmp) {
lck_mtx_lock(&req->r_mtx);
TAILQ_REMOVE(&nfsiodmounts, nmp, nm_iodlink);
}
TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
- lck_mtx_unlock(nfsiod_mutex);
- lck_mtx_unlock(nfs_request_mutex);
+ lck_mtx_unlock(&nfsiod_mutex);
+ lck_mtx_unlock(&nfs_request_mutex);
TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
TAILQ_REMOVE(&iodq, req, r_achain);
lck_mtx_unlock(&nmp->nm_lock);
- lck_mtx_destroy(&nmp->nm_lock, nfs_mount_grp);
+ lck_mtx_destroy(&nmp->nm_lock, &nfs_mount_grp);
if (nmp->nm_fh) {
NFS_ZFREE(nfs_fhandle_zone, nmp->nm_fh);
}
+#if CONFIG_NFS4
+ if (nmp->nm_vers >= NFS_VER4) {
+ lck_mtx_destroy(&nmp->nm_timer_lock, &nfs_mount_grp);
+ }
+#endif
+
NFS_ZFREE(nfsmnt_zone, nmp);
}
* how long the threads have been waiting.
*/
- lck_mtx_lock(nfs_request_mutex);
+ lck_mtx_lock(&nfs_request_mutex);
lck_mtx_lock(&nmp->nm_lock);
/*
if (req->oldptr == USER_ADDR_NULL) { // Caller is querying buffer size
lck_mtx_unlock(&nmp->nm_lock);
- lck_mtx_unlock(nfs_request_mutex);
+ lck_mtx_unlock(&nfs_request_mutex);
return SYSCTL_OUT(req, NULL, totlen);
}
if (req->oldlen < totlen) { // Check if caller's buffer is big enough
lck_mtx_unlock(&nmp->nm_lock);
- lck_mtx_unlock(nfs_request_mutex);
+ lck_mtx_unlock(&nfs_request_mutex);
return ERANGE;
}
MALLOC(nsp, struct netfs_status *, totlen, M_TEMP, M_WAITOK | M_ZERO);
if (nsp == NULL) {
lck_mtx_unlock(&nmp->nm_lock);
- lck_mtx_unlock(nfs_request_mutex);
+ lck_mtx_unlock(&nfs_request_mutex);
return ENOMEM;
}
timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;
}
lck_mtx_unlock(&nmp->nm_lock);
- lck_mtx_unlock(nfs_request_mutex);
+ lck_mtx_unlock(&nfs_request_mutex);
error = SYSCTL_OUT(req, nsp, totlen);
FREE(nsp, M_TEMP);
}
/* lock the node while we remove the file */
- lck_mtx_lock(nfs_node_hash_mutex);
+ lck_mtx_lock(&nfs_node_hash_mutex);
while (np->n_hflag & NHLOCKED) {
np->n_hflag |= NHLOCKWANT;
- msleep(np, nfs_node_hash_mutex, PINOD, "nfs_remove", NULL);
+ msleep(np, &nfs_node_hash_mutex, PINOD, "nfs_remove", NULL);
}
np->n_hflag |= NHLOCKED;
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
if (!namedattrs) {
nfs_dulookup_init(dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
if (!inuse || (np->n_sillyrename && (nvattr->nva_nlink > 1))) {
if (!inuse && !flushed) { /* flush all the buffers first */
/* unlock the node */
- lck_mtx_lock(nfs_node_hash_mutex);
+ lck_mtx_lock(&nfs_node_hash_mutex);
np->n_hflag &= ~NHLOCKED;
if (np->n_hflag & NHLOCKWANT) {
np->n_hflag &= ~NHLOCKWANT;
wakeup(np);
}
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
nfs_node_clear_busy2(dnp, np);
error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
FSDBG(260, np, np->n_size, np->n_vattr.nva_size, 0xf00d0011);
* again if another object gets created with the same filehandle
* before this vnode gets reclaimed
*/
- lck_mtx_lock(nfs_node_hash_mutex);
+ lck_mtx_lock(&nfs_node_hash_mutex);
if (np->n_hflag & NHHASHED) {
LIST_REMOVE(np, n_hash);
np->n_hflag &= ~NHHASHED;
FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
}
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
/* clear flags now: won't get nfs_vnop_inactive for recycled vnode */
/* clear all flags other than these */
nfs_node_lock_force(np);
}
out:
/* unlock the node */
- lck_mtx_lock(nfs_node_hash_mutex);
+ lck_mtx_lock(&nfs_node_hash_mutex);
np->n_hflag &= ~NHLOCKED;
if (np->n_hflag & NHLOCKWANT) {
np->n_hflag &= ~NHLOCKWANT;
wakeup(np);
}
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
nfs_node_clear_busy2(dnp, np);
if (setsize) {
ubc_setsize(vp, 0);
if (tvp && (tvp != fvp)) {
/* lock the node while we rename over the existing file */
- lck_mtx_lock(nfs_node_hash_mutex);
+ lck_mtx_lock(&nfs_node_hash_mutex);
while (tnp->n_hflag & NHLOCKED) {
tnp->n_hflag |= NHLOCKWANT;
- msleep(tnp, nfs_node_hash_mutex, PINOD, "nfs_rename", NULL);
+ msleep(tnp, &nfs_node_hash_mutex, PINOD, "nfs_rename", NULL);
}
tnp->n_hflag |= NHLOCKED;
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
locked = 1;
}
tvprecycle = (!error && !vnode_isinuse(tvp, 0) &&
(nfs_getattrcache(tnp, nvattr, 0) || (nvattr->nva_nlink == 1)));
nfs_node_unlock(tnp);
- lck_mtx_lock(nfs_node_hash_mutex);
+ lck_mtx_lock(&nfs_node_hash_mutex);
if (tvprecycle && (tnp->n_hflag & NHHASHED)) {
/*
* remove nfsnode from hash now so we can't accidentally find it
tnp->n_hflag &= ~NHHASHED;
FSDBG(266, 0, tnp, tnp->n_flag, 0xb1eb1e);
}
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
}
/* purge the old name cache entries and enter the new one */
nfs_getattr(tdnp, NULL, ctx, NGA_CACHED);
if (locked) {
/* unlock node */
- lck_mtx_lock(nfs_node_hash_mutex);
+ lck_mtx_lock(&nfs_node_hash_mutex);
tnp->n_hflag &= ~NHLOCKED;
if (tnp->n_hflag & NHLOCKWANT) {
tnp->n_hflag &= ~NHLOCKWANT;
wakeup(tnp);
}
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
}
nfs_node_clear_busy4(fdnp, fnp, tdnp, tnp);
FREE(nvattr, M_TEMP);
* again if another object gets created with the same filehandle
* before this vnode gets reclaimed
*/
- lck_mtx_lock(nfs_node_hash_mutex);
+ lck_mtx_lock(&nfs_node_hash_mutex);
if (np->n_hflag & NHHASHED) {
LIST_REMOVE(np, n_hash);
np->n_hflag &= ~NHHASHED;
FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
}
- lck_mtx_unlock(nfs_node_hash_mutex);
+ lck_mtx_unlock(&nfs_node_hash_mutex);
}
NFS_ZFREE(nfs_req_zone, req);
FREE(dul, M_TEMP);
* Invalidate cached directory information, except for the actual directory
* blocks (which are invalidated separately).
*/
-void
-nfs_invaldir(nfsnode_t dnp)
+static void
+nfs_invaldir_cookies(nfsnode_t dnp)
{
if (vnode_vtype(NFSTOV(dnp)) != VDIR) {
return;
memset(dnp->n_cookiecache->next, -1, NFSNUMCOOKIES);
}
+void
+nfs_invaldir(nfsnode_t dnp)
+{
+
+ nfs_invaldir_cookies(dnp);
+}
+
/*
* calculate how much space is available for additional directory entries.
*/
dpptc = NULL;
found = 0;
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
/*
* Scan the list of buffers, keeping them in order.
* Note that itercomplete inserts each of the remaining buffers
}
nfs_buf_itercomplete(dnp, &blist, NBI_CLEAN);
}
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
if (found) {
OSAddAtomic64(1, &nfsstats.direofcache_hits);
return 0;
lbn = nextlbn;
}
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
if (found) {
dnp->n_lastdbl = lbn;
goto done;
}
done:
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
if (!error && found && !purge) {
error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh->fh_data,
nmrsize = nmp->nm_rsize;
bigcookies = nmp->nm_state & NFSSTA_BIGCOOKIES;
fh = zalloc(nfs_fhandle_zone);
-noplus:
+resend:
rdirplus = ((nfsvers > NFS_VER2) && NMFLAG(nmp, RDIRPLUS)) ? 1 : 0;
if ((lockerror = nfs_node_lock(dnp))) {
lck_mtx_lock(&nmp->nm_lock);
NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_RDIRPLUS);
lck_mtx_unlock(&nmp->nm_lock);
- goto noplus;
+ nfsm_chain_cleanup(&nmreq);
+ nfsm_chain_cleanup(&nmrep);
+ goto resend;
}
nfsmout_if(error);
if (!auth_is_kerberized(mp->nm_auth)) {
return ENOTSUP;
}
- error = nfs_gss_clnt_ctx_remove(mp, vfs_context_ucred(ctx));
+ if ((error = nfs_gss_clnt_ctx_remove(mp, vfs_context_ucred(ctx))) == ENOENT) {
+ error = 0;
+ }
break;
case NFS_IOC_SET_CRED:
case NFS_IOC_SET_CRED64:
xsize = f_offset + size - off;
}
lbn = (daddr64_t)(off / biosize);
- lck_mtx_lock(nfs_buf_mutex);
+ lck_mtx_lock(&nfs_buf_mutex);
if ((bp = nfs_buf_incore(np, lbn))) {
FSDBG(323, off, bp, bp->nb_lflags, bp->nb_flags);
if (nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0)) {
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
nfs_data_unlock_noupdate(np);
/* no panic. just tell vm we are busy */
if (!nofreeupl) {
nfsbufdelwricnt++;
nfs_buf_drop(bp);
nfs_buf_delwri_push(1);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
nfs_data_unlock_noupdate(np);
if (!nofreeupl) {
ubc_upl_abort_range(pl, pl_offset, size, 0);
FSDBG(323, bp, bp->nb_dirtyoff, bp->nb_dirtyend, 0xd00dee00);
/* we're leaving this block dirty */
nfs_buf_drop(bp);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
continue;
}
}
nfs_buf_remfree(bp);
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
SET(bp->nb_flags, NB_INVAL);
nfs_node_lock_force(np);
if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
nfs_node_unlock(np);
nfs_buf_release(bp, 1);
} else {
- lck_mtx_unlock(nfs_buf_mutex);
+ lck_mtx_unlock(&nfs_buf_mutex);
}
}
uint64_t mounttime; /* used as client ID verifier */
uint64_t clientid; /* client ID, short form */
thread_call_t renew_timer; /* RENEW timer call */
+ lck_mtx_t timer_lock; /* RENEW timer lock */
nfs_fsid fsid; /* NFS file system id */
TAILQ_HEAD(, nfsnode) delegations; /* list of nodes with delegations */
TAILQ_HEAD(, nfsnode) dreturnq; /* list of nodes with delegations to return */
#define nm_mounttime nm_un.v4.mounttime
#define nm_fsid nm_un.v4.fsid
#define nm_renew_timer nm_un.v4.renew_timer
+#define nm_timer_lock nm_un.v4.timer_lock
#define nm_cbid nm_un.v4.cbid
#define nm_cblink nm_un.v4.cblink
#define nm_cbrefs nm_un.v4.cbrefs
LIST_HEAD(nfsbuflists, nfsbuf);
TAILQ_HEAD(nfsbuffreehead, nfsbuf);
-extern lck_mtx_t *nfs_buf_mutex;
+extern lck_mtx_t nfs_buf_mutex;
extern int nfsbufcnt, nfsbufmin, nfsbufmax, nfsbufmetacnt, nfsbufmetamax;
extern int nfsbuffreecnt, nfsbuffreemetacnt, nfsbufdelwricnt, nfsneedbuffer;
extern int nfs_nbdwrite;
} while (0)
-extern lck_grp_t *nfs_open_grp;
+extern lck_grp_t nfs_open_grp;
extern uint32_t nfs_open_owner_seqnum, nfs_lock_owner_seqnum;
/*
#define NFSTOV(np) ((np)->n_vnode)
/* nfsnode hash table mutex */
-extern lck_mtx_t *nfs_node_hash_mutex;
+extern lck_mtx_t nfs_node_hash_mutex;
/*
* printf-like helper macro that also outputs node name.
TAILQ_HEAD(nfsiodmountlist, nfsmount);
extern struct nfsiodlist nfsiodfree, nfsiodwork;
extern struct nfsiodmountlist nfsiodmounts;
-extern lck_mtx_t *nfsiod_mutex;
+extern lck_mtx_t nfsiod_mutex;
#if defined(KERNEL)
#define RC_INETADDR 0x20
#define RC_NAM 0x40
-extern lck_grp_t *nfsrv_reqcache_lck_grp;
-extern lck_mtx_t *nfsrv_reqcache_mutex;
-
#endif /* __APPLE_API_PRIVATE */
#endif /* _NFS_NFSRVCACHE_H_ */
.ipc_port_copyout_send = ipc_port_copyout_send,
.task_get_ipcspace = get_task_ipcspace,
.vm_map_page_info = vm_map_page_info,
+ .ipc_port_copyout_send_pinned = ipc_port_copyout_send_pinned,
.thread_set_wq_state32 = thread_set_wq_state32,
#if !defined(__arm__)
.thread_set_wq_state64 = thread_set_wq_state64,
.semaphore_signal_internal_trap = semaphore_signal_internal_trap,
.current_map = _current_map,
.thread_create = thread_create,
+ /* should be removed once rdar://70892168 lands */
+ .thread_create_pinned = thread_create_pinned,
+ .thread_create_immovable = thread_create_immovable,
+ .thread_terminate_pinned = thread_terminate_pinned,
.thread_resume = thread_resume,
.kevent_workq_internal = kevent_workq_internal,
.convert_thread_to_port = convert_thread_to_port,
+ .convert_thread_to_port_pinned = convert_thread_to_port_pinned,
.proc_get_stack_addr_hint = proc_get_stack_addr_hint,
.proc_set_stack_addr_hint = proc_set_stack_addr_hint,
priority_queue_init(&wq->wq_constrained_queue);
priority_queue_init(&wq->wq_special_queue);
- wq->wq_delayed_call = thread_call_allocate_with_options(
- workq_add_new_threads_call, p, THREAD_CALL_PRIORITY_KERNEL,
+ /* We only use the delayed thread call for the constrained pool,
+ * which can't have work at >= UI QoS, so a UI QoS thread call
+ * is sufficient.
+ */
+ wq->wq_delayed_call = thread_call_allocate_with_qos(
+ workq_add_new_threads_call, p, THREAD_QOS_USER_INTERACTIVE,
THREAD_CALL_OPTIONS_ONCE);
wq->wq_immediate_call = thread_call_allocate_with_options(
workq_add_new_threads_call, p, THREAD_CALL_PRIORITY_KERNEL,
/*
* Compute a metric for many how many threads are active. We find the
- * highest priority request outstanding and then add up the number of
- * active threads in that and all higher-priority buckets. We'll also add
- * any "busy" threads which are not active but blocked recently enough that
- * we can't be sure they've gone idle yet. We'll then compare this metric
- * to our max concurrency to decide whether to add a new thread.
+ * highest priority request outstanding and then add up the number of active
+ * threads in that and all higher-priority buckets. We'll also add any
+ * "busy" threads which are not currently active but blocked recently enough
+ * that we can't be sure that they won't be unblocked soon and start
+ * being active again.
+ *
+ * We'll then compare this metric to our max concurrency to decide whether
+ * to add a new thread.
*/
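	/*
	 * A rough, purely illustrative pseudocode sketch of that decision
	 * (the names below are not the real workqueue fields):
	 *
	 *     metric = recently_blocked_busy_threads;
	 *     for each QoS bucket at or above the highest outstanding request:
	 *         metric += active_threads_in(bucket);
	 *     if (metric < max_concurrency)
	 *         add (or wake) another thread;
	 */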
uint32_t busycount, thactive_count;
thactive_count, busycount, 0);
}
- if (busycount && may_start_timer) {
+ if (may_start_timer) {
/*
* If this is called from the add timer, we won't have another timer
* fire when the thread exits the "busy" state, so rearm the timer.
workq_thread_reset_pri(wq, uth, req, /*unpark*/ true);
- thread_unfreeze_base_pri(uth->uu_thread);
-#if 0 // <rdar://problem/55259863> to turn this back on
if (__improbable(thread_unfreeze_base_pri(uth->uu_thread) && !is_creator)) {
if (req_ts) {
workq_perform_turnstile_operation_locked(wq, ^{
WQ_TRACE_WQ(TRACE_wq_select_threadreq | DBG_FUNC_NONE, wq, 3, 0, 0, 0);
goto park_thawed;
}
-#endif
/*
* We passed all checks, dequeue the request, bind to it, and set it up
park:
thread_unfreeze_base_pri(uth->uu_thread);
-#if 0 // <rdar://problem/55259863>
park_thawed:
-#endif
workq_park_and_unlock(p, wq, uth, setup_flags);
}
}
if (uth->uu_workq_thport == MACH_PORT_NULL) {
- /* convert_thread_to_port() consumes a reference */
+ /* convert_thread_to_port_pinned() consumes a reference */
thread_reference(th);
- ipc_port_t port = convert_thread_to_port(th);
- uth->uu_workq_thport = ipc_port_copyout_send(port, get_task_ipcspace(p->task));
+ /* Convert to immovable/pinned thread port, but port is not pinned yet */
+ ipc_port_t port = convert_thread_to_port_pinned(th);
+ /* Atomically, pin and copy out the port */
+ uth->uu_workq_thport = ipc_port_copyout_send_pinned(port, get_task_ipcspace(p->task));
}
/*
if (*vnode_mac_labelp != NULL) {
mac.m_buflen = MAC_AUDIT_LABEL_LEN;
mac.m_string = *vnode_mac_labelp;
- mac_vnode_label_externalize_audit(vp, &mac);
+ if (mac_vnode_label_externalize_audit(vp, &mac)) {
+ return;
+ }
}
}
#endif
}
mac.m_buflen = MAC_AUDIT_LABEL_LEN;
mac.m_string = ar->k_ar.ar_cred_mac_labels;
- mac_cred_label_externalize_audit(p, &mac);
+ if (mac_cred_label_externalize_audit(p, &mac)) {
+ zfree(audit_mac_label_zone, ar->k_ar.ar_cred_mac_labels);
+ return 1;
+ }
/*
* grab space for the records.
struct auditinfo_addr * const audit_default_aia_p = &audit_default_se.se_auinfo;
/* Copied from <ipc/ipc_object.h> */
-#define IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
+#define IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
mach_msg_type_name_t, ipc_port_t *, mach_port_context_t, mach_msg_guard_flags_t *, uint32_t);
void ipc_port_release_send(ipc_port_t);
if (ipc_object_copyin(get_task_ipcspace(p->task), send,
- MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND) != KERN_SUCCESS) {
+ MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND) != KERN_SUCCESS) {
*ret_asid = AU_DEFAUDITSID;
err = EINVAL;
} else {
*/
int bufattr_ioscheduled(bufattr_t bap);
+/*!
+ * @function bufattr_markexpeditedmeta
+ * @abstract Mark a metadata I/O buffer as expedited (i.e. requires a high I/O tier).
+ * @param bap Buffer attributes to mark.
+ * @discussion Marks the buffer so that spec_strategy() will know that it should be expedited.
+ */
+void bufattr_markexpeditedmeta(bufattr_t bap);
+
+/*!
+ * @function bufattr_expeditedmeta
+ * @abstract Check if a buffer is marked as expedited metadata I/O.
+ * @param bap Buffer attributes to test.
+ * @return Nonzero if the buffer is marked expedited metadata I/O, 0 otherwise.
+ */
+int bufattr_expeditedmeta(bufattr_t bap);
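The new BA_EXPEDITED_META_IO attribute lets a file system tag latency-sensitive metadata buffers so spec_strategy() can raise their effective I/O tier. A minimal usage sketch (buf_attr() and bufattr_markmeta() are existing buf KPIs; the wrapper function is made up):

#include <sys/buf.h>

/* Hypothetical helper: flag a metadata buffer as expedited before it goes
 * down the strategy path. */
static void
example_mark_expedited_meta(buf_t bp)
{
	bufattr_t bap = buf_attr(bp);

	bufattr_markmeta(bap);            /* it is metadata ... */
	bufattr_markexpeditedmeta(bap);   /* ... and it wants a high I/O tier */
}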
+
#ifdef KERNEL_PRIVATE
void buf_setfilter(buf_t, void (*)(buf_t, void *), void *, void(**)(buf_t, void *), void **);
/*
* These flags are kept in b_lflags...
- * buf_mtxp must be held before examining/updating
+ * buf_mtx must be held before examining/updating
*/
#define BL_BUSY 0x00000001 /* I/O in progress. */
#define BL_WANTED 0x00000002 /* Process wants this buffer. */
#define BA_STRATEGY_TRACKED_IO 0x00002000 /* tracked by spec_strategy */
#define BA_IO_TIER_UPGRADE 0x00004000 /* effective I/O tier is higher than BA_IO_TIER */
#define BA_IO_SCHEDULED 0x00008000 /* buf is associated with a mount point that is io scheduled */
+#define BA_EXPEDITED_META_IO 0x00010000 /* metadata I/O which needs a high I/O tier */
#define GET_BUFATTR_IO_TIER(bap) ((bap->ba_flags & BA_IO_TIER_MASK) >> BA_IO_TIER_SHIFT)
#define SET_BUFATTR_IO_TIER(bap, tier) \
uint64_t Ticks_per_sec;
} new_commpage_timeofday_data_t;
+/*!
+ * @macro COMM_PAGE_SLOT_TYPE
+ *
+ * @brief
+ * Macro that expands to the proper type for a pointer to a commpage slot,
+ * to be used in a local variable declaration.
+ *
+ * @description
+ * Usage is something like:
+ * <code>
+ * COMM_PAGE_SLOT_TYPE(uint64_t) slot = COMM_PAGE_SLOT(uint64_t, FOO);
+ * </code>
+ *
+ * @param type_t The scalar base type for the slot.
+ */
+#if __has_feature(address_sanitizer)
+#define COMM_PAGE_SLOT_TYPE(type_t) type_t __attribute__((address_space(1))) volatile *
+#else
+#define COMM_PAGE_SLOT_TYPE(type_t) type_t volatile *
+#endif
+
+/*!
+ * @macro COMM_PAGE_SLOT
+ *
+ * @brief
+ * Macro that expands to the properly typed address for a commpage slot.
+ *
+ * @param type_t The scalar base type for the slot.
+ * @param name The slot name, without its @c _COMM_PAGE_ prefix.
+ */
+#define COMM_PAGE_SLOT(type_t, name) ((COMM_PAGE_SLOT_TYPE(type_t))_COMM_PAGE_##name)
+
+/*!
+ * @macro COMM_PAGE_READ
+ *
+ * @brief
+ * Performs a single read from the commpage in a way that doesn't trip
+ * address sanitizers.
+ *
+ * @description
+ * Typical use looks like this:
+ * <code>
+ * uint64_t foo_value = COMM_PAGE_READ(uint64_t, FOO);
+ * </code>
+ *
+ * @param type_t The scalar base type for the slot.
+ * @param slot The slot name, without its @c _COMM_PAGE_ prefix.
+ */
+#define COMM_PAGE_READ(type_t, slot) (*(COMM_PAGE_SLOT(type_t, slot)))
+
#endif
#endif
#define CDEVSW_IS_PTS 0x08
struct thread;
-
-typedef struct devsw_lock {
- TAILQ_ENTRY(devsw_lock) dl_list;
- struct thread *dl_thread;
- dev_t dl_dev;
- int dl_mode;
-} *devsw_lock_t;
-
#endif /* BSD_KERNEL_PRIVATE */
*/
__BEGIN_DECLS
#ifdef KERNEL_PRIVATE
-void devsw_init(void);
extern struct cdevsw cdevsw[];
extern int cdevsw_setkqueueok(int, const struct cdevsw*, int);
#endif /* KERNEL_PRIVATE */
/*
* DTrace Implementation Locks
*/
+extern lck_attr_t dtrace_lck_attr;
+extern lck_grp_t dtrace_lck_grp;
extern lck_mtx_t dtrace_procwaitfor_lock;
/*
extern int dtrace_assfail(const char *, const char *, int);
extern int dtrace_attached(void);
extern hrtime_t dtrace_gethrestime(void);
-extern void dtrace_isa_init(void);
extern void dtrace_flush_caches(void);
#include <kern/debug.h> /* panic */
#include <pthread/priority_private.h>
-#ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_KQUEUE);
-#endif
-
LIST_HEAD(knote_list, knote);
TAILQ_HEAD(kqtailq, knote); /* a list of "queued" events */
#include <uuid/uuid.h>
extern int evh_debug;
-extern lck_grp_t *el_lock_grp;
-extern lck_attr_t *el_lock_attr;
+extern lck_grp_t el_lock_grp;
+extern lck_attr_t el_lock_attr;
extern struct eventhandler_entry_arg eventhandler_entry_dummy_arg;
struct eventhandler_lists_ctxt {
typedef struct eventhandler_entry *eventhandler_tag;
-#define EHL_LOCK_INIT(p) lck_mtx_init(&(p)->el_lock, el_lock_grp, el_lock_attr)
+#define EHL_LOCK_INIT(p) lck_mtx_init(&(p)->el_lock, &el_lock_grp, &el_lock_attr)
#define EHL_LOCK(p) lck_mtx_lock(&(p)->el_lock)
#define EHL_LOCK_SPIN(p) lck_mtx_lock_spin(&(p)->el_lock)
#define EHL_LOCK_CONVERT(p) lck_mtx_convert_spin(&(p)->el_lock)
#define EHL_UNLOCK(p) lck_mtx_unlock(&(p)->el_lock)
#define EHL_LOCK_ASSERT(p, x) LCK_MTX_ASSERT(&(p)->el_lock, x)
-#define EHL_LOCK_DESTROY(p) lck_mtx_destroy(&(p)->el_lock, el_lock_grp)
+#define EHL_LOCK_DESTROY(p) lck_mtx_destroy(&(p)->el_lock, &el_lock_grp)
#define evhlog(x) do { if (evh_debug >= 1) log x; } while (0)
/*
- * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2006-2020 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
void imageboot_setup(imageboot_type_t type);
int imageboot_format_is_valid(const char *root_path);
int imageboot_mount_image(const char *root_path, int height, imageboot_type_t type);
-int imageboot_pivot_image(const char *image_path, imageboot_type_t type, const char *mount_path, const char *outgoing_root_path, const bool rooted_dmg);
+int imageboot_pivot_image(const char *image_path, imageboot_type_t type, const char *mount_path,
+ const char *outgoing_root_path, const bool rooted_dmg, const bool skip_signature_check);
int imageboot_read_file(struct kalloc_heap *kheap, const char *path, void **bufp, size_t *bufszp);
int imageboot_read_file_from_offset(struct kalloc_heap *kheap, const char *path, off_t offset, void **bufp, size_t *bufszp);
kern_asl_msg(int level, const char *facility, size_t num_pairs, ...);
extern int escape_str(char *str, size_t len, size_t buflen);
-extern void fpxlog_init(void);
extern void fpxlog(int, uint32_t, uint32_t, uint32_t);
#endif /* !_SYS_KASL_H_ */
/*
* Initialisation.
*/
-extern lck_grp_t *kauth_lck_grp;
#ifdef XNU_KERNEL_PRIVATE
__BEGIN_DECLS
+
+extern lck_grp_t kauth_lck_grp;
+
extern void kauth_init(void);
extern void kauth_cred_init(void);
+/*
+ * If you need accounting for KM_KAUTH, consider using
+ * KALLOC_HEAP_DEFINE to define a view.
+ */
+#define KM_KAUTH KHEAP_DEFAULT
#if CONFIG_EXT_RESOLVER
-extern void kauth_identity_init(void);
-extern void kauth_groups_init(void);
-extern void kauth_resolver_init(void);
extern void kauth_resolver_identity_reset(void);
#endif
__END_DECLS
#define DBG_MACH_SCHED_CLUTCH 0xA9 /* Clutch scheduler */
#define DBG_MACH_IO 0xAA /* I/O */
#define DBG_MACH_WORKGROUP 0xAB /* Workgroup subsystem */
+#define DBG_MACH_HV 0xAC /* Hypervisor subsystem */
/* Codes for DBG_MACH_IO */
#define DBC_MACH_IO_MMIO_READ 0x1
#define MACH_TURNSTILE_KERNEL_CHANGE 0x40 /* sched priority change because of turnstile */
#define MACH_SCHED_WI_AUTO_JOIN 0x41 /* work interval auto join events */
#define MACH_SCHED_WI_DEFERRED_FINISH 0x42 /* work interval pending finish events for auto-join thread groups */
+#define MACH_SET_RT_DEADLINE 0x43 /* set thread->realtime.deadline */
+#define MACH_CANCEL_RT_DEADLINE 0x44 /* cancel thread->realtime.deadline */
#define MACH_PSET_AVG_EXEC_TIME 0x50
/* Codes for Clutch/Edge Scheduler (DBG_MACH_SCHED_CLUTCH) */
#define PMAP__UPDATE_CACHING 0x15
#define PMAP__ATTRIBUTE_CLEAR_RANGE 0x16
#define PMAP__CLEAR_USER_TTB 0x17
+#define PMAP__IOMMU_INIT 0x18
+#define PMAP__IOMMU_IOVMALLOC 0x19
+#define PMAP__IOMMU_IOVMFREE 0x1a
+#define PMAP__IOMMU_MAP 0x1b
+#define PMAP__IOMMU_UNMAP 0x1c
+#define PMAP__IOMMU_IOCTL 0x1d
+#define PMAP__IOMMU_GRANT_PAGE 0x1e
/* Codes for clock (DBG_MACH_CLOCK) */
#define MACH_EPOCH_CHANGE 0x0 /* wake epoch change */
#define RMON_LOGWRITES_VIOLATED_K32B 0x025
#define RMON_DISABLE_IO_MONITOR 0x02f
+/* Codes for Hypervisor (DBG_MACH_HV) */
+#define HV_GUEST_ENTER 0x000
+#define HV_GUEST_ERROR 0x001
+
/* **** The Kernel Debug Sub Classes for Network (DBG_NETWORK) **** */
#define DBG_NETIP 1 /* Internet Protocol */
#define DBG_NETARP 2 /* Address Resolution Protocol */
#define DBG_HFS_UPDATE_MINOR 0x40
#define DBG_HFS_UPDATE_SKIPPED 0x80
+/*
+ * Codes for Kernel Debug Sub Class DBG_VFS
+ */
+#define DBG_VFS_IO_COMPRESSION_STATS 0x1000
+
/* The Kernel Debug Sub Classes for BSD */
#define DBG_BSD_PROC 0x01 /* process/signals related */
#define DBG_BSD_MEMSTAT 0x02 /* memorystatus / jetsam operations */
#define MEMORYSTATUS_CMD_INCREASE_JETSAM_TASK_LIMIT 22 /* Used by DYLD to increase the jetsam active and inactive limits, when using roots */
#if PRIVATE
-#define MEMORYSTATUS_CMD_SET_JETSAM_SNAPSHOT_OWNERSHIP 23 /* Used by unit tests in the development kernel only. */
+#define MEMORYSTATUS_CMD_SET_TESTING_PID 23 /* Used by unit tests in the development kernel only. */
#endif /* PRIVATE */
#define MEMORYSTATUS_CMD_GET_PROCESS_IS_FROZEN 24 /* Check if the process is frozen. */
#define MEMORYSTATUS_FLAGS_GRP_SET_PROBABILITY 0x10 /* Set probability of use for a group of processes */
#if PRIVATE
-#define MEMORYSTATUS_FLAGS_SNAPSHOT_TAKE_OWNERSHIP 0x20 /* Only used by xnu unit tests. */
-#define MEMORYSTATUS_FLAGS_SNAPSHOT_DROP_OWNERSHIP 0x40 /* Only used by xnu unit tests. */
+#define MEMORYSTATUS_FLAGS_SET_TESTING_PID 0x20 /* Only used by xnu unit tests. */
+#define MEMORYSTATUS_FLAGS_UNSET_TESTING_PID 0x40 /* Only used by xnu unit tests. */
#endif /* PRIVATE */
#define MEMORYSTATUS_FLAGS_SNAPSHOT_FREEZER 0x80 /* A snapshot buffer containing app kills since last consumption */
extern unsigned int memorystatus_suspended_count;
extern unsigned int memorystatus_thaw_count; /* # of processes that have been thawed in the current interval. */
extern unsigned int memorystatus_refreeze_eligible_count; /* # of processes currently thawed i.e. have state on disk & in-memory */
+extern uint32_t memorystatus_freeze_current_interval; /* Monotonically increasing interval id. */
void memorystatus_freeze_init(void);
extern int memorystatus_freeze_process_sync(proc_t p);
void memorystatus_freeze_init_proc(proc_t p);
errno_t memorystatus_get_process_is_frozen(pid_t pid, int *is_freezable);
+/* Freezer counters collected for telemetry */
+struct memorystatus_freezer_stats_t {
+ /*
+ * # of processes that we've considered freezing.
+ * Used to normalize the error reasons below.
+ */
+ uint64_t mfs_process_considered_count;
+
+ /*
+ * The following counters track how many times we've failed to freeze
+ * a process because of a specific FREEZER_ERROR.
+ */
+ /* EXCESS_SHARED_MEMORY */
+ uint64_t mfs_error_excess_shared_memory_count;
+ /* LOW_PRIVATE_SHARED_RATIO */
+ uint64_t mfs_error_low_private_shared_ratio_count;
+ /* NO_COMPRESSOR_SPACE */
+ uint64_t mfs_error_no_compressor_space_count;
+ /* NO_SWAP_SPACE */
+ uint64_t mfs_error_no_swap_space_count;
+ /* pages < memorystatus_freeze_pages_min */
+ uint64_t mfs_error_below_min_pages_count;
+ /* dasd determined it was unlikely to be relaunched. */
+ uint64_t mfs_error_low_probability_of_use_count;
+ /* transient reasons (like inability to acquire a lock). */
+ uint64_t mfs_error_other_count;
+
+ /*
+ * # of times that we saw memorystatus_available_pages <= memorystatus_freeze_threshold.
+ * Used to normalize skipped_full_count and shared_mb_high_count.
+ */
+ uint64_t mfs_below_threshold_count;
+
+ /* Skipped running the freezer because we were out of slots */
+ uint64_t mfs_skipped_full_count;
+
+ /* Skipped running the freezer because we were over the shared mb limit */
+ uint64_t mfs_skipped_shared_mb_high_count;
+
+ /*
+ * How many pages have not been sent to swap because they were in a shared object?
+ * This is being used to gather telemetry so we can understand the impact we'd have
+ * on our NAND budget if we did swap out these pages.
+ */
+ uint64_t mfs_shared_pages_skipped;
+
+ /*
+ * A running sum of the total number of bytes sent to NAND during
+ * refreeze operations since boot.
+ */
+ uint64_t mfs_bytes_refrozen;
+ /* The number of refreeze operations since boot */
+ uint64_t mfs_refreeze_count;
+
+ /* The number of processes which have been frozen at least once in the current interval. */
+ uint64_t mfs_processes_frozen;
+ /* The number of processes which have been thawed at least once in the current interval. */
+ uint64_t mfs_processes_thawed;
+};
+extern struct memorystatus_freezer_stats_t memorystatus_freezer_stats;
+
#endif /* CONFIG_FREEZE */
#endif /* XNU_KERNEL_PRIVATE */
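As the comments note, mfs_process_considered_count and mfs_below_threshold_count are there to normalize the raw failure counters. A purely illustrative sketch of the kind of rate the telemetry is meant to support (this is not kernel code; it only assumes the struct definition shown above):

#include <stdint.h>

/* Illustrative: percentage of considered processes rejected for a low
 * private/shared page ratio. */
static uint64_t
mfs_low_ratio_reject_pct(const struct memorystatus_freezer_stats_t *s)
{
	if (s->mfs_process_considered_count == 0) {
		return 0;
	}
	return (s->mfs_error_low_private_shared_ratio_count * 100) /
	       s->mfs_process_considered_count;
}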
struct vnop_advlock_args;
struct vnode;
-#ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_LOCKF);
-#endif
-
#if IMPORTANCE_INHERITANCE
#define LF_NOT_BOOSTED 0
#define LF_BOOSTED 1
#define M_LAST 129 /* Must be last type + 1 */
+/*
+ * If you need accounting, consider using
+ * KALLOC_HEAP_DEFINE to define a view.
+ */
+#define KM_SHM KHEAP_DEFAULT
+
#define MALLOC(space, cast, size, type, flags) \
({ VM_ALLOC_SITE_STATIC(0, 0); \
(space) = (cast)__MALLOC(size, type, flags, &site); })
/* checksum start adjustment has been done */
#define CSUM_ADJUST_DONE 0x00020000
+/* VLAN encapsulation present */
+#define CSUM_VLAN_ENCAP_PRESENT 0x00040000 /* mbuf has vlan encapsulation */
+
/* TCP Segment Offloading requested on this mbuf */
#define CSUM_TSO_IPV4 0x00100000 /* This mbuf needs to be segmented by the NIC */
#define CSUM_TSO_IPV6 0x00200000 /* This mbuf needs to be segmented by the NIC */
u_int32_t m_bigclusters; /* clusters obtained from page pool */
u_int32_t m_bigclfree; /* free clusters */
u_int32_t m_bigmclbytes; /* length of an mbuf cluster */
+ u_int32_t m_forcedefunct; /* times we force defunct'ed an app's sockets */
};
/* Compatibility with 10.3 */
u_int32_t mc_nwretry_cnt; /* # of no-wait retry attempts */
u_int32_t mc_nwfail_cnt; /* # of no-wait retries that failed */
decl_lck_mtx_data(, mc_sync_lock); /* protects purges and reenables */
- lck_attr_t *mc_sync_lock_attr;
lck_grp_t *mc_sync_lock_grp;
- lck_grp_attr_t *mc_sync_lock_grp_attr;
/*
* Keep CPU and buckets layers lock statistics separate.
*/
- lck_attr_t *mc_cpu_lock_attr;
lck_grp_t *mc_cpu_lock_grp;
- lck_grp_attr_t *mc_cpu_lock_grp_attr;
/*
* Bucket layer common to all CPUs
*/
decl_lck_mtx_data(, mc_bkt_lock);
- lck_attr_t *mc_bkt_lock_attr;
lck_grp_t *mc_bkt_lock_grp;
- lck_grp_attr_t *mc_bkt_lock_grp_attr;
mcache_bkttype_t *cache_bkttype; /* bucket type */
mcache_bktlist_t mc_full; /* full buckets */
mcache_bktlist_t mc_empty; /* empty buckets */
#define MCA_TRN_MAX 2 /* Number of transactions to record */
+#define DUMP_MCA_BUF_SIZE 512
+
typedef struct mcache_audit {
struct mcache_audit *mca_next; /* next audit struct */
void *mca_addr; /* address of buffer */
void *, size_t, size_t);
__private_extern__ void mcache_audit_free_verify_set(mcache_audit_t *,
void *, size_t, size_t);
-__private_extern__ char *mcache_dump_mca(mcache_audit_t *);
+__private_extern__ char *mcache_dump_mca(char buf[DUMP_MCA_BUF_SIZE], mcache_audit_t *);
__private_extern__ void mcache_audit_panic(mcache_audit_t *, void *, size_t,
int64_t, int64_t) __abortlike;
#else /* KERNEL */
#ifdef XNU_KERNEL_PRIVATE
void pshm_cache_init(void); /* for bsd_init() */
-void pshm_lock_init(void);
/*
* XXX routine exported by posix_shm.c, but never used there, only used in
#define MT_KDBG_TMPTH_START(CODE) MT_KDBG_TMPTH_(CODE, DBG_FUNC_START)
#define MT_KDBG_TMPTH_END(CODE) MT_KDBG_TMPTH_(CODE, DBG_FUNC_END)
-extern lck_grp_t * mt_lock_grp;
+extern lck_grp_t mt_lock_grp;
int mt_dev_init(void);
void vfs_setcompoundopen(mount_t mp);
uint64_t vfs_throttle_mask(mount_t mp);
int vfs_isswapmount(mount_t mp);
+int vfs_context_dataless_materialization_is_prevented(vfs_context_t);
boolean_t vfs_context_is_dataless_manipulator(vfs_context_t);
boolean_t vfs_context_can_resolve_triggers(vfs_context_t);
void vfs_setmntsystem(mount_t mp);
int vfs_switch_root(const char *, const char *, vfs_switch_root_flags_t);
int vfs_mountroot(void);
-void vfs_unmountall(void);
+void vfs_unmountall(int only_non_system);
int safedounmount(struct mount *, int, vfs_context_t);
int dounmount(struct mount *, int, int, vfs_context_t);
void dounmount_submounts(struct mount *, int, vfs_context_t);
#define KERNEL_MOUNT_PREBOOTVOL 0x20 /* mount the Preboot volume */
#define KERNEL_MOUNT_RECOVERYVOL 0x40 /* mount the Recovery volume */
#define KERNEL_MOUNT_BASESYSTEMROOT 0x80 /* mount a base root volume "instead of" the full root volume (only used during bsd_init) */
+#define KERNEL_MOUNT_DEVFS 0x100 /* kernel startup mount of devfs */
/* mask for checking if any of the "mount volume by role" flags are set */
#define KERNEL_MOUNT_VOLBYROLE_MASK (KERNEL_MOUNT_DATAVOL | KERNEL_MOUNT_VMVOL | KERNEL_MOUNT_PREBOOTVOL | KERNEL_MOUNT_RECOVERYVOL)
extern int num_trailing_0(uint64_t n);
/* sync lock */
-extern lck_mtx_t * sync_mtx_lck;
-
extern int sync_timeout_seconds;
extern zone_t mount_zone;
void munge_wwws(void *args);
void munge_wwwsw(void *args);
void munge_llllll(void *args);
+void munge_llll(void *args);
void munge_l(void *args);
void munge_ll(void *args);
void munge_lw(void *args);
#include <mach/coalition.h> /* COALITION_NUM_TYPES */
#endif
+#ifndef KERNEL
+#include <Availability.h>
+#endif
+
#if defined(XNU_KERNEL_PRIVATE) || !defined(KERNEL)
struct session;
/* true if the process ignores errors from content protection APIs */
extern bool proc_ignores_content_protection(proc_t proc);
+/* true if the file system shouldn't update mtime for operations by the process */
+extern bool proc_skip_mtime_update(proc_t proc);
+
/*!
* @function proc_exitstatus
* @abstract KPI to determine a process's exit status.
int pid_suspend(int pid);
int pid_resume(int pid);
-int task_inspect_for_pid(unsigned int target_tport, int pid, unsigned int *t);
-int task_read_for_pid(unsigned int target_tport, int pid, unsigned int *t);
+__API_AVAILABLE(macos(11.3), ios(14.5), tvos(14.5), watchos(7.3))
+int task_inspect_for_pid(unsigned int target_tport, int pid, unsigned int *t); /* Returns task inspect port */
+__API_AVAILABLE(macos(11.3), ios(14.5), tvos(14.5), watchos(7.3))
+int task_read_for_pid(unsigned int target_tport, int pid, unsigned int *t); /* Returns task read port */
#if defined(__arm__) || defined(__arm64__)
int pid_hibernate(int pid);
uint32_t p_memstat_freeze_sharedanon_pages; /* shared pages left behind after freeze */
uint32_t p_memstat_frozen_count;
uint32_t p_memstat_thaw_count;
+ uint32_t p_memstat_last_thaw_interval; /* In which freezer interval was this last thawed? */
#endif /* CONFIG_FREEZE */
#endif /* CONFIG_MEMORYSTATUS */
#define P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME 0x0008
#define P_VFS_IOPOLICY_TRIGGER_RESOLVE_DISABLE 0x0010
#define P_VFS_IOPOLICY_IGNORE_CONTENT_PROTECTION 0x0020
-#define P_VFS_IOPOLICY_VALID_MASK (P_VFS_IOPOLICY_ATIME_UPDATES | P_VFS_IOPOLICY_FORCE_HFS_CASE_SENSITIVITY | P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES | P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME | P_VFS_IOPOLICY_TRIGGER_RESOLVE_DISABLE | P_VFS_IOPOLICY_IGNORE_CONTENT_PROTECTION)
+#define P_VFS_IOPOLICY_IGNORE_NODE_PERMISSIONS 0x0040
+#define P_VFS_IOPOLICY_SKIP_MTIME_UPDATE 0x0080
+#define P_VFS_IOPOLICY_VALID_MASK (P_VFS_IOPOLICY_ATIME_UPDATES | P_VFS_IOPOLICY_FORCE_HFS_CASE_SENSITIVITY | P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES | P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME | \
+ P_VFS_IOPOLICY_TRIGGER_RESOLVE_DISABLE | P_VFS_IOPOLICY_IGNORE_CONTENT_PROTECTION | P_VFS_IOPOLICY_IGNORE_NODE_PERMISSIONS | P_VFS_IOPOLICY_SKIP_MTIME_UPDATE)
/* process creation arguments */
#define PROC_CREATE_FORK 0 /* independent child (running) */
#define PID_MAX 99999
#define NO_PID 100000
-extern lck_mtx_t * proc_list_mlock;
-extern lck_mtx_t * proc_klist_mlock;
+extern lck_mtx_t proc_list_mlock;
#define BSD_SIMUL_EXECS 33 /* 32 , allow for rounding */
#define BSD_PAGEABLE_SIZE_PER_EXEC (NCARGS + PAGE_SIZE + PAGE_SIZE) /* page for apple vars, page for executable header */
extern LIST_HEAD(sesshashhead, session) * sesshashtbl;
extern u_long sesshash;
-extern lck_grp_t * proc_lck_grp;
-extern lck_grp_t * proc_fdmlock_grp;
-extern lck_grp_t * proc_kqhashlock_grp;
-extern lck_grp_t * proc_knhashlock_grp;
-extern lck_grp_t * proc_mlock_grp;
-extern lck_grp_t * proc_ucred_mlock_grp;
-extern lck_grp_t * proc_slock_grp;
-extern lck_grp_t * proc_dirslock_grp;
-extern lck_grp_attr_t * proc_lck_grp_attr;
-extern lck_attr_t * proc_lck_attr;
+extern lck_attr_t proc_lck_attr;
+extern lck_grp_t proc_fdmlock_grp;
+extern lck_grp_t proc_lck_grp;
+extern lck_grp_t proc_kqhashlock_grp;
+extern lck_grp_t proc_knhashlock_grp;
+extern lck_grp_t proc_slock_grp;
+extern lck_grp_t proc_mlock_grp;
+extern lck_grp_t proc_ucred_mlock_grp;
+extern lck_grp_t proc_dirslock_grp;
LIST_HEAD(proclist, proc);
extern struct proclist allproc; /* List of all processes. */
extern struct proc_ident proc_ident(proc_t p);
+/*
+ * True if the process ignores file permissions when it owns the
+ * file/directory
+ */
+bool proc_ignores_node_permissions(proc_t proc);
+
#endif /* !_SYS_PROC_INTERNAL_H_ */
/* osfmk/vm/vm_map.h */
kern_return_t (*vm_map_page_info)(vm_map_t map, vm_map_offset_t offset, vm_page_info_flavor_t flavor, vm_page_info_t info, mach_msg_type_number_t *count);
- void *__unused_was_vm_map_switch;
+ mach_port_name_t (*ipc_port_copyout_send_pinned)(ipc_port_t sright, ipc_space_t space);
/* wq functions */
kern_return_t (*thread_set_wq_state32)(thread_t thread, thread_state_t state);
uint16_t (*thread_set_tag)(thread_t thread, uint16_t tag);
uint16_t (*thread_get_tag)(thread_t thread);
- void *__unused_was_proc_usynch_thread_qos_squash_override_for_resource;
- void *__unused_was_task_get_default_manager_qos;
- void *__unused_was_thread_create_workq_waiting;
+ kern_return_t (*thread_create_pinned)(task_t parent_task, thread_t *new_thread);
+ kern_return_t (*thread_terminate_pinned)(thread_t thread);
+ ipc_port_t (*convert_thread_to_port_pinned)(thread_t th);
user_addr_t (*proc_get_stack_addr_hint)(struct proc *p);
void (*proc_set_stack_addr_hint)(struct proc *p, user_addr_t stack_addr_hint);
- void *__unused_was_proc_get_return_to_kernel_offset;
+ kern_return_t (*thread_create_immovable)(task_t parent_task, thread_t *new_thread);
void (*proc_set_return_to_kernel_offset)(struct proc *t, uint64_t offset);
void *__unused_was_workloop_fulfill_threadreq;
void dqflush(struct vnode *);
int dqget(u_int32_t, struct quotafile *, int, struct dquot **);
void dqhashinit(void);
-void dqinit(void);
int dqisinitialized(void);
void dqref(struct dquot *);
void dqrele(struct dquot *);
#define IOPOL_TYPE_VFS_STATFS_NO_DATA_VOLUME 4
#define IOPOL_TYPE_VFS_TRIGGER_RESOLVE 5
#define IOPOL_TYPE_VFS_IGNORE_CONTENT_PROTECTION 6
+#define IOPOL_TYPE_VFS_IGNORE_PERMISSIONS 7
+#define IOPOL_TYPE_VFS_SKIP_MTIME_UPDATE 8
/* scope */
#define IOPOL_SCOPE_PROCESS 0
#define IOPOL_VFS_CONTENT_PROTECTION_DEFAULT 0
#define IOPOL_VFS_CONTENT_PROTECTION_IGNORE 1
+#define IOPOL_VFS_IGNORE_PERMISSIONS_OFF 0
+#define IOPOL_VFS_IGNORE_PERMISSIONS_ON 1
+
+#define IOPOL_VFS_SKIP_MTIME_UPDATE_OFF 0
+#define IOPOL_VFS_SKIP_MTIME_UPDATE_ON 1
+
#ifdef PRIVATE
/*
* Structures for use in communicating via iopolicysys() between Libc and the
void sbuf_delete(struct sbuf *);
#endif
-#ifdef KERNEL
-struct uio;
-struct sbuf *sbuf_uionew(struct sbuf *, struct uio *, int *);
-int sbuf_bcopyin(struct sbuf *, const void *, size_t);
-int sbuf_copyin(struct sbuf *, const void *, size_t);
-#endif
__END_DECLS
#endif
void selrecord(proc_t selector, struct selinfo *, void *);
void selwakeup(struct selinfo *);
void selthreadclear(struct selinfo *);
+#if XNU_KERNEL_PRIVATE
+struct _select;
+void select_cleanup_uthread(struct _select *);
+#endif
__END_DECLS
__END_DECLS
#else /* KERNEL */
-void psem_lock_init(void);
void psem_cache_init(void);
#endif /* KERNEL */
#include <sys/eventhandler.h>
#endif /* BSD_KERNEL_PRIVATE */
#endif /* KERNEL_PRIVATE */
+#if !KERNEL
+#include <TargetConditionals.h>
+#endif
typedef u_quad_t so_gen_t;
uid_t so_uid; /* XXX */
};
-#if XNU_TARGET_OS_OSX || !(TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR)
+#if XNU_TARGET_OS_OSX || KERNEL || !(TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR)
struct xsocket64 {
u_int32_t xso_len; /* length of this structure */
u_int64_t xso_so; /* makes a convenient handle */
struct xsockbuf so_snd;
uid_t so_uid; /* XXX */
};
-#endif /* XNU_TARGET_OS_OSX || !(TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR) */
+#endif /* XNU_TARGET_OS_OSX || KERNEL || !(TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR) */
#ifdef PRIVATE
#define XSO_SOCKET 0x001
#include <sys/time.h>
#include <sys/ucred.h>
#else
-#ifndef XNU_KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
+#include <kern/startup.h>
+#include <libkern/section_keywords.h>
+#else
#include <libkern/sysctl.h>
#include <os/base.h>
-#endif
+#endif /* XNU_KERNEL_PRIVATE */
+#endif /* KERNEL */
-#endif
#include <sys/proc.h>
#include <sys/vm.h>
-#ifdef XNU_KERNEL_PRIVATE
-#include <sys/linker_set.h>
-#endif
-
/*
* Definitions for sysctl call. The sysctl call uses a hierarchical name
* for objects that can be examined or modified. The name is expressed as
int ctl_type; /* type of name */
};
-#define CTLTYPE 0xf /* Mask for the type */
-#define CTLTYPE_NODE 1 /* name is a node */
-#define CTLTYPE_INT 2 /* name describes an integer */
-#define CTLTYPE_STRING 3 /* name describes a string */
-#define CTLTYPE_QUAD 4 /* name describes a 64-bit number */
-#define CTLTYPE_OPAQUE 5 /* name describes a structure */
-#define CTLTYPE_STRUCT CTLTYPE_OPAQUE /* name describes a structure */
-
-#define CTLFLAG_RD 0x80000000 /* Allow reads of variable */
-#define CTLFLAG_WR 0x40000000 /* Allow writes to the variable */
-#define CTLFLAG_RW (CTLFLAG_RD|CTLFLAG_WR)
-#define CTLFLAG_NOLOCK 0x20000000 /* XXX Don't Lock */
-#define CTLFLAG_ANYBODY 0x10000000 /* All users can set this var */
-#define CTLFLAG_SECURE 0x08000000 /* Permit set only if securelevel<=0 */
-#define CTLFLAG_MASKED 0x04000000 /* deprecated variable, do not display */
-#define CTLFLAG_NOAUTO 0x02000000 /* do not auto-register */
-#define CTLFLAG_KERN 0x01000000 /* valid inside the kernel */
-#define CTLFLAG_LOCKED 0x00800000 /* node will handle locking itself */
-#define CTLFLAG_OID2 0x00400000 /* struct sysctl_oid has version info */
+#define CTLTYPE 0xf /* Mask for the type */
+#define CTLTYPE_NODE 1 /* name is a node */
+#define CTLTYPE_INT 2 /* name describes an integer */
+#define CTLTYPE_STRING 3 /* name describes a string */
+#define CTLTYPE_QUAD 4 /* name describes a 64-bit number */
+#define CTLTYPE_OPAQUE 5 /* name describes a structure */
+#define CTLTYPE_STRUCT CTLTYPE_OPAQUE /* name describes a structure */
+
+#define CTLFLAG_RD 0x80000000 /* Allow reads of variable */
+#define CTLFLAG_WR 0x40000000 /* Allow writes to the variable */
+#define CTLFLAG_RW (CTLFLAG_RD|CTLFLAG_WR)
+#define CTLFLAG_NOLOCK 0x20000000 /* XXX Don't Lock */
+#define CTLFLAG_ANYBODY 0x10000000 /* All users can set this var */
+#define CTLFLAG_SECURE 0x08000000 /* Permit set only if securelevel<=0 */
+#define CTLFLAG_MASKED 0x04000000 /* deprecated variable, do not display */
+#define CTLFLAG_NOAUTO 0x02000000 /* do not auto-register */
+#define CTLFLAG_KERN 0x01000000 /* valid inside the kernel */
+#define CTLFLAG_LOCKED 0x00800000 /* node will handle locking itself */
+#define CTLFLAG_OID2 0x00400000 /* struct sysctl_oid has version info */
+#if XNU_KERNEL_PRIVATE
+#define CTLFLAG_PERMANENT 0x00200000 /* permanent sysctl_oid */
+#endif
+#define CTLFLAG_EXPERIMENT 0x00100000 /* Allows writing w/ the trial experiment entitlement. */
/*
* USE THIS instead of a hardwired number from the categories below
* in I/O-Kit. In this case, you have to call sysctl_register_oid()
* manually - just like in a KEXT.
*/
-#define OID_AUTO (-1)
-#define OID_AUTO_START 100 /* conventional */
+#define OID_AUTO (-1)
+#if XNU_KERNEL_PRIVATE
+/*
+ * Used to allow for most of the core kernel sysctl OIDs to be in immutable
+ * memory. Nodes that can be extensible have a fake first node with this
+ * particular oid_number, off which a second, mutable list is hung.
+ *
+ * When used, this anchor node is always first in the list.
+ */
+#define OID_MUTABLE_ANCHOR (INT_MIN)
+#endif
+#define OID_AUTO_START 100 /* conventional */
#ifdef KERNEL
-#define SYSCTL_HANDLER_ARGS (struct sysctl_oid *oidp, void *arg1, int arg2, \
+#define SYSCTL_HANDLER_ARGS \
+ (struct sysctl_oid *oidp __unused, void *arg1 __unused, int arg2 __unused, \
struct sysctl_req *req)
void sysctl_register_oid(struct sysctl_oid *oidp);
void sysctl_unregister_oid(struct sysctl_oid *oidp);
-void sysctl_load_devicetree_entries(void);
#define nvram_osenvironment "osenvironment"
void sysctl_set_osenvironment(unsigned int size, const void* value);
void sysctl_unblock_osenvironment(void);
#define SYSCTL_DECL(name) \
extern struct sysctl_oid_list sysctl_##name##_children
-#ifdef XNU_KERNEL_PRIVATE
-#define SYSCTL_LINKER_SET_ENTRY LINKER_SET_ENTRY
-#else
-#define SYSCTL_LINKER_SET_ENTRY(a, b)
-#endif
/*
* Macros to define sysctl entries. Which to use? Pure data that are
* returned without modification, SYSCTL_<data type> is for you, like
/* This constructs a "raw" MIB oid. */
-#define SYSCTL_STRUCT_INIT(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \
- { \
- &sysctl_##parent##_children, { NULL }, \
- nbr, (int)(kind|CTLFLAG_OID2), a1, (int)(a2), #name, handler, fmt, descr, SYSCTL_OID_VERSION, 0 \
+#define SYSCTL_STRUCT_INIT(parent, nbr, name, kind, a1, a2, fn, fmt, desc) { \
+ .oid_parent = &sysctl_##parent##_children, \
+ .oid_number = nbr, \
+ .oid_kind = (int)(kind | CTLFLAG_OID2), \
+ .oid_arg1 = a1, \
+ .oid_arg2 = (int)(a2), \
+ .oid_name = #name, \
+ .oid_handler = fn, \
+ .oid_fmt = fmt, \
+ .oid_descr = desc, \
+ .oid_version = SYSCTL_OID_VERSION, \
}
+#define __SYSCTL_OID(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \
+ struct sysctl_oid sysctl_##parent##_##name = SYSCTL_STRUCT_INIT(\
+ parent, nbr, name, kind, a1, a2, handler, fmt, descr)
+
+#if XNU_KERNEL_PRIVATE
+
+/*
+ * Core kernel registers sysctls before lockdown and protects those entries
+ * in immutable memory.
+ *
+ * When a node needs to support dynamic extension after lockdown, it must be
+ * declared with SYSCTL_EXTENSIBLE_NODE(), which inserts a dummy "OID_MUTABLE_ANCHOR"
+ * node into the node chain to allow extensibility.
+ *
+ * OIDs that are to be inserted dynamically based on system properties that
+ * aren't known at compile time, have three options, in increasing order of
+ * unsafety:
+ *
+ * - The OID can use the CTLFLAG_NOAUTO flag. Such entries aren't inserted to
+ * the sysctl tree automatically but will be made read-only at lock down.
+ *
+ * Such entries must be inserted in the STARTUP_SUB_SYSCTL "Middle" phase
+ * using sysctl_register_oid_early().
+ *
+ * - The OID can always be registered and test whether it is ready to operate.
+ * When it is not, it must return ENOENT, which simulates an absent entry.
+ *
+ * This, however, has the downside that the entry is still resolvable as an MIB
+ * or listed in `sysctl -a` when it isn't masked.
+ *
+ * This is acceptable for sysctls that will become valid quickly during boot
+ * (but after lockdown).
+ *
+ * - SYSCTL_OID_MANUAL / SYSCTL_NODE_MANUAL can be used for completely
+ * dynamic/manual oid registration. Such nodes must be registered with
+ * sysctl_register_oid() after lockdown.
+ *
+ * This is the least preferred solution.
+ */
+
+__BEGIN_DECLS
+void sysctl_register_oid_early(struct sysctl_oid *oidp);
+__END_DECLS
+
+#define SYSCTL_OID_MANUAL(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \
+ __XNU_PRIVATE_EXTERN \
+ __SYSCTL_OID(parent, nbr, name, kind, a1, a2, handler, fmt, descr)
+
+#define SYSCTL_NODE_MANUAL(parent, nbr, name, access, handler, descr) \
+ struct sysctl_oid_list sysctl_##parent##_##name##_children; \
+ __XNU_PRIVATE_EXTERN \
+ __SYSCTL_OID(parent, nbr, name, CTLTYPE_NODE|access, \
+ &sysctl_##parent##_##name##_children, 0, handler, "N", descr);
+
+#define SYSCTL_OID(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \
+ __security_const_late __XNU_PRIVATE_EXTERN \
+ __SYSCTL_OID(parent, nbr, name, CTLFLAG_PERMANENT|kind, \
+ a1, a2, handler, fmt, descr); \
+ __STARTUP_ARG(sysctl_##parent, _##name, \
+ SYSCTL, STARTUP_RANK_SECOND, sysctl_register_oid_early, \
+ &sysctl_##parent##_##name)
+
+#define __SYSCTL_NODE(parent, nbr, name, access, handler, descr) \
+ __security_const_late \
+ struct sysctl_oid_list sysctl_##parent##_##name##_children; \
+ __security_const_late __XNU_PRIVATE_EXTERN \
+ __SYSCTL_OID(parent, nbr, name, CTLFLAG_PERMANENT|CTLTYPE_NODE|access, \
+ &sysctl_##parent##_##name##_children, 0, handler, "N", descr); \
+ __STARTUP_ARG(sysctl_##parent, _##name, \
+ SYSCTL, STARTUP_RANK_FIRST, sysctl_register_oid_early, \
+ &sysctl_##parent##_##name)
+
+#define __SYSCTL_EXTENSION_NODE(name) \
+ static __security_read_write \
+ struct sysctl_oid_list sysctl_##name##_children_mutable; \
+ static __security_const_late \
+ struct sysctl_oid sysctl_##name##_wranchor = { \
+ .oid_parent = &sysctl_##name##_children, \
+ .oid_number = OID_MUTABLE_ANCHOR, \
+ .oid_kind = CTLFLAG_OID2 | CTLFLAG_PERMANENT, \
+ .oid_arg1 = &sysctl_##name##_children_mutable, \
+ .oid_name = "__anchor__(" #name ")", \
+ .oid_version = SYSCTL_OID_VERSION, \
+ }; \
+ __STARTUP_ARG(sysctl_##name, _wranchor, \
+ SYSCTL, STARTUP_RANK_LAST, sysctl_register_oid_early, \
+ &sysctl_##name##_wranchor)
+
+#define SYSCTL_NODE(parent, nbr, name, access, handler, descr) \
+ __XNU_PRIVATE_EXTERN \
+ __SYSCTL_NODE(parent, nbr, name, access, handler, descr)
+
+#define SYSCTL_EXTENSIBLE_NODE(parent, nbr, name, access, handler, descr) \
+ __SYSCTL_NODE(parent, nbr, name, access, handler, descr); \
+ __SYSCTL_EXTENSION_NODE(parent##_##name)
+#else
#define SYSCTL_OID(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \
- struct sysctl_oid sysctl_##parent##_##name = SYSCTL_STRUCT_INIT(parent, nbr, name, kind, a1, a2, handler, fmt, descr); \
- SYSCTL_LINKER_SET_ENTRY(__sysctl_set, sysctl_##parent##_##name)
+ __SYSCTL_OID(parent, nbr, name, kind, a1, a2, handler, fmt, descr)
/* This constructs a node from which other oids can hang. */
-#define SYSCTL_NODE(parent, nbr, name, access, handler, descr) \
- struct sysctl_oid_list sysctl_##parent##_##name##_children; \
- SYSCTL_OID(parent, nbr, name, CTLTYPE_NODE|access, \
- (void*)&sysctl_##parent##_##name##_children, 0, handler, \
- "N", descr)
+#define SYSCTL_NODE(parent, nbr, name, access, handler, descr) \
+ struct sysctl_oid_list sysctl_##parent##_##name##_children; \
+ SYSCTL_OID(parent, nbr, name, CTLTYPE_NODE|access, \
+ &sysctl_##parent##_##name##_children, 0, handler, "N", descr)
+#endif /* XNU_KERNEL_PRIVATE */
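
/*
 * Illustrative sketch (names are made up): a typical use of the macros above.
 * Under XNU_KERNEL_PRIVATE the OID lands in immutable memory and is registered
 * before lockdown through sysctl_register_oid_early(); a node that must accept
 * children added after lockdown would be declared with SYSCTL_EXTENSIBLE_NODE()
 * instead.
 */
static int example_tunable = 42;

SYSCTL_NODE(_kern, OID_AUTO, example, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "example subsystem");
SYSCTL_INT(_kern_example, OID_AUTO, tunable, CTLFLAG_RW | CTLFLAG_LOCKED,
    &example_tunable, 0, "example integer tunable");
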
/* Oid for a string. len can be 0 to indicate '\0' termination. */
#define SYSCTL_STRING(parent, nbr, name, access, arg, len, descr) \
SYSCTL_OID(parent, nbr, name, CTLTYPE_STRING|access, \
- arg, len, sysctl_handle_string, "A", descr)
+ arg, len, sysctl_handle_string, "A", descr)
#define SYSCTL_COMPAT_INT(parent, nbr, name, access, ptr, val, descr) \
SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|access, \
- ptr, val, sysctl_handle_int, "I", descr)
+ ptr, val, sysctl_handle_int, "I", descr)
#define SYSCTL_COMPAT_UINT(parent, nbr, name, access, ptr, val, descr) \
SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|access, \
- ptr, val, sysctl_handle_int, "IU", descr)
+ ptr, val, sysctl_handle_int, "IU", descr)
/* Oid for an int. If ptr is NULL, val is returned. */
#define SYSCTL_INT(parent, nbr, name, access, ptr, val, descr) \
SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|access, \
- ptr, val, sysctl_handle_int, "I", descr); \
- typedef char _sysctl_##parent##_##name##_size_check[(__builtin_constant_p(ptr) || sizeof(*(ptr)) == sizeof(int)) ? 0 : -1]
+ ptr, val, sysctl_handle_int, "I", descr); \
+ _Static_assert(__builtin_constant_p(ptr) || sizeof(*(ptr)) == sizeof(int), \
+ "must be integer sized");
/* Oid for an unsigned int. If ptr is NULL, val is returned. */
#define SYSCTL_UINT(parent, nbr, name, access, ptr, val, descr) \
SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|access, \
- ptr, val, sysctl_handle_int, "IU", descr); \
- typedef char _sysctl_##parent##_##name##_size_check[(__builtin_constant_p(ptr) || sizeof(*(ptr)) == sizeof(unsigned int)) ? 0 : -1]
+ ptr, val, sysctl_handle_int, "IU", descr); \
+ _Static_assert(__builtin_constant_p(ptr) || sizeof(*(ptr)) == sizeof(unsigned int), \
+ "must be integer sized");
/* Oid for a long. The pointer must be non NULL. */
#define SYSCTL_LONG(parent, nbr, name, access, ptr, descr) \
SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|access, \
- ptr, 0, sysctl_handle_long, "L", descr); \
- typedef char _sysctl_##parent##_##name##_size_check[(__builtin_constant_p(ptr) || sizeof(*(ptr)) == sizeof(long)) ? 0 : -1]
+ ptr, 0, sysctl_handle_long, "L", descr); \
+ _Static_assert(__builtin_constant_p(ptr) || sizeof(*(ptr)) == sizeof(long), \
+ "must be long sized");
/* Oid for a unsigned long. The pointer must be non NULL. */
#define SYSCTL_ULONG(parent, nbr, name, access, ptr, descr) \
SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|access, \
- ptr, 0, sysctl_handle_long, "LU", descr); \
- typedef char _sysctl_##parent##_##name##_size_check[(__builtin_constant_p(ptr) || sizeof(*(ptr)) == sizeof(unsigned long)) ? 0 : -1]
+ ptr, 0, sysctl_handle_long, "LU", descr); \
+ _Static_assert(__builtin_constant_p(ptr) || sizeof(*(ptr)) == sizeof(unsigned long), \
+ "must be long sized");
/* Oid for a quad. The pointer must be non NULL. */
#define SYSCTL_QUAD(parent, nbr, name, access, ptr, descr) \
SYSCTL_OID(parent, nbr, name, CTLTYPE_QUAD|access, \
- ptr, 0, sysctl_handle_quad, "Q", descr); \
- typedef char _sysctl_##parent##_##name##_size_check[(__builtin_constant_p(ptr) || sizeof(*(ptr)) == sizeof(long long)) ? 0 : -1]
+ ptr, 0, sysctl_handle_quad, "Q", descr); \
+ _Static_assert(__builtin_constant_p(ptr) || sizeof(*(ptr)) == sizeof(long long), \
+ "must be long long sized");
/* Oid for an opaque object. Specified by a pointer and a length. */
#define SYSCTL_OPAQUE(parent, nbr, name, access, ptr, len, fmt, descr) \
/* Oid for a struct. Specified by a pointer and a type. */
#define SYSCTL_STRUCT(parent, nbr, name, access, ptr, type, descr) \
SYSCTL_OID(parent, nbr, name, CTLTYPE_OPAQUE|access, \
- ptr, sizeof(struct type), sysctl_handle_opaque, \
- "S," #type, descr)
+ ptr, sizeof(struct type), sysctl_handle_opaque, \
+ "S," #type, descr)
/*
* Oid for a procedure. Specified by a pointer and an arg.
*/
#define SYSCTL_PROC(parent, nbr, name, access, ptr, arg, handler, fmt, descr) \
SYSCTL_OID(parent, nbr, name, access, \
- ptr, arg, handler, fmt, descr)
+ ptr, arg, handler, fmt, descr)
+
+/*
+ * The EXPERIMENT macros below expose values for on-device experimentation (A/B testing) via Trial.
+ * These values will be set shortly after boot by the KRExperiments framework based on any
+ * active experiments on the device.
+ * Values exposed via these macros are still normal sysctls and can be set by the superuser in the
+ * development or debug kernel. However, on the release kernel they can ONLY be set by processes
+ * with the com.apple.private.write-kr-experiment-factors entitlement.
+ * In addition, for numeric types, special macros are provided that enforce a valid range for the value (inclusive)
+ * to ensure that an errant experiment can't set a totally unexpected value. These macros also track which
+ * values have been modified via sysctl(3) so that they can be inspected with the showexperiments lldb macro.
+ */
+
+struct experiment_spec {
+ void *ptr; /* ptr to numeric experiment factor. */
+ uint64_t min_value; /* Min value that can be set via sysctl(3) (inclusive). */
+ uint64_t max_value; /* Max value that can be set via sysctl(3) (inclusive). */
+ uint64_t original_value; /* First value that was overwritten via sysctl(3). */
+ _Atomic bool modified; /* Has this value ever been overwritten via sysctl(3)? */
+};
+
+/*
+ * The handlers for the numeric types can be easily parameterized by type.
+ * So they're defined via an X macro.
+ */
+#define experiment_factor_numeric_types \
+ X(uint, unsigned int) \
+ X(int, int) \
+ X(ulong, unsigned long) \
+ X(long, long) \
+ X(uint64, uint64_t) \
+ X(int64, int64_t)
+
+#define X(experiment_factor_typename, _) \
+int experiment_factor_##experiment_factor_typename##_handler SYSCTL_HANDLER_ARGS;
+
+experiment_factor_numeric_types
+#undef X
+
+#define __EXPERIMENT_FACTOR_SPEC(parent, name, p, min, max) \
+ struct experiment_spec experiment_##parent##_##name = { \
+ .ptr = p, \
+ .min_value = min, \
+ .max_value = max, \
+ .original_value = 0, \
+ .modified = false \
+ }
+
+#define EXPERIMENT_FACTOR_UINT(parent, name, ptr, min, max, descr) \
+ __EXPERIMENT_FACTOR_SPEC(parent, name, ptr, min, max); \
+ _Static_assert(sizeof(*(ptr)) == sizeof(unsigned int), "must be integer sized"); \
+ SYSCTL_PROC(parent, OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_EXPERIMENT, &experiment_##parent##_##name, 1, &experiment_factor_uint_handler, "IU", descr);
+
+#define EXPERIMENT_FACTOR_INT(parent, name, ptr, min, max, descr) \
+ __EXPERIMENT_FACTOR_SPEC(parent, name, ptr, min, max); \
+ _Static_assert(sizeof(*(ptr)) == sizeof(int), "must be integer sized"); \
+ SYSCTL_PROC(parent, OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_EXPERIMENT, &experiment_##parent##_##name, 1, &experiment_factor_int_handler, "I", descr);
+
+#define EXPERIMENT_FACTOR_ULONG(parent, name, ptr, min, max, descr) \
+ __EXPERIMENT_FACTOR_SPEC(parent, name, ptr, min, max); \
+ _Static_assert(sizeof(*(ptr)) == sizeof(unsigned long), "must be long sized"); \
+ SYSCTL_PROC(parent, OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_EXPERIMENT, &experiment_##parent##_##name, 1, &experiment_factor_ulong_handler, "LU", descr);
+#define EXPERIMENT_FACTOR_LONG(parent, name, ptr, min, max, descr) \
+ __EXPERIMENT_FACTOR_SPEC(parent, name, ptr, min, max); \
+ _Static_assert(sizeof(*(ptr)) == sizeof(long), "must be long sized"); \
+ SYSCTL_PROC(parent, OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_EXPERIMENT, &experiment_##parent##_##name, 1, &experiment_factor_long_handler, "L", descr);
+
+#define EXPERIMENT_FACTOR_UINT64(parent, name, ptr, min, max, descr) \
+ __EXPERIMENT_FACTOR_SPEC(parent, name, ptr, min, max); \
+ _Static_assert(sizeof(*(ptr)) == sizeof(uint64_t), "must be 8 bytes"); \
+ SYSCTL_PROC(parent, OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_EXPERIMENT, &experiment_##parent##_##name, 1, &experiment_factor_uint64_handler, "QU", descr);
+
+#define EXPERIMENT_FACTOR_INT64(parent, name, ptr, min, max, descr) \
+ __EXPERIMENT_FACTOR_SPEC(parent, name, ptr, min, max); \
+ _Static_assert(sizeof(*(ptr)) == sizeof(int64_t), "must be 8 bytes"); \
+ SYSCTL_PROC(parent, OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_EXPERIMENT, &experiment_##parent##_##name, 1, &experiment_factor_int64_handler, "Q", descr);
+
+/*
+ * Calls a user-provided handler to read / write this factor.
+ * Entitlement checking will still be done by sysctl, but it's the caller's responsibility to validate any new values.
+ * This factor will not be printed out via the showexperiments lldb macro.
+ */
+#define EXPERIMENT_FACTOR_PROC(parent, name, access, ptr, arg, handler, fmt, descr) \
+ _Static_assert(arg != 1, "arg can not be 1"); \
+ SYSCTL_PROC(parent, OID_AUTO, name, access | CTLFLAG_ANYBODY | CTLFLAG_EXPERIMENT, ptr, arg, handler, fmt, descr);
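
/*
 * Illustrative sketch (names are made up): exposing a range-clamped tunable to
 * Trial. The factor is an ordinary kernel variable; the macro wires up the
 * range-checked handler and the experiment_spec consumed by the
 * showexperiments lldb macro.
 */
static unsigned int example_batch_size = 16;

EXPERIMENT_FACTOR_UINT(_kern, example_batch_size, &example_batch_size,
    1, 128, "batch size used by the example subsystem");
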
+
+#ifdef XNU_KERNEL_PRIVATE
+/*
+ * Sysctl handler for reading a simple counter.
+ * Using this directly is not recommended. Use the SYSCTL_SCALABLE_COUNTER macro instead.
+ */
+int scalable_counter_sysctl_handler SYSCTL_HANDLER_ARGS;
+
+/*!
+ * @macro SYSCTL_SCALABLE_COUNTER
+ *
+ * @abstract
+ * Provides a sysctl for reading the value of a percpu counter.
+ */
+#define SYSCTL_SCALABLE_COUNTER(parent, name, counter, descr) \
+SYSCTL_PROC(parent, OID_AUTO, name, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, \
+ (void *)(&counter), 0, &scalable_counter_sysctl_handler, "Q", descr);
+#endif /* XNU_KERNEL_PRIVATE */
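
/*
 * Illustrative sketch (assumption: example_io_count is a per-CPU counter
 * defined elsewhere with the kernel counter KPI): publish its aggregated value
 * as a read-only quad sysctl.
 */
SYSCTL_SCALABLE_COUNTER(_kern, example_io_count, example_io_count,
    "number of example I/Os issued");
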
extern struct sysctl_oid_list sysctl__children;
SYSCTL_DECL(_kern);
extern char macosversion[];
#endif
-struct linker_set;
-
-void sysctl_register_set(const char *set);
-void sysctl_unregister_set(const char *set);
void sysctl_mib_init(void);
-
-int sysctl_int(user_addr_t, size_t *, user_addr_t, size_t, int *);
-int sysctl_quad(user_addr_t, size_t *, user_addr_t, size_t, quad_t *);
-
-void sysctl_early_init(void);
+void hvg_bsd_init(void);
#endif /* BSD_KERNEL_PRIVATE */
#else /* !KERNEL */
/* returns TRUE if the throttle_lowpri_io called with the same sleep_amount would've slept */
int throttle_lowpri_io_will_be_throttled(int sleep_amount);
void throttle_set_thread_io_policy(int policy);
-int throttle_get_thread_effective_io_policy(void);
+int throttle_get_thread_effective_io_policy(void);
+int throttle_thread_io_tier_above_metadata(void);
typedef struct __throttle_info_handle *throttle_info_handle_t;
int throttle_info_ref_by_mask(uint64_t throttle_mask, throttle_info_handle_t *throttle_info_handle);
#define PTS_MAJOR 4
#define PTC_MAJOR 5
+/*
+ * If you need accounting, consider using
+ * KALLOC_HEAP_DEFINE to define a view.
+ */
+#define KM_TTYS KHEAP_DEFAULT
#endif /* defined(XNU_KERNEL_PRIVATE) */
__END_DECLS
off_t csb_end_offset; /* Blob coverage area end, from csb_base_offset */
vm_size_t csb_mem_size;
vm_offset_t csb_mem_offset;
- vm_address_t csb_mem_kaddr;
+ void * XNU_PTRAUTH_SIGNED_PTR("cs_blob.csb_mem_kaddr") csb_mem_kaddr;
unsigned char csb_cdhash[CS_CDHASH_LEN];
ptrauth_generic_signature_t csb_cdhash_signature;
const struct cs_hash *csb_hashtype;
char * XNU_PTRAUTH_SIGNED_PTR("cs_blob.csb_supplement_teamid") csb_supplement_teamid;
#endif
const CS_GenericBlob * XNU_PTRAUTH_SIGNED_PTR("cs_blob.csb_entitlements_blob") csb_entitlements_blob; /* raw blob, subrange of csb_mem_kaddr */
+ ptrauth_generic_signature_t csb_entitlements_blob_signature;
void * XNU_PTRAUTH_SIGNED_PTR("cs_blob.csb_entitlements") csb_entitlements; /* The entitlements as an OSDictionary */
unsigned int csb_signer_type;
unsigned int csb_reconstituted; /* signature has potentially been modified after validation */
uid_t cr_ruid; /* real user id */
uid_t cr_svuid; /* saved user id */
u_short cr_ngroups; /* number of groups in advisory list */
+#if XNU_KERNEL_PRIVATE
+ u_short __cr_padding;
+#endif
gid_t cr_groups[NGROUPS];/* advisory group list */
gid_t cr_rgid; /* real group id */
gid_t cr_svgid; /* saved group id */
#include <sys/un.h>
#include <sys/ucred.h>
#include <sys/socketvar.h>
+#if !KERNEL && PRIVATE
+#include <TargetConditionals.h>
+#endif
/*
* Protocol control block for an active
u_quad_t xu_alignment_hack;
};
-#if XNU_TARGET_OS_OSX || !(TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR)
+#if XNU_TARGET_OS_OSX || KERNEL || !(TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR)
struct xunpcb64_list_entry {
u_int64_t le_next;
struct xsocket64 xu_socket;
};
-#endif /* XNU_TARGET_OS_OSX || !(TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR) */
+#endif /* XNU_TARGET_OS_OSX || KERNEL || !(TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR) */
#pragma pack()
/* Document Tracking struct used to track a "tombstone" for a document */
struct doc_tombstone *t_tombstone;
+ /* Per-thread private data for filesystem use (see vfs_set_thread_fs_private()) */
+ uint64_t t_fs_private;
+
struct os_reason *uu_exit_reason;
};
#include <sys/kernel_types.h>
#include <sys/param.h>
#include <sys/signal.h>
-#endif
+#else
+#include <stdint.h>
+#endif /* KERNEL */
/*
* The vnode is the focus of all file activity in UNIX. There is a
#endif
+/* Supported filesystem tags for vfs_[set|get]_thread_fs_private */
+#define FS_PRIVATE_TAG_APFS (1)
+
+/*!
+ * @function vfs_set_thread_fs_private
+ * @abstract Set the per-thread filesystem private data field.
+ * @discussion Allows a filesystem to store an implementation-specific value in the thread struct.
+ * Note that this field is shared by all filesystems, so re-entrancy should be taken into consideration.
+ * @param tag Filesystem identification tag.
+ * @param fs_private The value to be set.
+ * @return 0 for success, ENOTSUP if the filesystem tag is not supported.
+ */
+int vfs_set_thread_fs_private(uint8_t tag, uint64_t fs_private);
+
+/*!
+ * @function vfs_get_thread_fs_private
+ * @abstract Return the per-thread filesystem private data field.
+ * @discussion Returns the per-thread value that was set by vfs_set_thread_fs_private().
+ * @param tag Filesystem identification tag.
+ * @param fs_private The stored per-thread value.
+ * @return 0 for success, ENOTSUP if the filesystem tag is not supported.
+ */
+int vfs_get_thread_fs_private(uint8_t tag, uint64_t *fs_private);
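
/*
 * Illustrative sketch (helper names are made up; the tag usage mirrors the
 * APFS tag defined above): stash a value for the duration of an operation and
 * read it back further down the call stack. The slot is shared by all
 * filesystems, so re-entrancy is the caller's concern.
 */
static void
example_stash_txn_id(uint64_t txn_id)
{
	(void)vfs_set_thread_fs_private(FS_PRIVATE_TAG_APFS, txn_id);
}

static uint64_t
example_read_txn_id(void)
{
	uint64_t txn_id = 0;

	if (vfs_get_thread_fs_private(FS_PRIVATE_TAG_APFS, &txn_id) != 0) {
		return 0;
	}
	return txn_id;
}
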
+
/*!
* @function vflush
* @abstract Reclaim the vnodes associated with a mount.
int vnode_isnoflush(vnode_t);
void vnode_setnoflush(vnode_t);
void vnode_clearnoflush(vnode_t);
+#if CONFIG_IO_COMPRESSION_STATS
+void vnode_iocs_record_and_free(vnode_t);
+#endif /* CONFIG_IO_COMPRESSION_STATS */
#define BUILDPATH_NO_FS_ENTER 0x1 /* Use cache values, do not enter file system */
#define BUILDPATH_CHECKACCESS 0x2 /* Check if parents have search rights */
#endif /* KERNEL */
+/*
+ * Structure for vnode level IO compression stats
+ */
+
+#define IOCS_BUFFER_NUM_SIZE_BUCKETS 10
+#define IOCS_BUFFER_MAX_BUCKET 9
+#define IOCS_BUFFER_NUM_COMPRESSION_BUCKETS 7
+#define IOCS_BLOCK_NUM_SIZE_BUCKETS 16
+
+struct io_compression_stats {
+ uint64_t uncompressed_size;
+ uint64_t compressed_size;
+ uint32_t buffer_size_compression_dist[IOCS_BUFFER_NUM_SIZE_BUCKETS][IOCS_BUFFER_NUM_COMPRESSION_BUCKETS];
+ uint32_t block_compressed_size_dist[IOCS_BLOCK_NUM_SIZE_BUCKETS];
+};
+typedef struct io_compression_stats *io_compression_stats_t;
+
+#define IOCS_SBE_PATH_LEN 128
+#define IOCS_PATH_START_BYTES_TO_COPY 108
+#define IOCS_PATH_END_BYTES_TO_COPY 20 /* Includes null termination */
+
+#define IOCS_SYSCTL_LIVE 0x00000001
+#define IOCS_SYSCTL_STORE_BUFFER_RD_ONLY 0x00000002
+#define IOCS_SYSCTL_STORE_BUFFER_MARK 0x00000004
+
+struct iocs_store_buffer_entry {
+ char path_name[IOCS_SBE_PATH_LEN];
+ struct io_compression_stats iocs;
+};
+
#endif /* !_VNODE_H_ */
* v_freelist is locked by the global vnode_list_lock
* v_mntvnodes is locked by the mount_lock
* v_nclinks and v_ncchildren are protected by the global name_cache_lock
- * v_cleanblkhd and v_dirtyblkhd and v_iterblkflags are locked via the global buf_mtxp
+ * v_cleanblkhd and v_dirtyblkhd and v_iterblkflags are locked via the global buf_mtx
* the rest of the structure is protected by the vnode_lock
*/
struct vnode {
* if VFLINKTARGET is set, if VFLINKTARGET is not
* set, points to target */
#endif /* CONFIG_FIRMLINKS */
+#if CONFIG_IO_COMPRESSION_STATS
+ io_compression_stats_t io_compression_stats; /* IO compression statistics */
+#endif /* CONFIG_IO_COMPRESSION_STATS */
};
#define v_mountedhere v_un.vu_mountedhere
#endif /* BSD_KERNEL_PRIVATE */
+#if CONFIG_IO_COMPRESSION_STATS
+/*
+ * update the IO compression stats tracked at block granularity
+ */
+int vnode_updateiocompressionblockstats(vnode_t vp, uint32_t size_bucket);
+
+/*
+ * update the IO compression stats tracked for the buffer
+ */
+int vnode_updateiocompressionbufferstats(vnode_t vp, uint64_t uncompressed_size, uint64_t compressed_size, uint32_t size_bucket, uint32_t compression_bucket);
+
+#endif /* CONFIG_IO_COMPRESSION_STATS */
+
extern bool rootvp_is_ssd;
#endif /* !_SYS_VNODE_INTERNAL_H_ */
struct vsockpcbinfo {
// PCB locking.
- lck_attr_t *vsock_lock_attr;
- lck_grp_t *vsock_lock_grp;
- lck_grp_attr_t *vsock_lock_grp_attr;
- lck_rw_t *all_lock;
- lck_rw_t *bound_lock;
+ lck_rw_t all_lock;
+ lck_rw_t bound_lock;
// PCB lists.
TAILQ_HEAD(, vsockpcb) all;
LIST_HEAD(, vsockpcb) bound;
/* Kernel-supplied flag: Work interval has been ignored by the kernel */
#define WORK_INTERVAL_FLAG_IGNORED (0x20)
+/* Specifies that the work interval requests the system to provide just enough performance
+ * to be able to finish at the provided deadline and no sooner. */
+#define WORK_INTERVAL_FLAG_FINISH_AT_DEADLINE (0x40)
+
/* Flags to describe the interval flavor to the performance controller */
#define WORK_INTERVAL_TYPE_MASK (0xF0000000)
#define WORK_INTERVAL_TYPE_DEFAULT (0x0 << 28)
#define WORK_INTERVAL_TYPE_CA_CLIENT (0x3 << 28)
#define WORK_INTERVAL_TYPE_HID_DELIVERY (0x4 << 28)
#define WORK_INTERVAL_TYPE_COREMEDIA (0x5 << 28)
+#define WORK_INTERVAL_TYPE_ARKIT (0x6 << 28)
#define WORK_INTERVAL_TYPE_LAST (0xF << 28)
#ifndef KERNEL
#define MACH_BRIDGE_OBSV_RATE 0x7 /* out of range observed rates */
/* DBG_SKYWALK has same toplevel code as DBG_DLIL, so don't reuse subcodes */
+#define DBG_SKYWALK_ALWAYSON 0x10
#define DBG_SKYWALK_FLOWSWITCH 0x11
#define DBG_SKYWALK_NETIF 0x12
#define DBG_SKYWALK_CHANNEL 0x13
+#define DBG_SKYWALK_PACKET 0x14
#define PPT_TEST 0x01
#define PPT_JETSAM_HIWAT 0x02
#ifdef __arm64__
XNUPOST_TEST_CONFIG_BASIC(arm64_lock_test),
#endif
+#if !KASAN // <rdar://71151361>
#if defined(__arm__) || defined(__arm64__)
XNUPOST_TEST_CONFIG_BASIC(pmap_test),
#endif /* defined(__arm__) || defined(__arm64__) */
#if __ARM_PAN_AVAILABLE__
XNUPOST_TEST_CONFIG_BASIC(arm64_late_pan_test),
#endif
+#endif /* !KASAN */
XNUPOST_TEST_CONFIG_BASIC(kalloc_test),
XNUPOST_TEST_CONFIG_BASIC(ipi_test),
#if HAS_TWO_STAGE_SPR_LOCK
XNUPOST_TEST_CONFIG_BASIC(arm64_spr_lock_test),
#endif
+#if !KASAN
XNUPOST_TEST_CONFIG_BASIC(copyio_test),
+#endif /* !KASAN */
};
uint32_t bsd_post_tests_count = sizeof(bsd_post_tests) / sizeof(xnupost_test_data_t);
struct vnode_attr * vap, vfs_context_t ctx);
#endif /* CONFIG_APPLEDOUBLE */
-extern lck_rw_t * rootvnode_rw_lock;
+extern lck_rw_t rootvnode_rw_lock;
static errno_t post_rename(vnode_t fdvp, vnode_t fvp, vnode_t tdvp, vnode_t tvp);
return 0;
}
+int
+vfs_set_thread_fs_private(uint8_t tag, uint64_t fs_private)
+{
+ struct uthread *ut;
+
+ if (tag != FS_PRIVATE_TAG_APFS) {
+ return ENOTSUP;
+ }
+
+ ut = get_bsdthread_info(current_thread());
+ ut->t_fs_private = fs_private;
+
+ return 0;
+}
+
+int
+vfs_get_thread_fs_private(uint8_t tag, uint64_t *fs_private)
+{
+ struct uthread *ut;
+
+ if (tag != FS_PRIVATE_TAG_APFS) {
+ return ENOTSUP;
+ }
+
+ ut = get_bsdthread_info(current_thread());
+ *fs_private = ut->t_fs_private;
+
+ return 0;
+}
+
int
vfs_isswapmount(mount_t mnt)
{
{
int error;
- lck_rw_lock_shared(rootvnode_rw_lock);
+ lck_rw_lock_shared(&rootvnode_rw_lock);
error = vnode_get(rootvnode);
- lck_rw_unlock_shared(rootvnode_rw_lock);
+ lck_rw_unlock_shared(&rootvnode_rw_lock);
if (error) {
return (vnode_t)0;
} else {
static int needbuffer;
static int need_iobuffer;
-static lck_grp_t *buf_mtx_grp;
-static lck_attr_t *buf_mtx_attr;
-static lck_grp_attr_t *buf_mtx_grp_attr;
-static lck_mtx_t *iobuffer_mtxp;
-static lck_mtx_t *buf_mtxp;
-static lck_mtx_t *buf_gc_callout;
+static LCK_GRP_DECLARE(buf_mtx_grp, "buffer cache");
+static LCK_ATTR_DECLARE(buf_mtx_attr, 0, 0);
+static LCK_MTX_DECLARE_ATTR(iobuffer_mtxp, &buf_mtx_grp, &buf_mtx_attr);
+static LCK_MTX_DECLARE_ATTR(buf_mtx, &buf_mtx_grp, &buf_mtx_attr);
+static LCK_MTX_DECLARE_ATTR(buf_gc_callout, &buf_mtx_grp, &buf_mtx_attr);
static uint32_t buf_busycount;
}
/*
- * buf_mtxp held.
+ * buf_mtx held.
*/
static __inline__ void
bmovelaundry(buf_t bp)
return 0;
}
+void
+bufattr_markexpeditedmeta(bufattr_t bap)
+{
+ SET(bap->ba_flags, BA_EXPEDITED_META_IO);
+}
+
+int
+bufattr_expeditedmeta(bufattr_t bap)
+{
+ if ((bap->ba_flags & BA_EXPEDITED_META_IO)) {
+ return 1;
+ }
+ return 0;
+}
+
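
/*
 * Illustrative sketch (not from the source): a filesystem that wants its
 * metadata I/O expedited could tag the buffer's attributes before issuing the
 * write; buf_attr() is the existing KPI for reaching a buf's bufattr.
 */
static void
example_mark_metadata_buf(buf_t bp)
{
	bufattr_t bap = buf_attr(bp);

	if (!bufattr_expeditedmeta(bap)) {
		bufattr_markexpeditedmeta(bap);
	}
}
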
errno_t
buf_error(buf_t bp)
{
}
*(buf_t *)(&io_bp->b_orig) = bp;
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
io_bp->b_lflags |= BL_SHADOW;
io_bp->b_shadow = bp->b_shadow;
bp->b_data_ref++;
}
#endif
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
} else {
if (external_storage) {
#ifdef BUF_MAKE_PRIVATE
bcopy((caddr_t)bp->b_datap, (caddr_t)my_buf.b_datap, bp->b_bcount);
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
for (t_bp = bp->b_shadow; t_bp; t_bp = t_bp->b_shadow) {
if (!ISSET(bp->b_lflags, BL_EXTERNAL)) {
}
if (ds_bp == NULL) {
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
buf_free_meta_store(&my_buf);
bp->b_data_ref = 0;
bp->b_datap = my_buf.b_datap;
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
KERNEL_DEBUG(0xbbbbc004 | DBG_FUNC_END, bp, bp->b_shadow_ref, 0, 0, 0);
return 0;
}
for (i = 0; i < num_lists; i++) {
- lck_mtx_lock(buf_mtxp);
+ lck_mtx_lock(&buf_mtx);
if (buf_iterprepare(vp, &local_iterblkhd, list[i].flag)) {
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
continue;
}
while (!LIST_EMPTY(&local_iterblkhd)) {
}
}
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
retval = callout(bp, arg);
if (bp) {
buf_brelse(bp);
}
- lck_mtx_lock(buf_mtxp);
+ lck_mtx_lock(&buf_mtx);
goto out;
case BUF_CLAIMED_DONE:
- lck_mtx_lock(buf_mtxp);
+ lck_mtx_lock(&buf_mtx);
goto out;
}
- lck_mtx_lock(buf_mtxp);
+ lck_mtx_lock(&buf_mtx);
} /* while list has more nodes */
out:
buf_itercomplete(vp, &local_iterblkhd, list[i].flag);
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
} /* for each list */
} /* buf_iterate */
return 0;
}
- lck_mtx_lock(buf_mtxp);
+ lck_mtx_lock(&buf_mtx);
for (;;) {
if (must_rescan == 0) {
* the lists may not be empty, but all that's left at this
* point are metadata or B_LOCKED buffers which are being
* skipped... we know this because we made it through both
- * the clean and dirty lists without dropping buf_mtxp...
- * each time we drop buf_mtxp we bump "must_rescan"
+ * the clean and dirty lists without dropping buf_mtx...
+ * each time we drop buf_mtx we bump "must_rescan"
*/
break;
}
if (error == EDEADLK) {
/*
* this buffer was marked B_LOCKED...
- * we didn't drop buf_mtxp, so we
+ * we didn't drop buf_mtx, so
* we don't need to rescan
*/
continue;
if (error == EAGAIN) {
/*
* found a busy buffer... we blocked and
- * dropped buf_mtxp, so we're going to
+ * dropped buf_mtx, so we're going to
* need to rescan after this pass is completed
*/
must_rescan++;
*/
buf_itercomplete(vp, &local_iterblkhd, VBI_CLEAN);
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
return error;
}
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
if (bp->b_flags & B_LOCKED) {
KERNEL_DEBUG(0xbbbbc038, bp, 0, 0, 0, 0);
SET(bp->b_flags, B_INVAL);
buf_brelse(bp);
- lck_mtx_lock(buf_mtxp);
+ lck_mtx_lock(&buf_mtx);
/*
- * by dropping buf_mtxp, we allow new
+ * by dropping buf_mtx, we allow new
* buffers to be added to the vnode list(s)
* we'll have to rescan at least once more
* if the queues aren't empty
if (error == EDEADLK) {
/*
* this buffer was marked B_LOCKED...
- * we didn't drop buf_mtxp, so we
+ * we didn't drop buf_mtx, so
* we don't need to rescan
*/
continue;
if (error == EAGAIN) {
/*
* found a busy buffer... we blocked and
- * dropped buf_mtxp, so we're going to
+ * dropped buf_mtx, so we're going to
* need to rescan after this pass is completed
*/
must_rescan++;
*/
buf_itercomplete(vp, &local_iterblkhd, VBI_DIRTY);
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
return error;
}
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
if (bp->b_flags & B_LOCKED) {
KERNEL_DEBUG(0xbbbbc038, bp, 0, 0, 1, 0);
buf_brelse(bp);
}
- lck_mtx_lock(buf_mtxp);
+ lck_mtx_lock(&buf_mtx);
/*
- * by dropping buf_mtxp, we allow new
+ * by dropping buf_mtx, we allow new
* buffers to be added to the vnode list(s)
* we'll have to rescan at least once more
* if the queues aren't empty
}
buf_itercomplete(vp, &local_iterblkhd, VBI_DIRTY);
}
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
return 0;
}
lock_flags |= BAC_SKIP_NONLOCKED;
}
loop:
- lck_mtx_lock(buf_mtxp);
+ lck_mtx_lock(&buf_mtx);
if (buf_iterprepare(vp, &local_iterblkhd, VBI_DIRTY) == 0) {
while (!LIST_EMPTY(&local_iterblkhd)) {
}
continue;
}
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
bp->b_flags &= ~B_LOCKED;
}
writes_issued++;
- lck_mtx_lock(buf_mtxp);
+ lck_mtx_lock(&buf_mtx);
}
buf_itercomplete(vp, &local_iterblkhd, VBI_DIRTY);
}
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
if (wait) {
(void)vnode_waitforwrites(vp, 0, 0, 0, msg);
/*
- * called with buf_mtxp held...
+ * called with buf_mtx held...
* this lock protects the queue manipulation
*/
static int
while (vp->v_iterblkflags & VBI_ITER) {
vp->v_iterblkflags |= VBI_ITERWANT;
- msleep(&vp->v_iterblkflags, buf_mtxp, 0, "buf_iterprepare", NULL);
+ msleep(&vp->v_iterblkflags, &buf_mtx, 0, "buf_iterprepare", NULL);
}
if (LIST_EMPTY(listheadp)) {
LIST_INIT(iterheadp);
}
/*
- * called with buf_mtxp held...
+ * called with buf_mtx held...
* this lock protects the queue manipulation
*/
static void
/*
* Associate a buffer with a vnode.
- * buf_mtxp must be locked on entry
+ * buf_mtx must be locked on entry
*/
static void
bgetvp_locked(vnode_t vp, buf_t bp)
/*
* Disassociate a buffer from a vnode.
- * buf_mtxp must be locked on entry
+ * buf_mtx must be locked on entry
*/
static void
brelvp_locked(buf_t bp)
printf("buf_reassign: NULL");
return;
}
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
/*
* Delete from old vnode list, if on one.
}
bufinsvn(bp, listheadp);
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
}
static __inline__ void
binsheadfree(bp, &iobufqueue, -1);
}
- /*
- * allocate lock group attribute and group
- */
- buf_mtx_grp_attr = lck_grp_attr_alloc_init();
- buf_mtx_grp = lck_grp_alloc_init("buffer cache", buf_mtx_grp_attr);
-
- /*
- * allocate the lock attribute
- */
- buf_mtx_attr = lck_attr_alloc_init();
-
- /*
- * allocate and initialize mutex's for the buffer and iobuffer pools
- */
- buf_mtxp = lck_mtx_alloc_init(buf_mtx_grp, buf_mtx_attr);
- iobuffer_mtxp = lck_mtx_alloc_init(buf_mtx_grp, buf_mtx_attr);
- buf_gc_callout = lck_mtx_alloc_init(buf_mtx_grp, buf_mtx_attr);
-
- if (iobuffer_mtxp == NULL) {
- panic("couldn't create iobuffer mutex");
- }
-
- if (buf_mtxp == NULL) {
- panic("couldn't create buf mutex");
- }
-
- if (buf_gc_callout == NULL) {
- panic("couldn't create buf_gc_callout mutex");
- }
-
/*
* allocate and initialize cluster specific global locks...
*/
#endif
int need_wakeup = 0;
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
__IGNORE_WCASTALIGN(bp_head = (buf_t)bp->b_orig);
need_wakeup = 1;
}
}
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
if (need_wakeup) {
wakeup(bp_head);
*/
buf_release_credentials(bp);
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
if (bp->b_shadow_ref) {
SET(bp->b_lflags, BL_WAITSHADOW);
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
return;
}
if (delayed_buf_free_meta_store == TRUE) {
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
finish_shadow_master:
buf_free_meta_store(bp);
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
}
CLR(bp->b_flags, (B_META | B_ZALLOC | B_DELWRI | B_LOCKED | B_AGE | B_ASYNC | B_NOCACHE | B_FUA));
bp->b_timestamp = buf_timestamp();
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
/*
* the buf_brelse_shadow routine doesn't take 'ownership'
* of the parent buf_t... it updates state that is protected by
- * the buf_mtxp, and checks for BL_BUSY to determine whether to
+ * the buf_mtx, and checks for BL_BUSY to determine whether to
* put the buf_t back on a free list. b_shadow_ref is protected
* by the lock, and since we have not yet cleared B_BUSY, we need
* to check it while holding the lock to insure that one of us
if (needbuffer) {
/*
* needbuffer is a global
- * we're currently using buf_mtxp to protect it
+ * we're currently using buf_mtx to protect it
* delay doing the actual wakeup until after
- * we drop buf_mtxp
+ * we drop buf_mtx
*/
needbuffer = 0;
need_wakeup = 1;
if (ISSET(bp->b_lflags, BL_WANTED)) {
/*
* delay the actual wakeup until after we
- * clear BL_BUSY and we've dropped buf_mtxp
+ * clear BL_BUSY and we've dropped buf_mtx
*/
need_bp_wakeup = 1;
}
CLR(bp->b_lflags, (BL_BUSY | BL_WANTED));
buf_busycount--;
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
if (need_wakeup) {
/*
dp = BUFHASH(vp, blkno);
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
if (incore_locked(vp, blkno, dp)) {
retval = TRUE;
} else {
retval = FALSE;
}
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
return retval;
}
dp = BUFHASH(vp, blkno);
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
for (;;) {
if ((bp = incore_locked(vp, blkno, dp)) == NULL) {
SET(bp->b_lflags, BL_WANTED_REF);
- (void) msleep(bp, buf_mtxp, PSPIN | (PRIBIO + 1), "buf_wait_for_shadow", NULL);
+ (void) msleep(bp, &buf_mtx, PSPIN | (PRIBIO + 1), "buf_wait_for_shadow", NULL);
}
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
}
/* XXX FIXME -- Update the comment to reflect the UBC changes (please) -- */
operation &= ~BLK_ONLYVALID;
dp = BUFHASH(vp, blkno);
start:
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
if ((bp = incore_locked(vp, blkno, dp))) {
/*
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 396)) | DBG_FUNC_NONE,
(uintptr_t)blkno, size, operation, 0, 0);
- err = msleep(bp, buf_mtxp, slpflag | PDROP | (PRIBIO + 1), "buf_getblk", &ts);
+ err = msleep(bp, &buf_mtx, slpflag | PDROP | (PRIBIO + 1), "buf_getblk", &ts);
/*
* Callers who call with PCATCH or timeout are
bremfree_locked(bp);
bufstats.bufs_incore++;
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
#ifdef JOE_DEBUG
bp->b_owner = current_thread();
bp->b_tag = 1;
int queue = BQ_EMPTY; /* Start with no preference */
if (ret_only_valid) {
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
return NULL;
}
if ((vnode_isreg(vp) == 0) || (UBCINFOEXISTS(vp) == 0) /*|| (vnode_issystem(vp) == 1)*/) {
SET(bp->b_flags, B_INVAL);
binshash(bp, &invalhash);
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
buf_brelse(bp);
goto start;
bgetvp_locked(vp, bp);
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
allocbuf(bp, size);
/*
* buffer data is invalid...
*
- * I don't want to have to retake buf_mtxp,
+ * I don't want to have to retake buf_mtx,
* so the miss and vmhits counters are done
* with Atomic updates... all other counters
* in bufstats are protected with either
- * buf_mtxp or iobuffer_mtxp
+ * buf_mtx or iobuffer_mtxp
*/
OSAddAtomicLong(1, &bufstats.bufs_miss);
break;
int queue = BQ_EMPTY;
do {
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
bp = getnewbuf(0, 0, &queue);
} while (bp == NULL);
binshash(bp, &invalhash);
bufstats.bufs_eblk++;
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
allocbuf(bp, size);
buf_t bp;
void *ptr = NULL;
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
TAILQ_FOREACH(bp, &bufqueues[BQ_META], b_freelist) {
if (ISSET(bp->b_flags, B_DELWRI) || bp->b_bufsize != (uint32_t)nsize) {
bcleanbuf(bp, TRUE);
break;
}
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
return ptr;
}
* Remove the buffer from the hash. Return the buffer and the queue
* on which it was found.
*
- * buf_mtxp is held upon entry
- * returns with buf_mtxp locked if new buf available
- * returns with buf_mtxp UNlocked if new buf NOT available
+ * buf_mtx is held upon entry
+ * returns with buf_mtx locked if new buf available
+ * returns with buf_mtx UNlocked if new buf NOT available
*/
static buf_t
*/
add_newbufs:
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
/* Create a new temporary buffer header */
bp = (struct buf *)zalloc(buf_hdr_zone);
SET(bp->b_flags, B_HDRALLOC);
*queue = BQ_EMPTY;
}
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
if (bp) {
binshash(bp, &invalhash);
/* the hz value is 100; which leads to 10ms */
ts.tv_nsec = (slptimeo % 1000) * NSEC_PER_USEC * 1000 * 10;
- msleep(&needbuffer, buf_mtxp, slpflag | PDROP | (PRIBIO + 1), "getnewbuf", &ts);
+ msleep(&needbuffer, &buf_mtx, slpflag | PDROP | (PRIBIO + 1), "getnewbuf", &ts);
return NULL;
}
* Returns 1 if issued a buf_bawrite() to indicate
* that the buffer is not ready.
*
- * buf_mtxp is held upon entry
- * returns with buf_mtxp locked
+ * buf_mtx is held upon entry
+ * returns with buf_mtx locked
*/
int
bcleanbuf(buf_t bp, boolean_t discard)
bmovelaundry(bp);
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
wakeup(&bufqueues[BQ_LAUNDRY]);
/*
*/
(void)thread_block(THREAD_CONTINUE_NULL);
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
return 1;
}
brelvp_locked(bp);
}
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
BLISTNONE(bp);
/* If discarding, just move to the empty queue */
if (discard) {
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
CLR(bp->b_flags, (B_META | B_ZALLOC | B_DELWRI | B_LOCKED | B_AGE | B_ASYNC | B_NOCACHE | B_FUA));
bp->b_whichq = BQ_EMPTY;
binshash(bp, &invalhash);
bp->b_validoff = bp->b_validend = 0;
bzero(&bp->b_attr, sizeof(struct bufattr));
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
}
return 0;
}
dp = BUFHASH(vp, lblkno);
relook:
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
if ((bp = incore_locked(vp, lblkno, dp)) == (struct buf *)0) {
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
return 0;
}
if (ISSET(bp->b_lflags, BL_BUSY)) {
if (!ISSET(flags, BUF_WAIT)) {
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
return EBUSY;
}
SET(bp->b_lflags, BL_WANTED);
- error = msleep((caddr_t)bp, buf_mtxp, PDROP | (PRIBIO + 1), "buf_invalblkno", NULL);
+ error = msleep((caddr_t)bp, &buf_mtx, PDROP | (PRIBIO + 1), "buf_invalblkno", NULL);
if (error) {
return error;
bp->b_owner = current_thread();
bp->b_tag = 4;
#endif
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
buf_brelse(bp);
return 0;
{
int need_wakeup = 0;
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
if (ISSET(bp->b_lflags, BL_WANTED)) {
/*
* delay the actual wakeup until after we
- * clear BL_BUSY and we've dropped buf_mtxp
+ * clear BL_BUSY and we've dropped buf_mtx
*/
need_wakeup = 1;
}
CLR(bp->b_lflags, (BL_BUSY | BL_WANTED));
buf_busycount--;
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
if (need_wakeup) {
/*
{
errno_t error;
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
error = buf_acquire_locked(bp, flags, slpflag, slptimeo);
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
return error;
}
/* the hz value is 100; which leads to 10ms */
ts.tv_sec = (slptimeo / 100);
ts.tv_nsec = (slptimeo % 100) * 10 * NSEC_PER_USEC * 1000;
- error = msleep((caddr_t)bp, buf_mtxp, slpflag | (PRIBIO + 1), "buf_acquire", &ts);
+ error = msleep((caddr_t)bp, &buf_mtx, slpflag | (PRIBIO + 1), "buf_acquire", &ts);
if (error) {
return error;
buf_biowait(buf_t bp)
{
while (!ISSET(bp->b_flags, B_DONE)) {
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
if (!ISSET(bp->b_flags, B_DONE)) {
DTRACE_IO1(wait__start, buf_t, bp);
- (void) msleep(bp, buf_mtxp, PDROP | (PRIBIO + 1), "buf_biowait", NULL);
+ (void) msleep(bp, &buf_mtx, PDROP | (PRIBIO + 1), "buf_biowait", NULL);
DTRACE_IO1(wait__done, buf_t, bp);
} else {
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
}
}
/* check for interruption of I/O (e.g. via NFS), then errors. */
 * they do get to run, they're going to re-set
* BL_WANTED and go back to sleep
*/
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
CLR(bp->b_lflags, BL_WANTED);
SET(bp->b_flags, B_DONE); /* note that it's done */
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
wakeup(bp);
}
buf_t bp;
int n = 0;
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
for (bp = bufqueues[BQ_LOCKED].tqh_first; bp;
bp = bp->b_freelist.tqe_next) {
n++;
}
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
return n;
}
counts[j] = 0;
}
- lck_mtx_lock(buf_mtxp);
+ lck_mtx_lock(&buf_mtx);
for (bp = dp->tqh_first; bp; bp = bp->b_freelist.tqe_next) {
counts[bp->b_bufsize / CLBYTES]++;
count++;
}
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
printf("%s: total-%d", bname[i], count);
for (j = 0; j <= MAXBSIZE / CLBYTES; j++) {
mount_t mp = NULL;
int alloc_for_virtualdev = FALSE;
- lck_mtx_lock_spin(iobuffer_mtxp);
+ lck_mtx_lock_spin(&iobuffer_mtxp);
/*
* We subject iobuf requests for diskimages to additional restrictions.
bufstats.bufs_iobufsleeps++;
need_iobuffer = 1;
- (void)msleep(&need_iobuffer, iobuffer_mtxp,
+ (void)msleep(&need_iobuffer, &iobuffer_mtxp,
PSPIN | (PRIBIO + 1), (const char *)"alloc_io_buf (1)",
NULL);
}
bufstats.bufs_iobufsleeps++;
need_iobuffer = 1;
- (void)msleep(&need_iobuffer, iobuffer_mtxp, PSPIN | (PRIBIO + 1),
+ (void)msleep(&need_iobuffer, &iobuffer_mtxp, PSPIN | (PRIBIO + 1),
(const char *)"alloc_io_buf (2)", NULL);
}
TAILQ_REMOVE(&iobufqueue, bp, b_freelist);
bufstats.bufs_iobufinuse_vdev++;
}
- lck_mtx_unlock(iobuffer_mtxp);
+ lck_mtx_unlock(&iobuffer_mtxp);
/*
* initialize various fields
/* Zero out the bufattr and its flags before relinquishing this iobuf */
bzero(&bp->b_attr, sizeof(struct bufattr));
- lck_mtx_lock_spin(iobuffer_mtxp);
+ lck_mtx_lock_spin(&iobuffer_mtxp);
binsheadfree(bp, &iobufqueue, -1);
}
}
- lck_mtx_unlock(iobuffer_mtxp);
+ lck_mtx_unlock(&iobuffer_mtxp);
if (need_wakeup) {
wakeup(&need_iobuffer);
void
buf_list_lock(void)
{
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
}
void
buf_list_unlock(void)
{
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
}
/*
int loopcnt = 0;
for (;;) {
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
while ((bp = TAILQ_FIRST(&bufqueues[BQ_LAUNDRY])) == NULL) {
- (void)msleep0(&bufqueues[BQ_LAUNDRY], buf_mtxp, PRIBIO | PDROP, "blaundry", 0, (bcleanbufcontinuation)bcleanbuf_thread);
+ (void)msleep0(&bufqueues[BQ_LAUNDRY], &buf_mtx, PRIBIO | PDROP, "blaundry", 0, (bcleanbufcontinuation)bcleanbuf_thread);
}
/*
bp->b_tag = 10;
#endif
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
/*
* do the IO
*/
bp->b_whichq = BQ_LAUNDRY;
bp->b_timestamp = buf_timestamp();
- lck_mtx_lock_spin(buf_mtxp);
+ lck_mtx_lock_spin(&buf_mtx);
binstailfree(bp, &bufqueues[BQ_LAUNDRY], BQ_LAUNDRY);
blaundrycnt++;
bp->b_tag = 11;
#endif
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
if (loopcnt > MAXLAUNDRY) {
/*
int
fs_buffer_cache_gc_register(void (* callout)(int, void *), void *context)
{
- lck_mtx_lock(buf_gc_callout);
+ lck_mtx_lock(&buf_gc_callout);
for (int i = 0; i < FS_BUFFER_CACHE_GC_CALLOUTS_MAX_SIZE; i++) {
if (fs_callouts[i].callout == NULL) {
fs_callouts[i].callout = callout;
fs_callouts[i].context = context;
- lck_mtx_unlock(buf_gc_callout);
+ lck_mtx_unlock(&buf_gc_callout);
return 0;
}
}
- lck_mtx_unlock(buf_gc_callout);
+ lck_mtx_unlock(&buf_gc_callout);
return ENOMEM;
}
int
fs_buffer_cache_gc_unregister(void (* callout)(int, void *), void *context)
{
- lck_mtx_lock(buf_gc_callout);
+ lck_mtx_lock(&buf_gc_callout);
for (int i = 0; i < FS_BUFFER_CACHE_GC_CALLOUTS_MAX_SIZE; i++) {
if (fs_callouts[i].callout == callout &&
fs_callouts[i].context == context) {
fs_callouts[i].context = NULL;
}
}
- lck_mtx_unlock(buf_gc_callout);
+ lck_mtx_unlock(&buf_gc_callout);
return 0;
}
static void
fs_buffer_cache_gc_dispatch_callouts(int all)
{
- lck_mtx_lock(buf_gc_callout);
+ lck_mtx_lock(&buf_gc_callout);
for (int i = 0; i < FS_BUFFER_CACHE_GC_CALLOUTS_MAX_SIZE; i++) {
if (fs_callouts[i].callout != NULL) {
fs_callouts[i].callout(all, fs_callouts[i].context);
}
}
- lck_mtx_unlock(buf_gc_callout);
+ lck_mtx_unlock(&buf_gc_callout);
}
static boolean_t
* for deep sleep), we only evict up to BUF_MAX_GC_BATCH_SIZE buffers
* that have not been accessed in the last BUF_STALE_THRESHOLD seconds.
* BUF_MAX_GC_BATCH_SIZE controls both the hold time of the global lock
- * "buf_mtxp" and the length of time we spend compute bound in the GC
+ * "buf_mtx" and the length of time we spend compute bound in the GC
* thread which calls this function
*/
- lck_mtx_lock(buf_mtxp);
+ lck_mtx_lock(&buf_mtx);
do {
found = 0;
}
/* Drop lock for batch processing */
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
/* Wakeup and yield for laundry if need be */
if (need_wakeup) {
bp->b_whichq = BQ_EMPTY;
BLISTNONE(bp);
}
- lck_mtx_lock(buf_mtxp);
+ lck_mtx_lock(&buf_mtx);
/* Back under lock, move them all to invalid hash and clear busy */
TAILQ_FOREACH(bp, &privq, b_freelist) {
TAILQ_CONCAT(&bufqueues[BQ_EMPTY], &privq, b_freelist);
} while (all && (found == BUF_MAX_GC_BATCH_SIZE));
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
fs_buffer_cache_gc_dispatch_callouts(all);
}
restart:
- lck_mtx_lock(buf_mtxp);
+ lck_mtx_lock(&buf_mtx);
bp = TAILQ_FIRST(&bufqueues[whichq]);
total_writes++;
if (buf_count >= NFLUSH) {
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
qsort(flush_table, buf_count, sizeof(struct buf *), bp_cmp);
}
}
}
- lck_mtx_unlock(buf_mtxp);
+ lck_mtx_unlock(&buf_mtx);
if (buf_count > 0) {
qsort(flush_table, buf_count, sizeof(struct buf *), bp_cmp);
/* vars for name cache list lock */
-lck_grp_t * namecache_lck_grp;
-lck_grp_attr_t * namecache_lck_grp_attr;
-lck_attr_t * namecache_lck_attr;
+static LCK_GRP_DECLARE(namecache_lck_grp, "Name Cache");
+static LCK_RW_DECLARE(namecache_rw_lock, &namecache_lck_grp);
-lck_grp_t * strcache_lck_grp;
-lck_grp_attr_t * strcache_lck_grp_attr;
-lck_attr_t * strcache_lck_attr;
+static LCK_GRP_DECLARE(strcache_lck_grp, "String Cache");
+static LCK_ATTR_DECLARE(strcache_lck_attr, 0, 0);
+LCK_RW_DECLARE_ATTR(strtable_rw_lock, &strcache_lck_grp, &strcache_lck_attr);
-lck_grp_t * rootvnode_lck_grp;
-lck_grp_attr_t * rootvnode_lck_grp_attr;
-lck_attr_t * rootvnode_lck_attr;
-
-lck_rw_t * namecache_rw_lock;
-lck_rw_t * strtable_rw_lock;
-lck_rw_t * rootvnode_rw_lock;
+static LCK_GRP_DECLARE(rootvnode_lck_grp, "rootvnode");
+LCK_RW_DECLARE(rootvnode_rw_lock, &rootvnode_lck_grp);
#define NUM_STRCACHE_LOCKS 1024
void
nchinit(void)
{
- int i;
-
desiredNegNodes = (desiredvnodes / 10);
desiredNodes = desiredvnodes + desiredNegNodes;
init_string_table();
- /* Allocate name cache lock group attribute and group */
- namecache_lck_grp_attr = lck_grp_attr_alloc_init();
-
- namecache_lck_grp = lck_grp_alloc_init("Name Cache", namecache_lck_grp_attr);
-
- /* Allocate name cache lock attribute */
- namecache_lck_attr = lck_attr_alloc_init();
-
- /* Allocate name cache lock */
- namecache_rw_lock = lck_rw_alloc_init(namecache_lck_grp, namecache_lck_attr);
-
-
- /* Allocate string cache lock group attribute and group */
- strcache_lck_grp_attr = lck_grp_attr_alloc_init();
-
- strcache_lck_grp = lck_grp_alloc_init("String Cache", strcache_lck_grp_attr);
-
- /* Allocate string cache lock attribute */
- strcache_lck_attr = lck_attr_alloc_init();
-
- /* Allocate string cache lock */
- strtable_rw_lock = lck_rw_alloc_init(strcache_lck_grp, strcache_lck_attr);
-
- for (i = 0; i < NUM_STRCACHE_LOCKS; i++) {
- lck_mtx_init(&strcache_mtx_locks[i], strcache_lck_grp, strcache_lck_attr);
+ for (int i = 0; i < NUM_STRCACHE_LOCKS; i++) {
+ lck_mtx_init(&strcache_mtx_locks[i], &strcache_lck_grp, &strcache_lck_attr);
}
-
- /* Allocate root vnode lock group attribute and group */
- rootvnode_lck_grp_attr = lck_grp_attr_alloc_init();
-
- rootvnode_lck_grp = lck_grp_alloc_init("rootvnode", rootvnode_lck_grp_attr);
-
- /* Allocate rootvnode lock attribute */
- rootvnode_lck_attr = lck_attr_alloc_init();
-
- /* Allocate rootvnode lock */
- rootvnode_rw_lock = lck_rw_alloc_init(rootvnode_lck_grp, rootvnode_lck_attr);
}
void
name_cache_lock_shared(void)
{
- lck_rw_lock_shared(namecache_rw_lock);
+ lck_rw_lock_shared(&namecache_rw_lock);
}
void
name_cache_lock(void)
{
- lck_rw_lock_exclusive(namecache_rw_lock);
+ lck_rw_lock_exclusive(&namecache_rw_lock);
}
void
name_cache_unlock(void)
{
- lck_rw_done(namecache_rw_lock);
+ lck_rw_done(&namecache_rw_lock);
}
* the lock exclusively in case some other thread
* beat us to the punch
*/
- lck_rw_lock_exclusive(strtable_rw_lock);
+ lck_rw_lock_exclusive(&strtable_rw_lock);
if (4 * filled_buckets < ((string_table_mask + 1) * 3)) {
- lck_rw_done(strtable_rw_lock);
+ lck_rw_done(&strtable_rw_lock);
return;
}
assert(string_table_mask < INT32_MAX);
if (new_table == NULL) {
printf("failed to resize the hash table.\n");
- lck_rw_done(strtable_rw_lock);
+ lck_rw_done(&strtable_rw_lock);
return;
}
LIST_INSERT_HEAD(head, entry, hash_chain);
}
}
- lck_rw_done(strtable_rw_lock);
+ lck_rw_done(&strtable_rw_lock);
FREE(old_table, M_CACHE);
}
* if someone else decides to grow the pool they
* will take this lock exclusively
*/
- lck_rw_lock_shared(strtable_rw_lock);
+ lck_rw_lock_shared(&strtable_rw_lock);
/*
* If the table gets more than 3/4 full, resize it
*/
if (4 * filled_buckets >= ((string_table_mask + 1) * 3)) {
- lck_rw_done(strtable_rw_lock);
+ lck_rw_done(&strtable_rw_lock);
resize_string_ref_table();
- lck_rw_lock_shared(strtable_rw_lock);
+ lck_rw_lock_shared(&strtable_rw_lock);
}
hash_index = hashval & string_table_mask;
lock_index = hash_index % NUM_STRCACHE_LOCKS;
}
lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
- lck_rw_done(strtable_rw_lock);
+ lck_rw_done(&strtable_rw_lock);
return (const char *)entry->str;
}
* if someone else decides to grow the pool they
* will take this lock exclusively
*/
- lck_rw_lock_shared(strtable_rw_lock);
+ lck_rw_lock_shared(&strtable_rw_lock);
/*
* must compute the head behind the table lock
* since the size and location of the table
}
}
lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
- lck_rw_done(strtable_rw_lock);
+ lck_rw_done(&strtable_rw_lock);
kheap_free_addr(KHEAP_DEFAULT, entry);
string_t *entry;
u_long i;
- lck_rw_lock_shared(strtable_rw_lock);
+ lck_rw_lock_shared(&strtable_rw_lock);
for (i = 0; i <= string_table_mask; i++) {
head = &string_ref_table[i];
printf("%6d - %s\n", entry->refcount, entry->str);
}
}
- lck_rw_done(strtable_rw_lock);
+ lck_rw_done(&strtable_rw_lock);
}
#endif /* DUMP_STRING_TABLE */
//
// Locks
//
-static lck_grp_attr_t * fsevent_group_attr;
-static lck_attr_t * fsevent_lock_attr;
-static lck_grp_t * fsevent_mutex_group;
+static LCK_ATTR_DECLARE(fsevent_lock_attr, 0, 0);
+static LCK_GRP_DECLARE(fsevent_mutex_group, "fsevent-mutex");
+static LCK_GRP_DECLARE(fsevent_rw_group, "fsevent-rw");
-static lck_grp_t * fsevent_rw_group;
-
-static lck_rw_t event_handling_lock; // handles locking for event manipulation and recycling
-static lck_mtx_t watch_table_lock;
-static lck_mtx_t event_buf_lock;
-static lck_mtx_t event_writer_lock;
+static LCK_RW_DECLARE_ATTR(event_handling_lock, // handles locking for event manipulation and recycling
+ &fsevent_rw_group, &fsevent_lock_attr);
+static LCK_MTX_DECLARE_ATTR(watch_table_lock,
+ &fsevent_mutex_group, &fsevent_lock_attr);
+static LCK_MTX_DECLARE_ATTR(event_buf_lock,
+ &fsevent_mutex_group, &fsevent_lock_attr);
+static LCK_MTX_DECLARE_ATTR(event_writer_lock,
+ &fsevent_mutex_group, &fsevent_lock_attr);
/* Explicitly declare qsort so compiler doesn't complain */
memset(watcher_table, 0, sizeof(watcher_table));
- fsevent_lock_attr = lck_attr_alloc_init();
- fsevent_group_attr = lck_grp_attr_alloc_init();
- fsevent_mutex_group = lck_grp_alloc_init("fsevent-mutex", fsevent_group_attr);
- fsevent_rw_group = lck_grp_alloc_init("fsevent-rw", fsevent_group_attr);
-
- lck_mtx_init(&watch_table_lock, fsevent_mutex_group, fsevent_lock_attr);
- lck_mtx_init(&event_buf_lock, fsevent_mutex_group, fsevent_lock_attr);
- lck_mtx_init(&event_writer_lock, fsevent_mutex_group, fsevent_lock_attr);
-
- lck_rw_init(&event_handling_lock, fsevent_rw_group, fsevent_lock_attr);
-
PE_get_default("kern.maxkfsevents", &max_kfs_events, sizeof(max_kfs_events));
event_zone = zone_create_ext("fs-event-buf", sizeof(kfs_event),
ZC_NOGC | ZC_NOCALLOUT, ZONE_ID_ANY, ^(zone_t z) {
// mark the zone as exhaustible so that it will not
// ever grow beyond what we initially filled it with
- zone_set_exhaustible(z, max_kfs_events * sizeof(kfs_event));
+ zone_set_exhaustible(z, max_kfs_events);
});
- if (zfill(event_zone, max_kfs_events) < max_kfs_events) {
- printf("fsevents: failed to pre-fill the event zone.\n");
- }
+ zone_fill_initially(event_zone, max_kfs_events);
}
static void
* Log information about floating point exception handling
*/
-static lck_mtx_t fpxlock;
-
-void
-fpxlog_init(void)
-{
- lck_grp_attr_t *lck_grp_attr = lck_grp_attr_alloc_init();
- lck_grp_t *lck_grp = lck_grp_alloc_init("fpx", lck_grp_attr);
- lck_mtx_init(&fpxlock, lck_grp, LCK_ATTR_NULL);
-}
+static LCK_GRP_DECLARE(fpxlock_grp, "fpx");
+static LCK_MTX_DECLARE(fpxlock, &fpxlock_grp);
struct fpx_event {
uuid_t fe_uuid;
NULL);
}
-#else
-
-void
-fpxlog_init(void)
-{
-}
-
#endif /* __x86_64__ */
extern struct vnodeops dead_vnodeops;
extern struct vnodeops spec_vnodeops;
-/* vars for vnode lock */
-lck_grp_t * vnode_lck_grp;
-lck_grp_attr_t * vnode_lck_grp_attr;
-lck_attr_t * vnode_lck_attr;
-
-#if CONFIG_TRIGGERS
-/* vars for vnode trigger resolver */
-lck_grp_t * trigger_vnode_lck_grp;
-lck_grp_attr_t * trigger_vnode_lck_grp_attr;
-lck_attr_t * trigger_vnode_lck_attr;
-#endif
-
-lck_grp_t * fd_vn_lck_grp;
-lck_grp_attr_t * fd_vn_lck_grp_attr;
-lck_attr_t * fd_vn_lck_attr;
-
/* vars for vnode list lock */
-lck_grp_t * vnode_list_lck_grp;
-lck_grp_attr_t * vnode_list_lck_grp_attr;
-lck_attr_t * vnode_list_lck_attr;
-lck_spin_t * vnode_list_spin_lock;
-lck_mtx_t * spechash_mtx_lock;
-
-/* vars for vfsconf lock */
-lck_grp_t * fsconf_lck_grp;
-lck_grp_attr_t * fsconf_lck_grp_attr;
-lck_attr_t * fsconf_lck_attr;
-
+static LCK_GRP_DECLARE(vnode_list_lck_grp, "vnode list");
+static LCK_ATTR_DECLARE(vnode_list_lck_attr, 0, 0);
+static LCK_SPIN_DECLARE_ATTR(vnode_list_spin_lock,
+ &vnode_list_lck_grp, &vnode_list_lck_attr);
+static LCK_MTX_DECLARE_ATTR(spechash_mtx_lock,
+ &vnode_list_lck_grp, &vnode_list_lck_attr);
+LCK_MTX_DECLARE_ATTR(pkg_extensions_lck,
+ &vnode_list_lck_grp, &vnode_list_lck_attr);
/* vars for mount lock */
-lck_grp_t * mnt_lck_grp;
-lck_grp_attr_t * mnt_lck_grp_attr;
-lck_attr_t * mnt_lck_attr;
+static LCK_GRP_DECLARE(mnt_lck_grp, "mount");
+static LCK_ATTR_DECLARE(mnt_lck_attr, 0, 0);
/* vars for mount list lock */
-lck_grp_t * mnt_list_lck_grp;
-lck_grp_attr_t * mnt_list_lck_grp_attr;
-lck_attr_t * mnt_list_lck_attr;
-lck_mtx_t * mnt_list_mtx_lock;
-
-/* vars for sync mutex */
-lck_grp_t * sync_mtx_lck_grp;
-lck_grp_attr_t * sync_mtx_lck_grp_attr;
-lck_attr_t * sync_mtx_lck_attr;
-lck_mtx_t * sync_mtx_lck;
-
-lck_mtx_t *pkg_extensions_lck;
+static LCK_GRP_DECLARE(mnt_list_lck_grp, "mount list");
+LCK_MTX_DECLARE(mnt_list_mtx_lock, &mnt_list_lck_grp);
struct mount * dead_mountp;
int i, maxtypenum;
struct mount * mp;
- /* Allocate vnode list lock group attribute and group */
- vnode_list_lck_grp_attr = lck_grp_attr_alloc_init();
-
- vnode_list_lck_grp = lck_grp_alloc_init("vnode list", vnode_list_lck_grp_attr);
-
- /* Allocate vnode list lock attribute */
- vnode_list_lck_attr = lck_attr_alloc_init();
-
- /* Allocate vnode list lock */
- vnode_list_spin_lock = lck_spin_alloc_init(vnode_list_lck_grp, vnode_list_lck_attr);
-
- /* Allocate spec hash list lock */
- spechash_mtx_lock = lck_mtx_alloc_init(vnode_list_lck_grp, vnode_list_lck_attr);
-
- /* Allocate the package extensions table lock */
- pkg_extensions_lck = lck_mtx_alloc_init(vnode_list_lck_grp, vnode_list_lck_attr);
-
- /* allocate vnode lock group attribute and group */
- vnode_lck_grp_attr = lck_grp_attr_alloc_init();
-
- vnode_lck_grp = lck_grp_alloc_init("vnode", vnode_lck_grp_attr);
-
- /* Allocate vnode lock attribute */
- vnode_lck_attr = lck_attr_alloc_init();
-
-#if CONFIG_TRIGGERS
- trigger_vnode_lck_grp_attr = lck_grp_attr_alloc_init();
- trigger_vnode_lck_grp = lck_grp_alloc_init("trigger_vnode", trigger_vnode_lck_grp_attr);
- trigger_vnode_lck_attr = lck_attr_alloc_init();
-#endif
- /* Allocate per fd vnode data lock attribute and group */
- fd_vn_lck_grp_attr = lck_grp_attr_alloc_init();
- fd_vn_lck_grp = lck_grp_alloc_init("fd_vnode_data", fd_vn_lck_grp_attr);
- fd_vn_lck_attr = lck_attr_alloc_init();
-
- /* Allocate fs config lock group attribute and group */
- fsconf_lck_grp_attr = lck_grp_attr_alloc_init();
-
- fsconf_lck_grp = lck_grp_alloc_init("fs conf", fsconf_lck_grp_attr);
-
- /* Allocate fs config lock attribute */
- fsconf_lck_attr = lck_attr_alloc_init();
-
- /* Allocate mount point related lock structures */
-
- /* Allocate mount list lock group attribute and group */
- mnt_list_lck_grp_attr = lck_grp_attr_alloc_init();
-
- mnt_list_lck_grp = lck_grp_alloc_init("mount list", mnt_list_lck_grp_attr);
-
- /* Allocate mount list lock attribute */
- mnt_list_lck_attr = lck_attr_alloc_init();
-
- /* Allocate mount list lock */
- mnt_list_mtx_lock = lck_mtx_alloc_init(mnt_list_lck_grp, mnt_list_lck_attr);
-
-
- /* allocate mount lock group attribute and group */
- mnt_lck_grp_attr = lck_grp_attr_alloc_init();
-
- mnt_lck_grp = lck_grp_alloc_init("mount", mnt_lck_grp_attr);
-
- /* Allocate mount lock attribute */
- mnt_lck_attr = lck_attr_alloc_init();
-
- /* Allocate sync lock */
- sync_mtx_lck_grp_attr = lck_grp_attr_alloc_init();
- sync_mtx_lck_grp = lck_grp_alloc_init("sync thread", sync_mtx_lck_grp_attr);
- sync_mtx_lck_attr = lck_attr_alloc_init();
- sync_mtx_lck = lck_mtx_alloc_init(sync_mtx_lck_grp, sync_mtx_lck_attr);
-
/*
* Initialize the vnode table
*/
*/
vnode_authorize_init();
- /*
- * Initialiize the quota system.
- */
-#if QUOTA
- dqinit();
-#endif
-
/*
* create a mount point for dead vnodes
*/
void
vnode_list_lock(void)
{
- lck_spin_lock_grp(vnode_list_spin_lock, vnode_list_lck_grp);
+ lck_spin_lock_grp(&vnode_list_spin_lock, &vnode_list_lck_grp);
}
void
vnode_list_unlock(void)
{
- lck_spin_unlock(vnode_list_spin_lock);
+ lck_spin_unlock(&vnode_list_spin_lock);
}
void
mount_list_lock(void)
{
- lck_mtx_lock(mnt_list_mtx_lock);
+ lck_mtx_lock(&mnt_list_mtx_lock);
}
void
mount_list_unlock(void)
{
- lck_mtx_unlock(mnt_list_mtx_lock);
+ lck_mtx_unlock(&mnt_list_mtx_lock);
}
void
mount_lock_init(mount_t mp)
{
- lck_mtx_init(&mp->mnt_mlock, mnt_lck_grp, mnt_lck_attr);
- lck_mtx_init(&mp->mnt_iter_lock, mnt_lck_grp, mnt_lck_attr);
- lck_mtx_init(&mp->mnt_renamelock, mnt_lck_grp, mnt_lck_attr);
- lck_rw_init(&mp->mnt_rwlock, mnt_lck_grp, mnt_lck_attr);
+ lck_mtx_init(&mp->mnt_mlock, &mnt_lck_grp, &mnt_lck_attr);
+ lck_mtx_init(&mp->mnt_iter_lock, &mnt_lck_grp, &mnt_lck_attr);
+ lck_mtx_init(&mp->mnt_renamelock, &mnt_lck_grp, &mnt_lck_attr);
+ lck_rw_init(&mp->mnt_rwlock, &mnt_lck_grp, &mnt_lck_attr);
}
void
mount_lock_destroy(mount_t mp)
{
- lck_mtx_destroy(&mp->mnt_mlock, mnt_lck_grp);
- lck_mtx_destroy(&mp->mnt_iter_lock, mnt_lck_grp);
- lck_mtx_destroy(&mp->mnt_renamelock, mnt_lck_grp);
- lck_rw_destroy(&mp->mnt_rwlock, mnt_lck_grp);
+ lck_mtx_destroy(&mp->mnt_mlock, &mnt_lck_grp);
+ lck_mtx_destroy(&mp->mnt_iter_lock, &mnt_lck_grp);
+ lck_mtx_destroy(&mp->mnt_renamelock, &mnt_lck_grp);
+ lck_rw_destroy(&mp->mnt_rwlock, &mnt_lck_grp);
}
struct vfstable *vcdelp;
#if DEBUG
- lck_mtx_assert(mnt_list_mtx_lock, LCK_MTX_ASSERT_OWNED);
+ lck_mtx_assert(&mnt_list_mtx_lock, LCK_MTX_ASSERT_OWNED);
#endif /* DEBUG */
/*
}
#if DEBUG
- lck_mtx_assert(mnt_list_mtx_lock, LCK_MTX_ASSERT_OWNED);
+ lck_mtx_assert(&mnt_list_mtx_lock, LCK_MTX_ASSERT_OWNED);
#endif /* DEBUG */
return 0;
void
SPECHASH_LOCK(void)
{
- lck_mtx_lock(spechash_mtx_lock);
+ lck_mtx_lock(&spechash_mtx_lock);
}
void
SPECHASH_UNLOCK(void)
{
- lck_mtx_unlock(spechash_mtx_lock);
+ lck_mtx_unlock(&spechash_mtx_lock);
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <kern/cpu_data.h>
+#include <kern/cpu_number.h>
+#include <kern/host.h>
+
+#include <mach/host_priv.h>
+#include <mach/host_special_ports.h>
+#include <mach/host_info.h>
+#include <mach/iocompressionstats_notification_server.h>
+#include <mach/mach_host.h>
+
+#include <sys/mount_internal.h>
+#include <sys/param.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/vnode_internal.h>
+
+#include <vfs/vfs_io_compression_stats.h>
+
+#include <vm/lz4.h>
+#include <vm/vm_compressor_algorithms.h>
+#include <vm/vm_protos.h>
+
+
+int io_compression_stats_enable = 0;
+int io_compression_stats_block_size = IO_COMPRESSION_STATS_DEFAULT_BLOCK_SIZE;
+
+#define LZ4_SCRATCH_ALIGN (64)
+typedef struct {
+ uint8_t lz4state[lz4_encode_scratch_size]__attribute((aligned(LZ4_SCRATCH_ALIGN)));
+} lz4_encode_scratch_t;
+
+lz4_encode_scratch_t **per_cpu_scratch_buf;
+uint8_t **per_cpu_compression_buf;
+uint32_t io_compression_stats_cpu_count;
+char *vnpath_scratch_buf;
+
+LCK_GRP_DECLARE(io_compression_stats_lckgrp, "io_compression_stats");
+LCK_RW_DECLARE(io_compression_stats_lock, &io_compression_stats_lckgrp);
+LCK_MTX_DECLARE(iocs_store_buffer_lock, &io_compression_stats_lckgrp);
+
+typedef enum io_compression_stats_allocate_type {
+ IO_COMPRESSION_STATS_NEW_ALLOC = 0,
+ IO_COMPRESSION_STATS_RESIZE = 1
+} io_compression_stats_alloc_type_t;
+
+static void io_compression_stats_deallocate_compression_buffers(void);
+
+struct iocs_store_buffer iocs_store_buffer = {
+ .buffer = 0,
+ .current_position = 0,
+ .marked_point = 0
+};
+
+int iocs_sb_bytes_since_last_mark = 0;
+int iocs_sb_bytes_since_last_notification = 0;
+
+ZONE_DECLARE(io_compression_stats_zone, "io_compression_stats",
+ sizeof(struct io_compression_stats), ZC_NOENCRYPT | ZC_NOGC | ZC_ZFREE_CLEARMEM);
+
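+/*
+ * Allocate the per-CPU LZ4 scratch and compression buffers (sized to
+ * block_size), the circular stats store buffer and the vnode-path scratch
+ * buffer. Any allocation failure deallocates everything and returns ENOMEM.
+ */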
+static int
+io_compression_stats_allocate_compression_buffers(io_compression_stats_alloc_type_t alloc_type, uint32_t block_size)
+{
+ int err = 0;
+ host_basic_info_data_t hinfo;
+ mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
+#define BSD_HOST 1
+ host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
+
+ io_compression_stats_cpu_count = hinfo.max_cpus;
+ if (alloc_type == IO_COMPRESSION_STATS_NEW_ALLOC) {
+ assert(per_cpu_scratch_buf == NULL);
+ per_cpu_scratch_buf = kheap_alloc(KHEAP_DEFAULT, sizeof(lz4_encode_scratch_t *) * io_compression_stats_cpu_count, Z_ZERO);
+ if (per_cpu_scratch_buf == NULL) {
+ err = ENOMEM;
+ goto out;
+ }
+ assert(per_cpu_compression_buf == NULL);
+ per_cpu_compression_buf = kheap_alloc(KHEAP_DEFAULT, sizeof(uint8_t *) * io_compression_stats_cpu_count, Z_ZERO);
+ if (per_cpu_compression_buf == NULL) {
+ err = ENOMEM;
+ goto out;
+ }
+ }
+ for (uint32_t cpu = 0; cpu < io_compression_stats_cpu_count; cpu++) {
+ if (alloc_type == IO_COMPRESSION_STATS_NEW_ALLOC) {
+ per_cpu_scratch_buf[cpu] = kheap_alloc(KHEAP_DEFAULT, sizeof(lz4_encode_scratch_t), Z_ZERO);
+ if (per_cpu_scratch_buf[cpu] == NULL) {
+ err = ENOMEM;
+ goto out;
+ }
+ } else {
+ kheap_free_addr(KHEAP_DEFAULT, per_cpu_compression_buf[cpu]);
+ }
+ per_cpu_compression_buf[cpu] = kheap_alloc(KHEAP_DEFAULT, block_size, Z_ZERO);
+ if (per_cpu_compression_buf[cpu] == NULL) {
+ err = ENOMEM;
+ goto out;
+ }
+ }
+ bzero(&iocs_store_buffer, sizeof(struct iocs_store_buffer));
+ iocs_store_buffer.buffer = kheap_alloc(KHEAP_DEFAULT, IOCS_STORE_BUFFER_SIZE, Z_ZERO);
+ if (iocs_store_buffer.buffer == NULL) {
+ err = ENOMEM;
+ goto out;
+ }
+ iocs_store_buffer.current_position = 0;
+ iocs_store_buffer.marked_point = 0;
+
+ assert(vnpath_scratch_buf == NULL);
+ vnpath_scratch_buf = kheap_alloc(KHEAP_DEFAULT, MAXPATHLEN, Z_ZERO);
+ if (vnpath_scratch_buf == NULL) {
+ err = ENOMEM;
+ goto out;
+ }
+
+out:
+ if (err) {
+ /* In case of any error, irrespective of whether it is new alloc or resize,
+ * deallocate all buffers and fail */
+ io_compression_stats_deallocate_compression_buffers();
+ }
+ return err;
+}
+
+static void
+io_compression_stats_deallocate_compression_buffers()
+{
+ uint32_t cpu;
+ if (per_cpu_compression_buf != NULL) {
+ for (cpu = 0; cpu < io_compression_stats_cpu_count; cpu++) {
+ if (per_cpu_compression_buf[cpu] != NULL) {
+ kheap_free_addr(KHEAP_DEFAULT, per_cpu_compression_buf[cpu]);
+ per_cpu_compression_buf[cpu] = NULL;
+ }
+ }
+ kheap_free_addr(KHEAP_DEFAULT, per_cpu_compression_buf);
+ per_cpu_compression_buf = NULL;
+ }
+
+ if (per_cpu_scratch_buf != NULL) {
+ for (cpu = 0; cpu < io_compression_stats_cpu_count; cpu++) {
+ if (per_cpu_scratch_buf[cpu] != NULL) {
+ kheap_free_addr(KHEAP_DEFAULT, per_cpu_scratch_buf[cpu]);
+ per_cpu_scratch_buf[cpu] = NULL;
+ }
+ }
+ kheap_free_addr(KHEAP_DEFAULT, per_cpu_scratch_buf);
+ per_cpu_scratch_buf = NULL;
+ }
+
+ if (iocs_store_buffer.buffer != NULL) {
+ kheap_free_addr(KHEAP_DEFAULT, iocs_store_buffer.buffer);
+ bzero(&iocs_store_buffer, sizeof(struct iocs_store_buffer));
+ }
+
+ iocs_sb_bytes_since_last_mark = 0;
+ iocs_sb_bytes_since_last_notification = 0;
+
+ if (vnpath_scratch_buf != NULL) {
+ kheap_free_addr(KHEAP_DEFAULT, vnpath_scratch_buf);
+ vnpath_scratch_buf = NULL;
+ }
+}
+
+
+static int
+sysctl_io_compression_stats_enable SYSCTL_HANDLER_ARGS
+{
+#pragma unused (arg1, arg2, oidp)
+
+ int error = 0;
+ int enable = 0;
+
+ error = SYSCTL_OUT(req, &io_compression_stats_enable, sizeof(int));
+
+ if (error || !req->newptr) {
+ return error;
+ }
+
+ error = SYSCTL_IN(req, &enable, sizeof(int));
+ if (error) {
+ return error;
+ }
+
+ if (!((enable == 1) || (enable == 0))) {
+ return EINVAL;
+ }
+
+ lck_rw_lock_exclusive(&io_compression_stats_lock);
+ lck_mtx_lock(&iocs_store_buffer_lock);
+ if ((io_compression_stats_enable == 0) && (enable == 1)) {
+ /* Enabling collection of stats. Allocate appropriate buffers */
+ error = io_compression_stats_allocate_compression_buffers(IO_COMPRESSION_STATS_NEW_ALLOC, io_compression_stats_block_size);
+ if (error == 0) {
+ io_compression_stats_enable = enable;
+ io_compression_stats_dbg("SUCCESS: setting io_compression_stats_enable to %d", io_compression_stats_enable);
+ } else {
+ io_compression_stats_dbg("FAILED: setting io_compression_stats_enable to %d", io_compression_stats_enable);
+ }
+ } else if ((io_compression_stats_enable == 1) && (enable == 0)) {
+ io_compression_stats_deallocate_compression_buffers();
+ io_compression_stats_enable = 0;
+ io_compression_stats_dbg("SUCCESS: setting io_compression_stats_enable to %d", io_compression_stats_enable);
+ }
+ lck_mtx_unlock(&iocs_store_buffer_lock);
+ lck_rw_unlock_exclusive(&io_compression_stats_lock);
+
+ return error;
+}
+SYSCTL_PROC(_vfs, OID_AUTO, io_compression_stats_enable, CTLTYPE_INT | CTLFLAG_RW, 0, 0, &sysctl_io_compression_stats_enable, "I", "");
+
+static int
+sysctl_io_compression_block_size SYSCTL_HANDLER_ARGS
+{
+#pragma unused (arg1, arg2, oidp)
+
+ int error = 0;
+ int block_size = io_compression_stats_block_size;
+
+ error = SYSCTL_OUT(req, &block_size, sizeof(int));
+
+ if (error || !req->newptr) {
+ return error;
+ }
+
+ error = SYSCTL_IN(req, &block_size, sizeof(int));
+ if (error) {
+ return error;
+ }
+
+ if (block_size < IO_COMPRESSION_STATS_MIN_BLOCK_SIZE || block_size > IO_COMPRESSION_STATS_MAX_BLOCK_SIZE) {
+ return EINVAL;
+ }
+
+ lck_rw_lock_exclusive(&io_compression_stats_lock);
+
+ if (io_compression_stats_block_size != block_size) {
+ if (io_compression_stats_enable == 1) {
+ /* IO compression stats is enabled, reallocate buffers. */
+ error = io_compression_stats_allocate_compression_buffers(IO_COMPRESSION_STATS_RESIZE, block_size);
+ if (error == 0) {
+ io_compression_stats_block_size = block_size;
+ io_compression_stats_dbg("SUCCESS: setting io_compression_stats_block_size to %d", io_compression_stats_block_size);
+ } else {
+ /* Failed to allocate buffers, disable IO compression stats */
+ io_compression_stats_enable = 0;
+ io_compression_stats_dbg("Failed: setting io_compression_stats_block_size to %d", io_compression_stats_block_size);
+ }
+ } else {
+ /* IO compression stats is disabled, only set the io_compression_stats_block_size */
+ io_compression_stats_block_size = block_size;
+ io_compression_stats_dbg("SUCCESS: setting io_compression_stats_block_size to %d", io_compression_stats_block_size);
+ }
+ }
+ lck_rw_unlock_exclusive(&io_compression_stats_lock);
+
+
+ return error;
+}
+SYSCTL_PROC(_vfs, OID_AUTO, io_compression_stats_block_size, CTLTYPE_INT | CTLFLAG_RW, 0, 0, &sysctl_io_compression_block_size, "I", "");
+
+
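+/*
+ * Compress a single block with LZ4 using this CPU's scratch and destination
+ * buffers (preemption stays disabled so the buffers remain ours). Returns the
+ * compressed size, 0 if the block did not compress, or -1 if there is no
+ * per-CPU buffer for this CPU.
+ */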
+static int32_t
+iocs_compress_block(uint8_t *block_ptr, uint32_t block_size)
+{
+ disable_preemption();
+
+ uint32_t current_cpu = cpu_number();
+ if (!(current_cpu < io_compression_stats_cpu_count)) {
+ enable_preemption();
+ return -1;
+ }
+
+ lz4_encode_scratch_t *scratch_buf = per_cpu_scratch_buf[current_cpu];
+ uint8_t *dest_buf = per_cpu_compression_buf[current_cpu];
+
+ int compressed_block_size = (int) lz4raw_encode_buffer(dest_buf, block_size,
+ block_ptr, block_size, (lz4_hash_entry_t *) scratch_buf);
+
+ enable_preemption();
+
+ return compressed_block_size;
+}
+/*
+ * Compress buf in chunks of io_compression_stats_block_size
+ */
+static uint32_t
+iocs_compress_buffer(vnode_t vn, uint8_t *buf_ptr, uint32_t buf_size)
+{
+ uint32_t offset;
+ uint32_t compressed_size = 0;
+ int block_size = io_compression_stats_block_size;
+ int block_stats_scaling_factor = block_size / IOCS_BLOCK_NUM_SIZE_BUCKETS;
+
+ for (offset = 0; offset < buf_size; offset += block_size) {
+ int current_block_size = min(block_size, buf_size - offset);
+ int current_compressed_block_size = iocs_compress_block(buf_ptr + offset, current_block_size);
+
+ if (current_compressed_block_size == 0) {
+ compressed_size += current_block_size;
+ vnode_updateiocompressionblockstats(vn, current_block_size / block_stats_scaling_factor);
+ } else if (current_compressed_block_size != -1) {
+ compressed_size += current_compressed_block_size;
+ vnode_updateiocompressionblockstats(vn, current_compressed_block_size / block_stats_scaling_factor);
+ }
+ }
+
+ return compressed_size;
+}
+
+static uint32_t
+log2down(uint32_t x)
+{
+ return 31 - __builtin_clz(x);
+}
+
+/*
+ * Once we get the IO compression stats for the entire buffer, we update buffer_size_compressibility_dist,
+ * which helps us observe distribution across various io sizes and compression factors.
+ * The goal of next two functions is to get the index in this buffer_size_compressibility_dist table.
+ */
+
+/*
+ * Maps IO size to a bucket between 0 - IO_COMPRESSION_STATS_MAX_SIZE_BUCKET
+ * for size < 4096 returns 0 and size > 1MB returns IO_COMPRESSION_STATS_MAX_SIZE_BUCKET (9).
+ * For IO sizes in-between we arrive at the index based on log2 function.
+ * sizes 4097 - 8192 => index = 1,
+ * sizes 8193 - 16384 => index = 2, and so on
+ */
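+/*
+ * For example, an 8192-byte IO maps to log2down(8191) - 11 = 12 - 11 = 1 and a
+ * 16384-byte IO maps to log2down(16383) - 11 = 13 - 11 = 2.
+ */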
+#define SIZE_COMPRESSION_DIST_SIZE_BUCKET_MIN 4096
+#define SIZE_COMPRESSION_DIST_SIZE_BUCKET_MAX (1024 * 1024)
+static uint32_t
+get_buffer_size_bucket(uint32_t size)
+{
+ if (size <= SIZE_COMPRESSION_DIST_SIZE_BUCKET_MIN) {
+ return 0;
+ }
+ if (size > SIZE_COMPRESSION_DIST_SIZE_BUCKET_MAX) {
+ return IOCS_BUFFER_MAX_BUCKET;
+ }
+#define IOCS_INDEX_MAP_OFFSET 11
+ return log2down(size - 1) - IOCS_INDEX_MAP_OFFSET;
+}
+
+/*
+ * Maps compression factor to a bucket between 0 - IO_COMPRESSION_STATS_MAX_COMPRESSION_BUCKET
+ */
+static uint32_t
+get_buffer_compressibility_bucket(uint32_t uncompressed_size, uint32_t compressed_size)
+{
+ int saved_space_pc = (uncompressed_size - compressed_size) * 100 / uncompressed_size;
+
+ if (saved_space_pc < 0) {
+ saved_space_pc = 0;
+ }
+
+ /* saved_space_pc lies between 0 and 100, so log2down(saved_space_pc) lies between 0 and 6 (e.g. a 50% saving maps to bucket 5) */
+ return log2down(saved_space_pc);
+}
+
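+/*
+ * Estimate the compressibility of an outgoing (write) buffer and record the
+ * result against its vnode. Reads are ignored, and the shared stats lock is
+ * only try-acquired so this path never blocks behind a sysctl that is
+ * reconfiguring the buffers.
+ */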
+void
+io_compression_stats(buf_t bp)
+{
+ uint8_t *buf_ptr = NULL;
+ int bflags = bp->b_flags;
+ uint32_t compressed_size = 0;
+ uint32_t buf_cnt = buf_count(bp);
+ uint64_t duration = 0;
+ caddr_t vaddr = NULL;
+ vnode_t vn = buf_vnode(bp);
+ int err = 0;
+
+ if ((io_compression_stats_enable != 1) || (bflags & B_READ) || (buf_cnt <= 0)) {
+ return;
+ }
+
+ if (!lck_rw_try_lock_shared(&io_compression_stats_lock)) {
+ /* sysctl modifying IO compression stats parameters is in progress.
+ * Don't block, since malloc might be in progress. */
+ return;
+ }
+ /* re-check io_compression_stats_enable with lock */
+ if (io_compression_stats_enable != 1) {
+ goto out;
+ }
+
+ err = buf_map(bp, &vaddr);
+ if (!err) {
+ buf_ptr = (uint8_t *) vaddr;
+ }
+
+ if (buf_ptr != NULL) {
+ int64_t start = mach_absolute_time();
+ compressed_size = iocs_compress_buffer(vn, buf_ptr, buf_cnt);
+ absolutetime_to_nanoseconds(mach_absolute_time() - start, &duration);
+
+ if (compressed_size != 0) {
+ vnode_updateiocompressionbufferstats(vn, buf_cnt, compressed_size,
+ get_buffer_size_bucket(buf_cnt),
+ get_buffer_compressibility_bucket(buf_cnt, compressed_size));
+ }
+ }
+
+ KDBG_RELEASE(FSDBG_CODE(DBG_VFS, DBG_VFS_IO_COMPRESSION_STATS) | DBG_FUNC_NONE,
+ duration, io_compression_stats_block_size, compressed_size, buf_cnt, 0);
+
+out:
+ lck_rw_unlock_shared(&io_compression_stats_lock);
+ if (buf_ptr != NULL) {
+ buf_unmap(bp);
+ }
+}
+
+static void
+iocs_notify_user(void)
+{
+ mach_port_t user_port = MACH_PORT_NULL;
+ kern_return_t kr = host_get_iocompressionstats_port(host_priv_self(), &user_port);
+ if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(user_port)) {
+ return;
+ }
+ iocompressionstats_notification(user_port, 0);
+ ipc_port_release_send(user_port);
+}
+static void
+construct_iocs_sbe_from_vnode(struct vnode *vp, struct iocs_store_buffer_entry *iocs_sbe)
+{
+ int path_len = MAXPATHLEN;
+
+ vn_getpath(vp, vnpath_scratch_buf, &path_len);
+ /*
+ * The total path length is path_len, but we can only copy out IOCS_SBE_PATH_LEN bytes. We are
+ * interested in the first segment of the path (to figure out the process writing to the file)
+ * and in the last segment (to figure out the extension). So, when IOCS_SBE_PATH_LEN < path_len,
+ * copy out the first IOCS_PATH_START_BYTES_TO_COPY bytes and the last IOCS_PATH_END_BYTES_TO_COPY
+ * bytes (the last segment includes the null character).
+ */
+ if (path_len > IOCS_SBE_PATH_LEN) {
+ strncpy(iocs_sbe->path_name, vnpath_scratch_buf, IOCS_PATH_START_BYTES_TO_COPY);
+ strncpy(iocs_sbe->path_name + IOCS_PATH_START_BYTES_TO_COPY,
+ vnpath_scratch_buf + path_len - IOCS_PATH_END_BYTES_TO_COPY,
+ IOCS_PATH_END_BYTES_TO_COPY);
+ } else {
+ strncpy(iocs_sbe->path_name, vnpath_scratch_buf, IOCS_SBE_PATH_LEN);
+ }
+ memcpy(&iocs_sbe->iocs, vp->io_compression_stats, sizeof(struct io_compression_stats));
+}
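+/*
+ * Record a reclaimed vnode's accumulated compression stats into the circular
+ * store buffer (wrapping to the start when the next entry would not fit),
+ * free the per-vnode stats, and notify userspace once enough new entries have
+ * built up since the last mark.
+ */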
+void
+vnode_iocs_record_and_free(struct vnode *vp)
+{
+ int notify = 0;
+ struct iocs_store_buffer_entry *iocs_sbe = NULL;
+
+ if (!lck_mtx_try_lock(&iocs_store_buffer_lock)) {
+ goto out;
+ }
+
+ if (iocs_store_buffer.buffer == NULL) {
+ goto release;
+ }
+
+ assert(iocs_store_buffer.current_position + sizeof(struct iocs_store_buffer_entry) <= IOCS_STORE_BUFFER_SIZE);
+
+ iocs_sbe = (struct iocs_store_buffer_entry *)(iocs_store_buffer.buffer + iocs_store_buffer.current_position);
+
+ construct_iocs_sbe_from_vnode(vp, iocs_sbe);
+
+ iocs_store_buffer.current_position += sizeof(struct iocs_store_buffer_entry);
+
+ if (iocs_store_buffer.current_position + sizeof(struct iocs_store_buffer_entry) > IOCS_STORE_BUFFER_SIZE) {
+ /* We've reached end of the buffer, move back to the top */
+ iocs_store_buffer.current_position = 0;
+ }
+
+ iocs_sb_bytes_since_last_mark += sizeof(struct iocs_store_buffer_entry);
+ iocs_sb_bytes_since_last_notification += sizeof(struct iocs_store_buffer_entry);
+
+ if ((iocs_sb_bytes_since_last_mark > IOCS_STORE_BUFFER_NOTIFY_AT) &&
+ (iocs_sb_bytes_since_last_notification > IOCS_STORE_BUFFER_NOTIFICATION_INTERVAL)) {
+ notify = 1;
+ iocs_sb_bytes_since_last_notification = 0;
+ }
+
+release:
+ lck_mtx_unlock(&iocs_store_buffer_lock);
+out:
+ /* We need to free io_compression_stats whether or not we were able to record it */
+ bzero(vp->io_compression_stats, sizeof(struct io_compression_stats));
+ zfree(io_compression_stats_zone, vp->io_compression_stats);
+ vp->io_compression_stats = NULL;
+ if (notify) {
+ iocs_notify_user();
+ }
+}
+
+struct vnode_iocs_context {
+ struct sysctl_req *addr;
+ int current_ptr;
+};
+
+static int
+vnode_iocs_callback(struct vnode *vp, void *vctx)
+{
+ struct vnode_iocs_context *ctx = vctx;
+ struct sysctl_req *req = ctx->addr;
+ int current_ptr = ctx->current_ptr;
+
+ if (current_ptr + sizeof(struct iocs_store_buffer_entry) < req->oldlen) {
+ if (vp->io_compression_stats != NULL) {
+ construct_iocs_sbe_from_vnode(vp, (struct iocs_store_buffer_entry *) (req->oldptr + current_ptr));
+ current_ptr += sizeof(struct iocs_store_buffer_entry);
+ }
+ } else {
+ return VNODE_RETURNED_DONE;
+ }
+ ctx->current_ptr = current_ptr;
+
+ return VNODE_RETURNED;
+}
+
+static int
+vfs_iocs_callback(mount_t mp, void *arg)
+{
+ if (mp->mnt_flag & MNT_LOCAL) {
+ vnode_iterate(mp, VNODE_ITERATE_ALL, vnode_iocs_callback, arg);
+ }
+
+ return VFS_RETURNED;
+}
+
+extern long numvnodes;
+
+static int
+sysctl_io_compression_dump_stats SYSCTL_HANDLER_ARGS
+{
+#pragma unused (arg1, arg2, oidp)
+
+ int32_t error = 0;
+ uint32_t inp_flag = 0;
+ uint32_t ret_len;
+
+ if (io_compression_stats_enable == 0) {
+ error = EINVAL;
+ goto out;
+ }
+
+ if ((req->newptr != USER_ADDR_NULL) && (req->newlen == sizeof(uint32_t))) {
+ error = SYSCTL_IN(req, &inp_flag, sizeof(uint32_t));
+ if (error) {
+ goto out;
+ }
+ switch (inp_flag) {
+ case IOCS_SYSCTL_LIVE:
+ case IOCS_SYSCTL_STORE_BUFFER_RD_ONLY:
+ case IOCS_SYSCTL_STORE_BUFFER_MARK:
+ break;
+ default:
+ error = EINVAL;
+ goto out;
+ }
+ } else {
+ error = EINVAL;
+ goto out;
+ }
+
+ if (req->oldptr == USER_ADDR_NULL) {
+ /* Query to figure out size of the buffer */
+ if (inp_flag & IOCS_SYSCTL_LIVE) {
+ req->oldidx = numvnodes * sizeof(struct iocs_store_buffer_entry);
+ } else {
+ /* Buffer size for archived case, let's keep it
+ * simple and return IOCS store buffer size */
+ req->oldidx = IOCS_STORE_BUFFER_SIZE;
+ }
+ goto out;
+ }
+
+ if (inp_flag & IOCS_SYSCTL_LIVE) {
+ struct vnode_iocs_context ctx;
+
+ bzero(&ctx, sizeof(struct vnode_iocs_context));
+ ctx.addr = req;
+ vfs_iterate(0, vfs_iocs_callback, &ctx);
+ req->oldidx = ctx.current_ptr;
+ goto out;
+ }
+
+ /* reading from store buffer */
+ lck_mtx_lock(&iocs_store_buffer_lock);
+
+ if (iocs_store_buffer.buffer == NULL) {
+ error = EINVAL;
+ goto release;
+ }
+ if (iocs_sb_bytes_since_last_mark == 0) {
+ req->oldidx = 0;
+ goto release;
+ }
+
+ int expected_size = 0;
+ /* Dry run to figure out amount of space required to copy out the
+ * iocs_store_buffer.buffer */
+ if (iocs_store_buffer.marked_point < iocs_store_buffer.current_position) {
+ expected_size = iocs_store_buffer.current_position - iocs_store_buffer.marked_point;
+ } else {
+ expected_size = IOCS_STORE_BUFFER_SIZE - iocs_store_buffer.marked_point;
+ expected_size += iocs_store_buffer.current_position;
+ }
+
+ if (req->oldlen < expected_size) {
+ error = ENOMEM;
+ req->oldidx = 0;
+ goto release;
+ }
+
+ if (iocs_store_buffer.marked_point < iocs_store_buffer.current_position) {
+ error = copyout(iocs_store_buffer.buffer + iocs_store_buffer.marked_point,
+ req->oldptr,
+ iocs_store_buffer.current_position - iocs_store_buffer.marked_point);
+ if (error) {
+ req->oldidx = 0;
+ goto release;
+ }
+ ret_len = iocs_store_buffer.current_position - iocs_store_buffer.marked_point;
+ } else {
+ error = copyout(iocs_store_buffer.buffer + iocs_store_buffer.marked_point,
+ req->oldptr,
+ IOCS_STORE_BUFFER_SIZE - iocs_store_buffer.marked_point);
+ if (error) {
+ req->oldidx = 0;
+ goto release;
+ }
+ ret_len = IOCS_STORE_BUFFER_SIZE - iocs_store_buffer.marked_point;
+
+ error = copyout(iocs_store_buffer.buffer,
+ req->oldptr + ret_len,
+ iocs_store_buffer.current_position);
+ if (error) {
+ req->oldidx = 0;
+ goto release;
+ }
+ ret_len += iocs_store_buffer.current_position;
+ }
+
+ req->oldidx = ret_len;
+ if ((ret_len != 0) && (inp_flag & IOCS_SYSCTL_STORE_BUFFER_MARK)) {
+ iocs_sb_bytes_since_last_mark = 0;
+ iocs_store_buffer.marked_point = iocs_store_buffer.current_position;
+ }
+release:
+ lck_mtx_unlock(&iocs_store_buffer_lock);
+
+out:
+ return error;
+}
+SYSCTL_PROC(_vfs, OID_AUTO, io_compression_dump_stats, CTLFLAG_WR | CTLTYPE_NODE, 0, 0, sysctl_io_compression_dump_stats, "-", "");
+
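+/*
+ * Lazily allocate the vnode's io_compression_stats (double-checked under the
+ * vnode lock, freeing the loser of a race) and atomically bump the histogram
+ * bucket for this block's compressed size.
+ */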
+errno_t
+vnode_updateiocompressionblockstats(vnode_t vp, uint32_t size_bucket)
+{
+ if (vp == NULL) {
+ return EINVAL;
+ }
+
+ if (size_bucket >= IOCS_BLOCK_NUM_SIZE_BUCKETS) {
+ return EINVAL;
+ }
+
+ if (vp->io_compression_stats == NULL) {
+ io_compression_stats_t iocs = (io_compression_stats_t)zalloc_flags(io_compression_stats_zone, Z_ZERO);
+ if (iocs == NULL) {
+ return ENOMEM;
+ }
+ vnode_lock_spin(vp);
+ /* Re-check with lock */
+ if (vp->io_compression_stats == NULL) {
+ vp->io_compression_stats = iocs;
+ } else {
+ zfree(io_compression_stats_zone, iocs);
+ }
+ vnode_unlock(vp);
+ }
+ OSIncrementAtomic((SInt32 *)&vp->io_compression_stats->block_compressed_size_dist[size_bucket]);
+
+ return 0;
+}
+errno_t
+vnode_updateiocompressionbufferstats(__unused vnode_t vp, __unused uint64_t uncompressed_size, __unused uint64_t compressed_size, __unused uint32_t size_bucket, __unused uint32_t compression_bucket)
+{
+ if (vp == NULL) {
+ return EINVAL;
+ }
+
+ /* vnode_updateiocompressionblockstats will always be called before vnode_updateiocompressionbufferstats.
+ * Hence vp->io_compression_stats should already be allocated */
+ if (vp->io_compression_stats == NULL) {
+ return EINVAL;
+ }
+
+ if ((size_bucket >= IOCS_BUFFER_NUM_SIZE_BUCKETS) || (compression_bucket >= IOCS_BUFFER_NUM_COMPRESSION_BUCKETS)) {
+ return EINVAL;
+ }
+
+ OSAddAtomic64(uncompressed_size, &vp->io_compression_stats->uncompressed_size);
+ OSAddAtomic64(compressed_size, &vp->io_compression_stats->compressed_size);
+
+ OSIncrementAtomic((SInt32 *)&vp->io_compression_stats->buffer_size_compression_dist[size_bucket][compression_bucket]);
+
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _MISCFS_SPECFS_IO_COMPRESSION_STATS_H_
+#define _MISCFS_SPECFS_IO_COMPRESSION_STATS_H_
+
+#include <sys/buf_internal.h>
+#include <sys/vnode.h>
+
+void io_compression_stats_init(void);
+void io_compression_stats(buf_t bp);
+
+#define IO_COMPRESSION_STATS_DEFAULT_BLOCK_SIZE (4 * 1024)
+#define IO_COMPRESSION_STATS_MIN_BLOCK_SIZE (4 * 1024)
+#define IO_COMPRESSION_STATS_MAX_BLOCK_SIZE (1024 * 1024 * 1024)
+
+#if IO_COMPRESSION_STATS_DEBUG
+#define io_compression_stats_dbg(fmt, ...) \
+ printf("%s: " fmt "\n", __func__, ## __VA_ARGS__)
+#else
+#define io_compression_stats_dbg(fmt, ...)
+#endif
+
+/* iocs_store_buffer: Buffer that captures the stats of vnode being reclaimed */
+struct iocs_store_buffer {
+ void* buffer;
+ uint32_t current_position;
+ uint32_t marked_point;
+};
+
+#define IOCS_STORE_BUFFER_NUM_SLOTS 10000
+#define IOCS_STORE_BUFFER_SIZE (IOCS_STORE_BUFFER_NUM_SLOTS * (sizeof(struct iocs_store_buffer_entry)))
+
+/* Notify user when the buffer is 80% full */
+#define IOCS_STORE_BUFFER_NOTIFY_AT ((IOCS_STORE_BUFFER_SIZE * 8) / 10)
+
+/* Wait for the buffer to be 10% more full before notifying again */
+#define IOCS_STORE_BUFFER_NOTIFICATION_INTERVAL (IOCS_STORE_BUFFER_SIZE / 10)
+
+#endif
static int lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx);
#endif
-extern lck_rw_t * rootvnode_rw_lock;
+extern lck_rw_t rootvnode_rw_lock;
/*
* Convert a pathname into a pointer to a locked inode.
* determine the starting point for the translation.
*/
proc_dirs_lock_shared(p);
- lck_rw_lock_shared(rootvnode_rw_lock);
+ lck_rw_lock_shared(&rootvnode_rw_lock);
if (!(fdp->fd_flags & FD_CHROOT)) {
ndp->ni_rootdir = rootvnode;
/* This should be a panic */
printf("fdp->fd_rdir is not set\n");
}
- lck_rw_unlock_shared(rootvnode_rw_lock);
+ lck_rw_unlock_shared(&rootvnode_rw_lock);
proc_dirs_unlock_shared(p);
error = ENOENT;
goto error_out;
if (dp == NULLVP || (dp->v_lflag & VL_DEAD)) {
dp = NULLVP;
- lck_rw_unlock_shared(rootvnode_rw_lock);
+ lck_rw_unlock_shared(&rootvnode_rw_lock);
proc_dirs_unlock_shared(p);
error = ENOENT;
goto error_out;
}
/* Now that we have our usecount, release the locks */
- lck_rw_unlock_shared(rootvnode_rw_lock);
+ lck_rw_unlock_shared(&rootvnode_rw_lock);
proc_dirs_unlock_shared(p);
ndp->ni_dvp = NULLVP;
startdir_with_usecount = NULLVP;
}
if (rootdir_with_usecount) {
- lck_rw_lock_shared(rootvnode_rw_lock);
+ lck_rw_lock_shared(&rootvnode_rw_lock);
if (rootdir_with_usecount == rootvnode) {
old_count = os_atomic_dec_orig(&rootdir_with_usecount->v_usecount, relaxed);
if (old_count < 2) {
}
rootdir_with_usecount = NULLVP;
}
- lck_rw_unlock_shared(rootvnode_rw_lock);
+ lck_rw_unlock_shared(&rootvnode_rw_lock);
if (rootdir_with_usecount) {
vnode_rele(rootdir_with_usecount);
rootdir_with_usecount = NULLVP;
startdir_with_usecount = NULLVP;
}
if (rootdir_with_usecount) {
- lck_rw_lock_shared(rootvnode_rw_lock);
+ lck_rw_lock_shared(&rootvnode_rw_lock);
if (rootdir_with_usecount == rootvnode) {
old_count = os_atomic_dec_orig(&rootdir_with_usecount->v_usecount, relaxed);
if (old_count < 2) {
panic("(4) Unexpected pre-decrement value (%d) of usecount for rootvnode %p",
old_count, rootdir_with_usecount);
}
- lck_rw_unlock_shared(rootvnode_rw_lock);
+ lck_rw_unlock_shared(&rootvnode_rw_lock);
} else {
- lck_rw_unlock_shared(rootvnode_rw_lock);
+ lck_rw_unlock_shared(&rootvnode_rw_lock);
vnode_rele(rootdir_with_usecount);
}
rootdir_with_usecount = NULLVP;
/* vars for quota file lock */
-lck_grp_t * qf_lck_grp;
-lck_grp_attr_t * qf_lck_grp_attr;
-lck_attr_t * qf_lck_attr;
+static LCK_GRP_DECLARE(qf_lck_grp, "quota file");
/* vars for quota list lock */
-lck_grp_t * quota_list_lck_grp;
-lck_grp_attr_t * quota_list_lck_grp_attr;
-lck_attr_t * quota_list_lck_attr;
-lck_mtx_t * quota_list_mtx_lock;
+static LCK_GRP_DECLARE(quota_list_lck_grp, "quota list");
+static LCK_MTX_DECLARE(quota_list_mtx_lock, &quota_list_lck_grp);
/* Routines to lock and unlock the quota global data */
static int dq_list_lock(void);
static void qf_rele(struct quotafile *);
-/*
- * Initialize locks for the quota system.
- */
-void
-dqinit(void)
-{
- /*
- * Allocate quota list lock group attribute and group
- */
- quota_list_lck_grp_attr = lck_grp_attr_alloc_init();
- quota_list_lck_grp = lck_grp_alloc_init("quota list", quota_list_lck_grp_attr);
-
- /*
- * Allocate qouta list lock attribute
- */
- quota_list_lck_attr = lck_attr_alloc_init();
-
- /*
- * Allocate quota list lock
- */
- quota_list_mtx_lock = lck_mtx_alloc_init(quota_list_lck_grp, quota_list_lck_attr);
-
-
- /*
- * allocate quota file lock group attribute and group
- */
- qf_lck_grp_attr = lck_grp_attr_alloc_init();
- qf_lck_grp = lck_grp_alloc_init("quota file", qf_lck_grp_attr);
-
- /*
- * Allocate quota file lock attribute
- */
- qf_lck_attr = lck_attr_alloc_init();
-}
-
/*
* Report whether dqhashinit has been run.
*/
static int
dq_list_lock(void)
{
- lck_mtx_lock(quota_list_mtx_lock);
+ lck_mtx_lock("a_list_mtx_lock);
return ++dq_list_lock_cnt;
}
void
dq_list_unlock(void)
{
- lck_mtx_unlock(quota_list_mtx_lock);
+ lck_mtx_unlock("a_list_mtx_lock);
}
{
while (dq->dq_lflags & DQ_LLOCK) {
dq->dq_lflags |= DQ_LWANT;
- msleep(&dq->dq_lflags, quota_list_mtx_lock, PVFS, "dq_lock_internal", NULL);
+ msleep(&dq->dq_lflags, "a_list_mtx_lock, PVFS, "dq_lock_internal", NULL);
}
dq->dq_lflags |= DQ_LLOCK;
}
void
dqlock(struct dquot *dq)
{
- lck_mtx_lock(quota_list_mtx_lock);
+ lck_mtx_lock("a_list_mtx_lock);
dq_lock_internal(dq);
- lck_mtx_unlock(quota_list_mtx_lock);
+ lck_mtx_unlock("a_list_mtx_lock);
}
void
dqunlock(struct dquot *dq)
{
- lck_mtx_lock(quota_list_mtx_lock);
+ lck_mtx_lock("a_list_mtx_lock);
dq_unlock_internal(dq);
- lck_mtx_unlock(quota_list_mtx_lock);
+ lck_mtx_unlock("a_list_mtx_lock);
}
}
if ((qfp->qf_qflags & QTF_CLOSING)) {
qfp->qf_qflags |= QTF_WANTED;
- msleep(&qfp->qf_qflags, quota_list_mtx_lock, PVFS, "qf_get", NULL);
+ msleep(&qfp->qf_qflags, "a_list_mtx_lock, PVFS, "qf_get", NULL);
}
}
if (qfp->qf_vp != NULLVP) {
while ((qfp->qf_qflags & QTF_OPENING) || qfp->qf_refcnt) {
qfp->qf_qflags |= QTF_WANTED;
- msleep(&qfp->qf_qflags, quota_list_mtx_lock, PVFS, "qf_get", NULL);
+ msleep(&qfp->qf_qflags, "a_list_mtx_lock, PVFS, "qf_get", NULL);
}
if (qfp->qf_vp == NULLVP) {
qfp->qf_qflags &= ~QTF_CLOSING;
qfp->qf_vp = NULLVP;
qfp->qf_qflags = 0;
- lck_mtx_init(&qfp->qf_lock, qf_lck_grp, qf_lck_attr);
+ lck_mtx_init(&qfp->qf_lock, &qf_lck_grp, LCK_ATTR_NULL);
}
#include <vfs/vfs_disk_conditioner.h>
#include <libkern/section_keywords.h>
-extern lck_grp_t *vnode_lck_grp;
-extern lck_attr_t *vnode_lck_attr;
+static LCK_GRP_DECLARE(vnode_lck_grp, "vnode");
+static LCK_ATTR_DECLARE(vnode_lck_attr, 0, 0);
#if CONFIG_TRIGGERS
-extern lck_grp_t *trigger_vnode_lck_grp;
-extern lck_attr_t *trigger_vnode_lck_attr;
+static LCK_GRP_DECLARE(trigger_vnode_lck_grp, "trigger_vnode");
+static LCK_ATTR_DECLARE(trigger_vnode_lck_attr, 0, 0);
#endif
-extern lck_mtx_t * mnt_list_mtx_lock;
+extern lck_mtx_t mnt_list_mtx_lock;
ZONE_DECLARE(specinfo_zone, "specinfo",
sizeof(struct specinfo), ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);
S_IFSOCK, S_IFIFO, S_IFMT,
};
-
/* XXX These should be in a BSD accessible Mach header, but aren't. */
extern void memory_object_mark_used(
memory_object_control_t control);
struct timeval rage_tv;
int rage_limit = 0;
int ragevnodes = 0;
+
+int deadvnodes_low = 0;
+int deadvnodes_high = 0;
+
+uint64_t newvnode = 0;
+uint64_t newvnode_nodead = 0;
+
static int vfs_unmountall_started = 0;
#define RAGE_LIMIT_MIN 100
} while(0)
static void async_work_continue(void);
+static void vn_laundry_continue(void);
/*
* Initialize the vnode management data structures.
rage_limit = RAGE_LIMIT_MIN;
}
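+ /*
+ * Bounds for the dead-vnode pool maintained by the laundry thread:
+ * refill below deadvnodes_low (1% of desiredvnodes, capped at 300),
+ * stop at deadvnodes_high (twice that).
+ */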
+ deadvnodes_low = (desiredvnodes) / 100;
+ if (deadvnodes_low > 300) {
+ deadvnodes_low = 300;
+ }
+ deadvnodes_high = deadvnodes_low * 2;
+
/*
* create worker threads
*/
kernel_thread_start((thread_continue_t)async_work_continue, NULL, &thread);
thread_deallocate(thread);
+ kernel_thread_start((thread_continue_t)vn_laundry_continue, NULL, &thread);
+ thread_deallocate(thread);
}
/* the timeout is in 10 msecs */
struct cl_writebehind *wbp;
/*
- * Not taking the buf_mtxp as there is little
+ * Not taking the buf_mtx as there is little
* point doing it. Even if the lock is taken the
* state can change right after that. If there
* needs to be a synchronization, it must be driven
vnode_hascleanblks(vnode_t vp)
{
/*
- * Not taking the buf_mtxp as there is little
+ * Not taking the buf_mtx as there is little
* point doing it. Even if the lock is taken the
* state can change right after that. If there
* needs to be a synchronization, it must be driven
{
mount_list_lock();
while (mp->mnt_iterref) {
- msleep((caddr_t)&mp->mnt_iterref, mnt_list_mtx_lock, PVFS, "mount_iterdrain", NULL);
+ msleep((caddr_t)&mp->mnt_iterref, &mnt_list_mtx_lock, PVFS, "mount_iterdrain", NULL);
}
/* mount iterations drained */
mp->mnt_iterref = -1;
return VFS_RETURNED;
}
-extern lck_rw_t * rootvnode_rw_lock;
+extern lck_rw_t rootvnode_rw_lock;
extern void set_rootvnode(vnode_t);
pmi->pm_mount = pmi->pm_rootvnode->v_mount;
}
- lck_rw_lock_exclusive(rootvnode_rw_lock);
+ lck_rw_lock_exclusive(&rootvnode_rw_lock);
/* Setup incoming as the new rootfs */
lck_rw_lock_exclusive(&incoming->mnt_rwlock);
vnode_unlock(outgoing_vol_new_covered_vp);
lck_rw_done(&outgoing->mnt_rwlock);
+ if (!(outgoing->mnt_kern_flag & MNTK_VIRTUALDEV) &&
+ (TAILQ_FIRST(&mountlist) == outgoing)) {
+ vfs_setmntsystem(outgoing);
+ }
+
/*
* Finally, remove the mount_t linkage from the previously covered
* vnodes on the old root volume. These were incoming_vol_old_path,
* prevents concurrent vnode_lookups.
*/
set_rootvnode(incoming_rootvnode);
- lck_rw_unlock_exclusive(rootvnode_rw_lock);
+ lck_rw_unlock_exclusive(&rootvnode_rw_lock);
if (!(incoming->mnt_kern_flag & MNTK_VIRTUALDEV) &&
!(outgoing->mnt_kern_flag & MNTK_VIRTUALDEV)) {
}
#if CONFIG_MACF
- mac_vnode_notify_reclaim(vp);
+ if (vp->v_mount) {
+ /*
+ * It is possible for bdevvp vnodes to not have a mount
+ * pointer. It's fine to let it get reclaimed without
+ * notifying.
+ */
+ mac_vnode_notify_reclaim(vp);
+ }
#endif
if (active && (flags & DOCLOSE)) {
}
#endif
+#if CONFIG_IO_COMPRESSION_STATS
+ if ((vp->io_compression_stats)) {
+ vnode_iocs_record_and_free(vp);
+ }
+#endif /* CONFIG_IO_COMPRESSION_STATS */
+
/*
* Reclaim the vnode.
*/
// them (i.e. a short 8 character name can't have an 8
// character extension).
//
-extern lck_mtx_t *pkg_extensions_lck;
+extern lck_mtx_t pkg_extensions_lck;
__private_extern__ int
set_package_extensions_table(user_addr_t data, int nentries, int maxwidth)
qsort(new_exts, nentries, maxwidth, extension_cmp);
- lck_mtx_lock(pkg_extensions_lck);
+ lck_mtx_lock(&pkg_extensions_lck);
old_exts = extension_table;
old_nentries = nexts;
nexts = nentries;
max_ext_width = maxwidth;
- lck_mtx_unlock(pkg_extensions_lck);
+ lck_mtx_unlock(&pkg_extensions_lck);
kheap_free(KHEAP_DATA_BUFFERS, old_exts,
(old_nentries * old_maxwidth) + 1);
// advance over the "."
name_ext++;
- lck_mtx_lock(pkg_extensions_lck);
+ lck_mtx_lock(&pkg_extensions_lck);
// now iterate over all the extensions to see if any match
ptr = &extension_table[0];
extlen = strlen(ptr);
if (strncasecmp(name_ext, ptr, extlen) == 0 && name_ext[extlen] == '\0') {
// aha, a match!
- lck_mtx_unlock(pkg_extensions_lck);
+ lck_mtx_unlock(&pkg_extensions_lck);
return 1;
}
}
- lck_mtx_unlock(pkg_extensions_lck);
+ lck_mtx_unlock(&pkg_extensions_lck);
// if we get here, no extension matched
return 0;
struct unmount_info {
int u_errs; // Total failed unmounts
int u_busy; // EBUSY failed unmounts
+ int u_count; // Total volumes iterated
+ int u_only_non_system;
};
static int
char *mntname;
struct unmount_info *uip = arg;
- mount_ref(mp, 0);
- mount_iterdrop(mp); // avoid vfs_iterate deadlock in dounmount()
+ uip->u_count++;
mntname = zalloc(ZV_NAMEI);
strlcpy(mntname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
- error = dounmount(mp, MNT_FORCE, 1, vfs_context_current());
- if (error) {
- uip->u_errs++;
- printf("Unmount of %s failed (%d)\n", mntname ? mntname:"?", error);
- if (error == EBUSY) {
- uip->u_busy++;
+ if (uip->u_only_non_system
+ && ((mp->mnt_flag & MNT_ROOTFS) || (mp->mnt_kern_flag & MNTK_SYSTEM))) { //MNTK_BACKS_ROOT
+ printf("unmount(%d) %s skipped\n", uip->u_only_non_system, mntname);
+ mount_iterdrop(mp); // VFS_ITERATE_CB_DROPREF
+ } else {
+ printf("unmount(%d) %s\n", uip->u_only_non_system, mntname);
+
+ mount_ref(mp, 0);
+ mount_iterdrop(mp); // VFS_ITERATE_CB_DROPREF
+ error = dounmount(mp, MNT_FORCE, 1, vfs_context_current());
+ if (error) {
+ uip->u_errs++;
+ printf("Unmount of %s failed (%d)\n", mntname ? mntname:"?", error);
+ if (error == EBUSY) {
+ uip->u_busy++;
+ }
}
}
if (mntname) {
* Busy mounts are retried.
*/
__private_extern__ void
-vfs_unmountall(void)
+vfs_unmountall(int only_non_system)
{
int mounts, sec = 1;
struct unmount_info ui;
vfs_unmountall_started = 1;
+ printf("vfs_unmountall(%ssystem) start\n", only_non_system ? "non" : "");
retry:
- ui.u_errs = ui.u_busy = 0;
+ ui.u_errs = ui.u_busy = ui.u_count = 0;
+ ui.u_only_non_system = only_non_system;
+ // avoid vfs_iterate deadlock in dounmount(), use VFS_ITERATE_CB_DROPREF
vfs_iterate(VFS_ITERATE_CB_DROPREF | VFS_ITERATE_TAIL_FIRST, unmount_callback, &ui);
mounts = mount_getvfscnt();
if (mounts == 0) {
return;
}
-
if (ui.u_busy > 0) { // Busy mounts - wait & retry
tsleep(&nummounts, PVFS, "busy mount", sec * hz);
sec *= 2;
goto retry;
}
printf("Unmounting timed out\n");
- } else if (ui.u_errs < mounts) {
+ } else if (ui.u_count < mounts) {
// If the vfs_iterate missed mounts in progress - wait a bit
tsleep(&nummounts, PVFS, "missed mount", 2 * hz);
}
+
+ printf("vfs_unmountall(%ssystem) end\n", only_non_system ? "non" : "");
}
/*
}
static struct klist fs_klist;
-lck_grp_t *fs_klist_lck_grp;
-lck_mtx_t *fs_klist_lock;
+static LCK_GRP_DECLARE(fs_klist_lck_grp, "fs_klist");
+static LCK_MTX_DECLARE(fs_klist_lock, &fs_klist_lck_grp);
void
vfs_event_init(void)
{
klist_init(&fs_klist);
- fs_klist_lck_grp = lck_grp_alloc_init("fs_klist", NULL);
- fs_klist_lock = lck_mtx_alloc_init(fs_klist_lck_grp, NULL);
}
void
}
}
- lck_mtx_lock(fs_klist_lock);
+ lck_mtx_lock(&fs_klist_lock);
KNOTE(&fs_klist, event);
- lck_mtx_unlock(fs_klist_lock);
+ lck_mtx_unlock(&fs_klist_lock);
}
/*
kn->kn_flags |= EV_CLEAR; /* automatic */
kn->kn_sdata = 0; /* incoming data is ignored */
- lck_mtx_lock(fs_klist_lock);
+ lck_mtx_lock(&fs_klist_lock);
KNOTE_ATTACH(&fs_klist, kn);
- lck_mtx_unlock(fs_klist_lock);
+ lck_mtx_unlock(&fs_klist_lock);
/*
* filter only sees future events,
static void
filt_fsdetach(struct knote *kn)
{
- lck_mtx_lock(fs_klist_lock);
+ lck_mtx_lock(&fs_klist_lock);
KNOTE_DETACH(&fs_klist, kn);
- lck_mtx_unlock(fs_klist_lock);
+ lck_mtx_unlock(&fs_klist_lock);
}
static int
{
int res;
- lck_mtx_lock(fs_klist_lock);
+ lck_mtx_lock(&fs_klist_lock);
kn->kn_sfflags = kev->fflags;
// kn->kn_fflags &= kn->kn_sfflags;
res = (kn->kn_fflags != 0);
- lck_mtx_unlock(fs_klist_lock);
+ lck_mtx_unlock(&fs_klist_lock);
return res;
}
{
int res = 0;
- lck_mtx_lock(fs_klist_lock);
+ lck_mtx_lock(&fs_klist_lock);
if (kn->kn_fflags) {
knote_fill_kevent(kn, kev, 0);
res = 1;
}
- lck_mtx_unlock(fs_klist_lock);
+ lck_mtx_unlock(&fs_klist_lock);
return res;
}
}
/* the vfs.generic. branch. */
-SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RW | CTLFLAG_LOCKED, NULL, "vfs generic hinge");
+SYSCTL_EXTENSIBLE_NODE(_vfs, VFS_GENERIC, generic,
+ CTLFLAG_RW | CTLFLAG_LOCKED, NULL, "vfs generic hinge");
/* retreive a list of mounted filesystem fsid_t */
SYSCTL_PROC(_vfs_generic, OID_AUTO, vfsidlist,
CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
static vnode_t
-process_vp(vnode_t vp, int want_vp, int *deferred)
+process_vp(vnode_t vp, int want_vp, bool can_defer, int *deferred)
{
unsigned int vpid;
* Checks for anyone racing us for recycle
*/
if (vp->v_type != VBAD) {
- if (want_vp && (vnode_on_reliable_media(vp) == FALSE || (vp->v_flag & VISDIRTY))) {
+ if ((want_vp || can_defer) && (vnode_on_reliable_media(vp) == FALSE || (vp->v_flag & VISDIRTY))) {
vnode_async_list_add(vp);
vnode_unlock(vp);
vp = TAILQ_FIRST(q);
- vp = process_vp(vp, 0, &deferred);
+ vp = process_vp(vp, 0, false, &deferred);
if (vp != NULLVP) {
panic("found VBAD vp (%p) on async queue", vp);
}
}
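+/*
+ * vn_laundry_continue: worker thread that replenishes the dead-vnode pool.
+ * While there are queued free or rage vnodes, deadvnodes is below
+ * deadvnodes_high and numvnodes is at least desiredvnodes, it recycles
+ * vnodes via process_vp(), preferring the rage queue once it is over its
+ * limit or its grace period has expired and falling back to the free queue.
+ * Otherwise it blocks until wakeup_laundry_thread() kicks it.
+ */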
+__attribute__((noreturn))
+static void
+vn_laundry_continue(void)
+{
+ struct freelst *free_q;
+ struct ragelst *rage_q;
+ int deferred;
+ vnode_t vp;
+ bool rage_q_empty;
+ bool free_q_empty;
+
+
+ free_q = &vnode_free_list;
+ rage_q = &vnode_rage_list;
+
+ for (;;) {
+ vnode_list_lock();
+
+ free_q_empty = TAILQ_EMPTY(free_q);
+ rage_q_empty = TAILQ_EMPTY(rage_q);
+
+ if (!rage_q_empty && !free_q_empty) {
+ struct timeval current_tv;
+
+ microuptime(¤t_tv);
+ if (ragevnodes < rage_limit &&
+ ((current_tv.tv_sec - rage_tv.tv_sec) < RAGE_TIME_LIMIT)) {
+ rage_q_empty = true;
+ }
+ }
+
+ if (deadvnodes >= deadvnodes_high ||
+ (rage_q_empty && free_q_empty) ||
+ numvnodes < desiredvnodes) {
+ assert_wait(free_q, (THREAD_UNINT));
+
+ vnode_list_unlock();
+
+ thread_block((thread_continue_t)vn_laundry_continue);
+
+ continue;
+ }
+
+ if (!rage_q_empty) {
+ vp = TAILQ_FIRST(rage_q);
+ } else {
+ vp = TAILQ_FIRST(free_q);
+ }
+
+ vp = process_vp(vp, 0, true, &deferred);
+ }
+}
+
+static inline void
+wakeup_laundry_thread()
+{
+ if ((deadvnodes < deadvnodes_low) &&
+ /* Minimum number of free vnodes the thread should act on */
+ ((freevnodes + ragevnodes) > 10)) {
+ wakeup(&vnode_free_list);
+ }
+}
static int
new_vnode(vnode_t *vpp)
vp = NULLVP;
vnode_list_lock();
+ newvnode++;
if (need_reliable_vp == TRUE) {
async_work_timed_out++;
* Can always reuse a dead one
*/
vp = TAILQ_FIRST(&vnode_dead_list);
+ if (numvnodes >= desiredvnodes) {
+ wakeup_laundry_thread();
+ }
goto steal_this_vp;
}
/*
* the limit, we'll create a new vnode
*/
numvnodes++;
+ if (numvnodes >= desiredvnodes) {
+ wakeup_laundry_thread();
+ }
vnode_list_unlock();
vp = zalloc_flags(vnode_zone, Z_WAITOK | Z_ZERO);
VLISTNONE(vp); /* avoid double queue removal */
- lck_mtx_init(&vp->v_lock, vnode_lck_grp, vnode_lck_attr);
+ lck_mtx_init(&vp->v_lock, &vnode_lck_grp, &vnode_lck_attr);
TAILQ_INIT(&vp->v_ncchildren);
vp->v_iocount = 1;
goto done;
}
+
+ wakeup_laundry_thread();
+
microuptime(¤t_tv);
#define MAX_WALK_COUNT 1000
panic("new_vnode: vp (%p) on RAGE list not marked VLIST_RAGE", vp);
}
- /*
- * skip free vnodes created by bdevvp as they are
- * typically not fully constructedi and may have issues
- * in getting reclaimed.
- */
- if (vp->v_flag & VBDEVVP) {
- bdevvp_vnodes++;
- continue;
- }
-
// if we're a dependency-capable process, skip vnodes that can
// cause recycling deadlocks. (i.e. this process is diskimages
// helper and the vnode is in a disk image). Querying the
*/
walk_count = 0;
TAILQ_FOREACH(vp, &vnode_free_list, v_freelist) {
- /*
- * skip free vnodes created by bdevvp as they are
- * typically not fully constructedi and may have issues
- * in getting reclaimed.
- */
- if (vp->v_flag & VBDEVVP) {
- bdevvp_vnodes++;
- continue;
- }
-
// if we're a dependency-capable process, skip vnodes that can
// cause recycling deadlocks. (i.e. this process is diskimages
// helper and the vnode is in a disk image). Querying the
*vpp = NULL;
return ENFILE;
}
+ newvnode_nodead++;
steal_this_vp:
- if ((vp = process_vp(vp, 1, &deferred)) == NULLVP) {
+ if ((vp = process_vp(vp, 1, true, &deferred)) == NULLVP) {
if (deferred) {
int elapsed_msecs;
struct timeval elapsed_tv;
}
owner_ok = (needed & vap->va_mode) == needed;
+ /*
+ * Processes with the appropriate entitlement can mark themselves as
+ * ignoring file/directory permissions if they own it.
+ */
+ if (!owner_ok && proc_ignores_node_permissions(vfs_context_proc(vcp->ctx))) {
+ owner_ok = 1;
+ }
+
/* group permissions */
needed = 0;
if (action & VREAD) {
_SETWHERE("all");
goto out;
}
+
if (!owner_ok && !group_ok && !world_ok) {
_SETWHERE("all");
error = EACCES;
switch (eval.ae_result) {
case KAUTH_RESULT_DENY:
+ if (vauth_file_owner(vcp) && proc_ignores_node_permissions(vfs_context_proc(vcp->ctx))) {
+ KAUTH_DEBUG("%p Override DENY due to entitlement", vcp->vp);
+ return 0;
+ }
KAUTH_DEBUG("%p DENIED - denied by ACL", vcp->vp);
return EACCES;
case KAUTH_RESULT_ALLOW:
}
switch (eval.ae_result) {
case KAUTH_RESULT_DENY:
+ if (vauth_dir_owner(vcp) && proc_ignores_node_permissions(vfs_context_proc(vcp->ctx))) {
+ KAUTH_DEBUG("%p Override DENY due to entitlement", vcp->vp);
+ return 0;
+ }
KAUTH_DEBUG("%p DENIED - denied by directory ACL", vcp->vp);
return EACCES;
case KAUTH_RESULT_ALLOW:
switch (eval.ae_result) {
case KAUTH_RESULT_DENY:
+ if (vauth_file_owner(vcp) && proc_ignores_node_permissions(vfs_context_proc(vcp->ctx))) {
+ KAUTH_DEBUG("%p Override DENY due to entitlement", vcp->vp);
+ return 0;
+ }
KAUTH_DEBUG("%p DENIED - by ACL", vcp->vp);
return EACCES; /* deny, deny, counter-allege */
case KAUTH_RESULT_ALLOW:
* Check for file immutability.
*/
static int
-vnode_authorize_checkimmutable(mount_t mp, struct vnode_attr *vap, int rights, int ignore)
+vnode_authorize_checkimmutable(mount_t mp, vauth_ctx vcp,
+ struct vnode_attr *vap, int rights, int ignore)
{
int error;
int append;
}
}
if ((error = vnode_immutable(vap, append, ignore)) != 0) {
+ if (error && !ignore) {
+ /*
+ * In case of a rename, we want to check ownership for dvp as well.
+ */
+ int owner = 0;
+ if (rights & KAUTH_VNODE_DELETE_CHILD && vcp->dvp != NULL) {
+ owner = vauth_file_owner(vcp) && vauth_dir_owner(vcp);
+ } else {
+ owner = vauth_file_owner(vcp);
+ }
+ if (owner && proc_ignores_node_permissions(vfs_context_proc(vcp->ctx))) {
+ error = vnode_immutable(vap, append, 1);
+ }
+ }
+ }
+ if (error) {
KAUTH_DEBUG("%p DENIED - file is immutable", vap);
goto out;
}
* In the deletion case, parent directory immutability vetoes specific
* file rights.
*/
- if ((result = vnode_authorize_checkimmutable(mp, vcp->vap, rights,
+ if ((result = vnode_authorize_checkimmutable(mp, vcp, vcp->vap, rights,
noimmutable)) != 0) {
goto out;
}
if ((rights & KAUTH_VNODE_DELETE) &&
!parent_authorized_for_delete_child) {
- result = vnode_authorize_checkimmutable(mp, vcp->dvap,
+ result = vnode_authorize_checkimmutable(mp, vcp, vcp->dvap,
KAUTH_VNODE_DELETE_CHILD, 0);
if (result) {
goto out;
return ENOMEM;
}
- lck_mtx_init(&rp->vr_lock, trigger_vnode_lck_grp, trigger_vnode_lck_attr);
+ lck_mtx_init(&rp->vr_lock, &trigger_vnode_lck_grp, &trigger_vnode_lck_attr);
rp->vr_resolve_func = tinfo->vnt_resolve_func;
rp->vr_unresolve_func = tinfo->vnt_unresolve_func;
rp->vr_reclaim_func(NULLVP, rp->vr_data);
}
- lck_mtx_destroy(&rp->vr_lock, trigger_vnode_lck_grp);
+ lck_mtx_destroy(&rp->vr_lock, &trigger_vnode_lck_grp);
kheap_free(KHEAP_DEFAULT, rp, sizeof(struct vnode_resolve));
}
__private_extern__
int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
-extern lck_grp_t *fd_vn_lck_grp;
-extern lck_grp_attr_t *fd_vn_lck_grp_attr;
-extern lck_attr_t *fd_vn_lck_attr;
+static LCK_GRP_DECLARE(fd_vn_lck_grp, "fd_vnode_data");
+static LCK_ATTR_DECLARE(fd_vn_lck_attr, 0, 0);
-extern lck_rw_t * rootvnode_rw_lock;
+/* vars for sync mutex */
+static LCK_GRP_DECLARE(sync_mtx_lck_grp, "sync thread");
+static LCK_MTX_DECLARE(sync_mtx_lck, &sync_mtx_lck_grp);
+
+extern lck_rw_t rootvnode_rw_lock;
/*
* incremented each time a mount or unmount operation occurs
}
#endif /* CONFIG_NFS_CLIENT || DEVFS */
+ if (KERNEL_MOUNT_DEVFS & internal_flags) {
+ // kernel mounted devfs
+ mp->mnt_kern_flag |= MNTK_SYSTEM;
+ }
+
update:
/*
if (rootvnode == olddp) {
vnode_ref(newdp);
- lck_rw_lock_exclusive(rootvnode_rw_lock);
+ lck_rw_lock_exclusive(&rootvnode_rw_lock);
tvp = rootvnode;
rootvnode = newdp;
- lck_rw_unlock_exclusive(rootvnode_rw_lock);
+ lck_rw_unlock_exclusive(&rootvnode_rw_lock);
vnode_rele(tvp);
}
* associated with it (for example, the associated VM or DATA mounts) .
*/
if ((mp->mnt_flag & MNT_ROOTFS) || (mp->mnt_kern_flag & MNTK_SYSTEM)) {
+ if (!(mp->mnt_flag & MNT_ROOTFS)) {
+ printf("attempt to unmount a system mount (%s), will return EBUSY\n",
+ mp->mnt_vfsstat.f_mntonname);
+ }
error = EBUSY; /* the root (or associated volumes) is always busy */
goto out;
}
pm_sync_thread = current_thread();
#endif /* CONFIG_PHYS_WRITE_ACCT */
- lck_mtx_lock(sync_mtx_lck);
+ lck_mtx_lock(&sync_mtx_lck);
while (sync_thread_state & SYNC_THREAD_RUN) {
sync_thread_state &= ~SYNC_THREAD_RUN;
- lck_mtx_unlock(sync_mtx_lck);
+ lck_mtx_unlock(&sync_mtx_lck);
sync_type = SYNC_ONLY_RELIABLE_MEDIA;
vfs_iterate(LK_NOWAIT, sync_internal_callback, &sync_type);
sync_type = SYNC_ONLY_UNRELIABLE_MEDIA;
vfs_iterate(LK_NOWAIT, sync_internal_callback, &sync_type);
- lck_mtx_lock(sync_mtx_lck);
+ lck_mtx_lock(&sync_mtx_lck);
}
/*
* This wakeup _has_ to be issued before the lock is released otherwise
#if CONFIG_PHYS_WRITE_ACCT
pm_sync_thread = NULL;
#endif /* CONFIG_PHYS_WRITE_ACCT */
- lck_mtx_unlock(sync_mtx_lck);
+ lck_mtx_unlock(&sync_mtx_lck);
if (print_vmpage_stat) {
vm_countdirtypages();
int thread_created = FALSE;
struct timespec ts = {.tv_sec = sync_timeout_seconds, .tv_nsec = 0};
- lck_mtx_lock(sync_mtx_lck);
+ lck_mtx_lock(&sync_mtx_lck);
sync_thread_state |= SYNC_THREAD_RUN;
if (!(sync_thread_state & SYNC_THREAD_RUNNING)) {
int kr;
kr = kernel_thread_start(sync_thread, NULL, &thd);
if (kr != KERN_SUCCESS) {
sync_thread_state &= ~SYNC_THREAD_RUNNING;
- lck_mtx_unlock(sync_mtx_lck);
+ lck_mtx_unlock(&sync_mtx_lck);
printf("sync_thread failed\n");
return 0;
}
thread_created = TRUE;
}
- error = msleep((caddr_t)&sync_thread_state, sync_mtx_lck,
+ error = msleep((caddr_t)&sync_thread_state, &sync_mtx_lck,
(PVFS | PDROP | PCATCH), "sync_thread", &ts);
if (error) {
struct timeval now;
/* Allocate per fd vnode data */
fvdata = kheap_alloc(KM_FD_VN_DATA, sizeof(struct fd_vn_data),
Z_WAITOK | Z_ZERO);
- lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
+ lck_mtx_init(&fvdata->fv_lock, &fd_vn_lck_grp, &fd_vn_lck_attr);
return fvdata;
}
struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
kheap_free(KHEAP_DATA_BUFFERS, fvdata->fv_buf, fvdata->fv_bufallocsiz);
- lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
+ lck_mtx_destroy(&fvdata->fv_lock, &fd_vn_lck_grp);
kheap_free(KM_FD_VN_DATA, fvdata, sizeof(struct fd_vn_data));
}
/*
* certain attributes may need to be changed from the source, we ask for
- * those here.
+ * those here with the exception of the source file's ACL. The clone file
+ * will inherit the target directory's ACL.
*/
VATTR_INIT(&va);
VATTR_WANTED(&va, va_uid);
VATTR_WANTED(&va, va_gid);
VATTR_WANTED(&va, va_mode);
VATTR_WANTED(&va, va_flags);
- VATTR_WANTED(&va, va_acl);
if ((error = vnode_getattr(fvp, &va, ctx)) != 0) {
goto out;
* If some of the requested attributes weren't handled by the
* VNOP, use our fallback code.
*/
- if (!VATTR_ALL_SUPPORTED(&va)) {
+ if (!VATTR_ALL_SUPPORTED(&nva)) {
(void)vnode_setattr_fallback(tvp, &nva, ctx);
}
static u_long nspace_resolver_request_hashmask;
static u_int nspace_resolver_request_count;
static bool nspace_resolver_request_wait_slot;
-static lck_grp_t *nspace_resolver_request_lck_grp;
-static lck_mtx_t nspace_resolver_request_hash_mutex;
+static LCK_GRP_DECLARE(nspace_resolver_request_lck_grp, "file namespace resolver");
+static LCK_MTX_DECLARE(nspace_resolver_request_hash_mutex,
+ &nspace_resolver_request_lck_grp);
#define NSPACE_REQ_LOCK() \
lck_mtx_lock(&nspace_resolver_request_hash_mutex)
return 0;
}
-static int
-nspace_materialization_is_prevented(void)
-{
- proc_t p = current_proc();
- uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
- vfs_context_t ctx = vfs_context_current();
-
- /*
- * Kernel context ==> return EDEADLK, as we would with any random
- * process decorated as no-materialize.
- */
- if (ctx == vfs_context_kernel()) {
- return EDEADLK;
- }
-
- /*
- * If the process has the dataless-manipulation entitlement,
- * materialization is prevented, and depending on the kind
- * of file system operation, things get to proceed as if the
- * object is not dataless.
- */
- if (vfs_context_is_dataless_manipulator(ctx)) {
- return EJUSTRETURN;
- }
-
- /*
- * Per-thread decorations override any process-wide decorations.
- * (Foundation uses this, and this overrides even the dataless-
- * manipulation entitlement so as to make API contracts consistent.)
- */
- if (ut != NULL) {
- if (ut->uu_flag & UT_NSPACE_NODATALESSFAULTS) {
- return EDEADLK;
- }
- if (ut->uu_flag & UT_NSPACE_FORCEDATALESSFAULTS) {
- return 0;
- }
- }
-
- /*
- * If the process's iopolicy specifies that dataless files
- * can be materialized, then we let it go ahead.
- */
- if (p->p_vfs_iopolicy & P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES) {
- return 0;
- }
-
- /*
- * The default behavior is to not materialize dataless files;
- * return to the caller that deadlock was detected.
- */
- return EDEADLK;
-}
-
/* the vfs.nspace branch */
SYSCTL_NODE(_vfs, OID_AUTO, nspace, CTLFLAG_RW | CTLFLAG_LOCKED, NULL, "vfs nspace hinge");
#define __no_dataless_unused __unused
#endif
-void
-nspace_resolver_init(void)
+int
+vfs_context_dataless_materialization_is_prevented(
+ vfs_context_t const ctx __no_dataless_unused)
{
#if CONFIG_DATALESS_FILES
- nspace_resolver_request_lck_grp =
- lck_grp_alloc_init("file namespace resolver", NULL);
+ proc_t const p = vfs_context_proc(ctx);
+ thread_t const t = vfs_context_thread(ctx);
+ uthread_t const ut = t ? get_bsdthread_info(t) : NULL;
+
+ /*
+ * Kernel context ==> return EDEADLK, as we would with any random
+ * process decorated as no-materialize.
+ */
+ if (ctx == vfs_context_kernel()) {
+ return EDEADLK;
+ }
+
+ /*
+ * If the process has the dataless-manipulation entitlement,
+ * materialization is prevented, and depending on the kind
+ * of file system operation, things get to proceed as if the
+ * object is not dataless.
+ */
+ if (vfs_context_is_dataless_manipulator(ctx)) {
+ return EJUSTRETURN;
+ }
+
+ /*
+ * Per-thread decorations override any process-wide decorations.
+ * (Foundation uses this, and this overrides even the dataless-
+ * manipulation entitlement so as to make API contracts consistent.)
+ */
+ if (ut != NULL) {
+ if (ut->uu_flag & UT_NSPACE_NODATALESSFAULTS) {
+ return EDEADLK;
+ }
+ if (ut->uu_flag & UT_NSPACE_FORCEDATALESSFAULTS) {
+ return 0;
+ }
+ }
- lck_mtx_init(&nspace_resolver_request_hash_mutex,
- nspace_resolver_request_lck_grp, NULL);
+ /*
+ * If the process's iopolicy specifies that dataless files
+ * can be materialized, then we let it go ahead.
+ */
+ if (p->p_vfs_iopolicy & P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES) {
+ return 0;
+ }
+#endif /* CONFIG_DATALESS_FILES */
+ /*
+ * The default behavior is to not materialize dataless files;
+ * return to the caller that deadlock was detected.
+ */
+ return EDEADLK;
+}
+
+void
+nspace_resolver_init(void)
+{
+#if CONFIG_DATALESS_FILES
nspace_resolver_request_hashtbl =
hashinit(NSPACE_RESOLVER_REQ_HASHSIZE,
M_VNODE /* XXX */, &nspace_resolver_request_hashmask);
return ENOTSUP;
}
- error = nspace_materialization_is_prevented();
+ error = vfs_context_dataless_materialization_is_prevented(
+ vfs_context_current());
if (error) {
os_log_debug(OS_LOG_DEFAULT,
"NSPACE process/thread is decorated as no-materialization");
*/
end = ah->data_start + ah->data_length;
if (ah->total_size > ainfop->finderinfo->offset + ainfop->finderinfo->length ||
+ ah->data_start < sizeof(attr_header_t) ||
end < ah->data_start ||
end > ah->total_size) {
return EINVAL;
SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");
+extern int apple_protect_pager_count;
+extern int apple_protect_pager_count_mapped;
+extern unsigned int apple_protect_pager_cache_limit;
+SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count_mapped, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count_mapped, 0, "");
+SYSCTL_UINT(_vm, OID_AUTO, apple_protect_pager_cache_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_cache_limit, 0, "");
+
#if DEVELOPMENT || DEBUG
extern int radar_20146450;
SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
- mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid);
+ mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor);
/*
* Sysctl's related to data/stack execution. See osfmk/vm/vm_map.c
*/
*/
__attribute__((noinline)) int
__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
- mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid)
+ mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor)
{
- return check_task_access(task_access_port, calling_pid, calling_gid, target_pid);
+ return check_task_access_with_flavor(task_access_port, calling_pid, calling_gid, target_pid, flavor);
}
/*
/* Always check if pid == 0 */
if (pid == 0) {
- (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
+ (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
return KERN_FAILURE;
}
t1 = port_name_to_task(target_tport);
if (t1 == TASK_NULL) {
- (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
+ (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
return KERN_FAILURE;
}
p = PROC_NULL;
#if CONFIG_MACF
- error = mac_proc_check_get_task(kauth_cred_get(), &pident);
+ error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
if (error) {
error = KERN_FAILURE;
goto tfpout;
}
/* Call up to the task access server */
- error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);
+ error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
+ proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
if (error != MACH_MSG_SUCCESS) {
if (error == MACH_RCV_INTERRUPTED) {
/* Grant task port access */
extmod_statistics_incr_task_for_pid(task);
- sright = (void *) convert_task_to_port(task);
+
+ if (task == current_task()) {
+ /* return pinned self if current_task() so equality check with mach_task_self_ passes */
+ sright = (void *)convert_task_to_port_pinned(task);
+ } else {
+ sright = (void *)convert_task_to_port(task);
+ }
/* Check if the task has been corpsified */
if (is_corpsetask(task)) {
mach_port_name_t target_tport = args->target_tport;
int pid = args->pid;
user_addr_t task_addr = args->t;
- proc_t p = PROC_NULL;
- task_t t1;
- mach_port_name_t tret;
+ proc_t p = PROC_NULL;
+ task_t t1 = TASK_NULL;
+ mach_port_name_t tret = MACH_PORT_NULL;
void * sright;
int error = 0, refheld = 0;
kauth_cred_t target_cred;
t1 = port_name_to_task(target_tport);
if (t1 == TASK_NULL) {
- (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
+ (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
return KERN_FAILURE;
}
proc_rele(p);
p = PROC_NULL;
#if CONFIG_MACF
- error = mac_proc_check_get_task_name(kauth_cred_get(), &pident);
+ error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_NAME);
if (error) {
task_deallocate(task);
goto noperm;
/* Disallow inspect port for kernel_task */
if (pid == 0) {
- (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
+ (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
return EPERM;
}
t1 = port_name_to_task(target_tport);
if (t1 == TASK_NULL) {
- (void) copyout((char *) &t1, task_addr, sizeof(mach_port_name_t));
+ (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
return EINVAL;
}
proc_rele(proc);
proc = PROC_NULL;
- /*
- * For now, it performs the same set of permission checks as task_for_pid. This
- * will be addressed in rdar://problem/53478660
- */
#if CONFIG_MACF
- error = mac_proc_check_get_task(kauth_cred_get(), &pident);
+ error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_INSPECT);
if (error) {
error = EPERM;
goto tifpout;
/* Call up to the task access server */
- error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);
+ error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
+ proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_INSPECT);
if (error != MACH_MSG_SUCCESS) {
if (error == MACH_RCV_INTERRUPTED) {
/* Disallow read port for kernel_task */
if (pid == 0) {
- (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
+ (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
return EPERM;
}
t1 = port_name_to_task(target_tport);
if (t1 == TASK_NULL) {
- (void) copyout((char *) &t1, task_addr, sizeof(mach_port_name_t));
+ (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
return EINVAL;
}
proc_rele(proc);
proc = PROC_NULL;
- /*
- * For now, it performs the same set of permission checks as task_for_pid. This
- * will be addressed in rdar://problem/53478660
- */
#if CONFIG_MACF
- error = mac_proc_check_get_task(kauth_cred_get(), &pident);
+ error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_READ);
if (error) {
error = EPERM;
goto trfpout;
/* Call up to the task access server */
- error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);
+ error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
+ proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_READ);
if (error != MACH_MSG_SUCCESS) {
if (error == MACH_RCV_INTERRUPTED) {
#endif
target = targetproc->task;
-#ifndef CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
if (target != TASK_NULL) {
/* If we aren't root and target's task access port is set... */
if (!kauth_cred_issuser(kauth_cred_get()) &&
}
/* Call up to the task access server */
- error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);
+ error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
+ proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
if (error != MACH_MSG_SUCCESS) {
if (error == MACH_RCV_INTERRUPTED) {
}
}
}
-#endif
+#endif /* XNU_TARGET_OS_OSX */
task_reference(target);
error = task_pidsuspend(target);
/* Always check if pid == 0 */
if (pid == 0) {
- (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
+ (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
return KERN_FAILURE;
}
t1 = port_name_to_task(target_tport);
if (t1 == TASK_NULL) {
- (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
+ (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
return KERN_FAILURE;
}
if (!IOTaskHasEntitlement(current_task(), DEBUG_PORT_ENTITLEMENT)) {
#if CONFIG_MACF
- error = mac_proc_check_get_task(kauth_cred_get(), &pident);
+ error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
if (error) {
error = KERN_FAILURE;
goto tfpout;
/* Call up to the task access server */
- error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);
+ error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
+ proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
if (error != MACH_MSG_SUCCESS) {
if (error == MACH_RCV_INTERRUPTED) {
#endif
target = targetproc->task;
-#ifndef CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
if (target != TASK_NULL) {
/* If we aren't root and target's task access port is set... */
if (!kauth_cred_issuser(kauth_cred_get()) &&
}
/* Call up to the task access server */
- error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);
+ error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
+ proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
if (error != MACH_MSG_SUCCESS) {
if (error == MACH_RCV_INTERRUPTED) {
}
}
}
-#endif
+#endif /* XNU_TARGET_OS_OSX */
#if !XNU_TARGET_OS_OSX
#if SOCKETS
return error;
}
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
/*
* Freeze the specified process (provided in args->pid), or find and freeze a PID.
* When a process is specified, this call is blocking, otherwise we wake up the
*ret = error;
return error;
}
-#endif /* CONFIG_EMBEDDED */
+#endif /* !XNU_TARGET_OS_OSX */
#if SOCKETS
int
* proc lock NOT held
* a reference on the proc has been held / shall be dropped by the caller.
*/
- LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
+ LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
proc_fdlock(p);
mach_vm_offset_t start_address = 0;
int error = 0;
kern_return_t kr;
+ task_t task = current_task();
SHARED_REGION_TRACE_DEBUG(
("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
(uint64_t)uap->start_address));
/* retrieve the current tasks's shared region */
- shared_region = vm_shared_region_get(current_task());
+ shared_region = vm_shared_region_get(task);
if (shared_region != NULL) {
/* retrieve address of its first mapping... */
- kr = vm_shared_region_start_address(shared_region, &start_address);
+ kr = vm_shared_region_start_address(shared_region, &start_address, task);
if (kr != KERN_SUCCESS) {
error = ENOMEM;
} else {
* a max value. The kernel will choose a random value based on that, then use it
* for all shared regions.
*/
-#define SLIDE_AMOUNT_MASK ~PAGE_MASK
+#if defined (__x86_64__)
+#define SLIDE_AMOUNT_MASK ~FOURK_PAGE_MASK
+#else
+#define SLIDE_AMOUNT_MASK ~SIXTEENK_PAGE_MASK
+#endif
int
shared_region_map_and_slide_2_np(
}
mappings[m].sms_address += slide_amount;
if (mappings[m].sms_slide_size != 0) {
- mappings[i].sms_slide_start += slide_amount;
+ mappings[m].sms_slide_start += slide_amount;
}
}
}
SYSCTL_INT(_debug, OID_AUTO, vm_mixed_pagesize_supported, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_mixed_pagesize_supported, 0, "kernel support for mixed pagesize");
-
-extern uint64_t get_pages_grabbed_count(void);
-
-static int
-pages_grabbed SYSCTL_HANDLER_ARGS
-{
-#pragma unused(arg1, arg2, oidp)
- uint64_t value = get_pages_grabbed_count();
- return SYSCTL_OUT(req, &value, sizeof(value));
-}
-
-SYSCTL_PROC(_vm, OID_AUTO, pages_grabbed, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED,
- 0, 0, &pages_grabbed, "QU", "Total pages grabbed");
+SCALABLE_COUNTER_DECLARE(vm_page_grab_count);
+SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed, vm_page_grab_count, "Total pages grabbed");
SYSCTL_ULONG(_vm, OID_AUTO, pages_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
&vm_pageout_vminfo.vm_page_pages_freed, "Total pages freed");
SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "");
#endif /* MACH_ASSERT */
+
+extern uint64_t vm_map_lookup_locked_copy_slowly_count;
+extern uint64_t vm_map_lookup_locked_copy_slowly_size;
+extern uint64_t vm_map_lookup_locked_copy_slowly_max;
+extern uint64_t vm_map_lookup_locked_copy_slowly_restart;
+extern uint64_t vm_map_lookup_locked_copy_slowly_error;
+extern uint64_t vm_map_lookup_locked_copy_strategically_count;
+extern uint64_t vm_map_lookup_locked_copy_strategically_size;
+extern uint64_t vm_map_lookup_locked_copy_strategically_max;
+extern uint64_t vm_map_lookup_locked_copy_strategically_restart;
+extern uint64_t vm_map_lookup_locked_copy_strategically_error;
+extern uint64_t vm_map_lookup_locked_copy_shadow_count;
+extern uint64_t vm_map_lookup_locked_copy_shadow_size;
+extern uint64_t vm_map_lookup_locked_copy_shadow_max;
+SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_count,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_slowly_count, "");
+SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_size,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_slowly_size, "");
+SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_max,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_slowly_max, "");
+SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_restart,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_slowly_restart, "");
+SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_error,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_slowly_error, "");
+SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_count,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_strategically_count, "");
+SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_size,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_strategically_size, "");
+SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_max,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_strategically_max, "");
+SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_restart,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_strategically_restart, "");
+SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_error,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_strategically_error, "");
+SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_count,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_shadow_count, "");
+SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_size,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_shadow_size, "");
+SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_max,
+ CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_locked_copy_shadow_max, "");
+
extern int vm_protect_privileged_from_untrusted;
SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted,
CTLFLAG_RW | CTLFLAG_LOCKED, &vm_protect_privileged_from_untrusted, 0, "");
CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
0, 0, shared_region_pivot, "I", "");
-extern int vm_remap_old_path, vm_remap_new_path;
-SYSCTL_INT(_vm, OID_AUTO, remap_old_path,
- CTLFLAG_RD | CTLFLAG_LOCKED, &vm_remap_old_path, 0, "");
-SYSCTL_INT(_vm, OID_AUTO, remap_new_path,
- CTLFLAG_RD | CTLFLAG_LOCKED, &vm_remap_new_path, 0, "");
+/*
+ * sysctl to return the number of pages on retired_pages_object
+ */
+static int
+retired_pages_count SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2, oidp)
+ extern uint32_t vm_retired_pages_count(void);
+ uint32_t value = vm_retired_pages_count();
+
+ return SYSCTL_OUT(req, &value, sizeof(value));
+}
+SYSCTL_PROC(_vm, OID_AUTO, retired_pages_count, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
+ 0, 0, &retired_pages_count, "I", "");
+
+SYSCTL_INT(_vm, OID_AUTO, vmtc_total, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vmtc_total, 0, "total text page corruptions detected");
+SYSCTL_INT(_vm, OID_AUTO, vmtc_undiagnosed, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vmtc_undiagnosed, 0, "undiagnosed text page corruptions");
+SYSCTL_INT(_vm, OID_AUTO, vmtc_not_eligible, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vmtc_not_eligible, 0, "text page corruptions not eligible for correction");
+SYSCTL_INT(_vm, OID_AUTO, vmtc_copyin_fail, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vmtc_copyin_fail, 0, "undiagnosed text page corruptions due to copyin failure");
+SYSCTL_INT(_vm, OID_AUTO, vmtc_not_found, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vmtc_not_found, 0, "text page corruptions but no diff found");
+SYSCTL_INT(_vm, OID_AUTO, vmtc_one_bit_flip, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vmtc_one_bit_flip, 0, "text page corruptions that had a single bit flip");
+
+SYSCTL_INT(_vm, OID_AUTO, vmtc_1_byte, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vmtc_byte_counts[0], 0, "text page corruptions with 1 changed byte");
+
+SYSCTL_INT(_vm, OID_AUTO, vmtc_2_byte, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vmtc_byte_counts[1], 0, "text page corruptions with 2 changed bytes");
+
+SYSCTL_INT(_vm, OID_AUTO, vmtc_4_byte, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vmtc_byte_counts[2], 0, "text page corruptions with 3 to 4 changed bytes");
+
+SYSCTL_INT(_vm, OID_AUTO, vmtc_8_byte, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vmtc_byte_counts[3], 0, "text page corruptions with 5 to 8 changed bytes");
+
+SYSCTL_INT(_vm, OID_AUTO, vmtc_16_byte, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vmtc_byte_counts[4], 0, "text page corruptions with 9 to 16 changed bytes");
+
+SYSCTL_INT(_vm, OID_AUTO, vmtc_32_byte, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vmtc_byte_counts[5], 0, "text page corruptions with 17 to 32 changed bytes");
+
+SYSCTL_INT(_vm, OID_AUTO, vmtc_64_byte, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vmtc_byte_counts[6], 0, "text page corruptions with 33 to 64 changed bytes");
+
+SYSCTL_INT(_vm, OID_AUTO, vmtc_128byte, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vmtc_byte_counts[7], 0, "text page corruptions with 65 to 128 changed bytes");
+
+SYSCTL_INT(_vm, OID_AUTO, vmtc_256_byte, CTLFLAG_RD | CTLFLAG_LOCKED,
+ &vmtc_byte_counts[8], 0, "text page corruptions with >128 changed bytes");
+
+#if DEBUG || DEVELOPMENT
+/*
+ * A sysctl that can be used to corrupt a text page with an illegal instruction.
+ * Used for testing text page self healing.
+ */
+extern kern_return_t vm_corrupt_text_addr(uintptr_t);
+static int
+corrupt_text_addr(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+ uint64_t value = 0;
+ int error = sysctl_handle_quad(oidp, &value, 0, req);
+ if (error || !req->newptr) {
+ return error;
+ }
+
+ if (vm_corrupt_text_addr((uintptr_t)value) == KERN_SUCCESS) {
+ return 0;
+ } else {
+ return EINVAL;
+ }
+}
+
+SYSCTL_PROC(_vm, OID_AUTO, corrupt_text_addr,
+ CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
+ 0, 0, corrupt_text_addr, "-", "");
+#endif /* DEBUG || DEVELOPMENT */
#include <mach/mach_types.h>
#include <mach/memory_object_types.h>
-#include <mach/memory_object_control.h>
#include <mach/vm_map.h>
#include <mach/mach_vm.h>
#include <mach/upl.h>
_sysctl__net_children
_sysctl__sysctl_children
_sysctl__vfs_children
-_sysctl__vfs_generic
_sysctl__vfs_generic_children
_sysctl__vm_children
_sysctl_handle_int
__ZN18IOMemoryDescriptor15getDMAMapLengthEPy
__ZN18IOMemoryDescriptor15getDescriptorIDEv
__ZN18IOMemoryDescriptor16getPreparationIDEv
+__ZN18IOMemoryDescriptor16setPreparationIDEv
__ZN18IOMemoryDescriptor17_CopyState_InvokeE5IORPCP15OSMetaClassBasePFiS2_P17_IOMDPrivateStateE
__ZN18IOMemoryDescriptor18getPhysicalAddressEv
__ZN18IOMemoryDescriptor20CreateMapping_InvokeE5IORPCP15OSMetaClassBasePFiS2_yyyyyPP11IOMemoryMapE
__ZN29IOInterleavedMemoryDescriptordlEPvm
__ZN29IOInterleavedMemoryDescriptornwEm
__ZN6IOPMGR10gMetaClassE
+__ZN6IOPMGR13enableCPUCoreEj
+__ZN6IOPMGR13enableCPUCoreEjy
__ZN6IOPMGRC2EPK11OSMetaClass
__ZN6IOPMGRD2Ev
__ZN6IOPMGRdlEPvm
__ZN9IOService20getDeviceMemoryCountEv
__ZN9IOService20powerOverrideOffPrivEv
__ZN9IOService20unlockForArbitrationEv
+__ZN9IOService20ClientCrashed_InvokeE5IORPCP15OSMetaClassBasePFiS2_PS_yE
__ZN9IOService21CopyProperties_InvokeE5IORPCP15OSMetaClassBasePFiS2_PP12OSDictionaryE
__ZN9IOService21SearchProperty_InvokeE5IORPCP15OSMetaClassBasePFiS2_PKcS4_yPP8OSObjectE
__ZN9IOService21getClientWithCategoryEPK8OSSymbol
_copyinstr
_copyout
_copyoutstr
+_coretrust_interface_register
_crc32
_debug_ivars_size
_deflate
#
# configurable kernel message buffer size
#
-options CONFIG_MSG_BSIZE_REL=4096 # <msgb_small>
-options CONFIG_MSG_BSIZE_DEV=4096 # <msgb_small>
-options CONFIG_MSG_BSIZE_REL=16384 # <msgb_large>
+options CONFIG_MSG_BSIZE_REL=16384 # <msgb_small>
+options CONFIG_MSG_BSIZE_DEV=131072 # <msgb_small>
+options CONFIG_MSG_BSIZE_REL=131072 # <msgb_large>
options CONFIG_MSG_BSIZE_DEV=131072 # <msgb_large>
options CONFIG_MSG_BSIZE=CONFIG_MSG_BSIZE_REL # <!development,debug>
options CONFIG_MSG_BSIZE=CONFIG_MSG_BSIZE_DEV # <development,debug>
options CONFIG_IPC_TABLE_ENTRIES_STEPS=64 # 137898 entries # <bsmall,small,xsmall>
options CONFIG_IPC_TABLE_ENTRIES_STEPS=256 # 300714 entries # <medium,large,xlarge>
+#
+# maximum copyout size for IPC debugging tools
+#
+options CONFIG_IPC_KERNEL_MAP_SIZE=16 # 16M # <bsmall,small,xsmall>
+options CONFIG_IPC_KERNEL_MAP_SIZE=64 # 64M # <medium,large,xlarge>
+
#
# configurable kernel - use these options to strip strings from panic
# and printf calls.
#
# MACH configuration options.
#
-# TASK_SWAPPER enables code that manages demand for physical memory by
-# forcibly suspending tasks when the demand exceeds supply. This
-# option should be on.
-#
-options TASK_SWAPPER # <task_swapper_disabled>
#
# This defines configuration options that are normally used only during
# hardclock device driver.
#
options MACH_MP_DEBUG # # <debug>
-options CONFIG_ZCACHE # Enable per-cpu caching for zones # <config_zcache>
options CONFIG_ZLEAKS # Live zone leak debugging # <zleaks>
#
options PGO # <pgo>
-# MACH_COUNTERS enables code that handles various counters in the system.
-#
-options MACH_COUNTERS # # <debug>
-
# DEVELOPMENT define for development builds
options DEVELOPMENT # dev kernel # <development>
options CONFIG_KDP_INTERACTIVE_DEBUGGING # <kdp_interactive_debugging>
options CONFIG_TASKWATCH
+options CONFIG_USER_NOTIFICATION # <config_user_notification>
#
# Kernel Power On Self Tests
#
#
options PROC_REF_DEBUG # <proc_ref_debug>
-#
-# Kernel OS reason debug instrumentation
-#
-options OS_REASON_DEBUG # <os_reason_debug>
-
#
# Kernel Voucher Attr Manager for Activity Trace
#
options CONFIG_CSR # <config_csr>
options CONFIG_CSR_FROM_DT # <config_csr_from_dt>
+# Enable collection of IO Compression statistics
+options CONFIG_IO_COMPRESSION_STATS # <config_io_compression_stats>
+
#
# Console options
#
# Standard Apple OS Configurations:
# -------- ----- -- ---------------
#
-# KERNEL_BASE = [ arm xsmall msgb_small config_embedded config_enforce_signed_code config_zcache config_darkboot ARM_EXTRAS_BASE ]
+# KERNEL_BASE = [ arm xsmall msgb_small config_embedded config_enforce_signed_code config_darkboot ARM_EXTRAS_BASE ]
# KERNEL_RELEASE = [ KERNEL_BASE ]
-# KERNEL_DEV = [ KERNEL_BASE development mach_assert config_xnupost proc_ref_debug os_reason_debug ]
+# KERNEL_DEV = [ KERNEL_BASE development mach_assert config_xnupost proc_ref_debug ]
# KERNEL_DEBUG = [ KERNEL_BASE debug mach_assert config_xnupost config_ltable_stats config_ltable_debug config_waitq_stats config_workloop_debug config_waitq_debug ]
# BSD_BASE = [ mach_bsd config_workqueue psynch config_proc_uuid_policy config_imageboot config_imageboot_img4 ]
# BSD_RELEASE = [ BSD_BASE no_printf_str no_kprintf_str secure_kernel ]
# PERF_DBG_RELEASE=[ PERF_DBG_BASE ist_kdebug ]
# PERF_DBG_DEV = [ PERF_DBG_BASE config_dtrace zleaks kdp_interactive_debugging interrupt_masked_debug ]
# PERF_DBG_DEBUG = [ PERF_DBG_BASE config_dtrace zleaks kdp_interactive_debugging interrupt_masked_debug ]
-# MACH_BASE = [ mach slidable vc_progress_white mdebug ipc_debug importance_inheritance config_atm config_coalitions config_library_validation config_iosched config_telemetry config_sysdiagnose config_quiesce_counter phys_write_acct ]
+# MACH_BASE = [ mach slidable vc_progress_white mdebug ipc_debug importance_inheritance config_atm config_coalitions config_library_validation config_iosched config_telemetry config_sysdiagnose config_quiesce_counter phys_write_acct config_io_compression_stats ]
# MACH_RELEASE = [ MACH_BASE config_skip_precise_user_kernel_time debugger_for_zone_info ]
# MACH_DEV = [ MACH_BASE task_zone_info config_io_accounting importance_trace config_ledger_interval_max ]
# MACH_DEBUG = [ MACH_BASE task_zone_info config_io_accounting importance_trace config_ledger_interval_max importance_debug ]
# Standard Apple OS Configurations:
# -------- ----- -- ---------------
#
-# KERNEL_BASE = [ arm64 xsmall msgb_small config_embedded config_enforce_signed_code config_requires_u32_munging config_zcache config_darkboot ARM_EXTRAS_BASE ]
+# KERNEL_BASE = [ arm64 xsmall msgb_small config_embedded config_enforce_signed_code config_requires_u32_munging config_darkboot ARM_EXTRAS_BASE ]
# KERNEL_RELEASE = [ KERNEL_BASE ]
-# KERNEL_DEV = [ KERNEL_BASE development mach_assert config_xnupost proc_ref_debug os_reason_debug pgtrace ]
+# KERNEL_DEV = [ KERNEL_BASE development mach_assert config_xnupost proc_ref_debug pgtrace ]
# KERNEL_DEBUG = [ KERNEL_BASE debug mach_assert config_xnupost config_ltable_stats config_ltable_debug config_waitq_stats config_workloop_debug config_waitq_debug pgtrace ]
# BSD_BASE = [ mach_bsd config_workqueue psynch config_proc_uuid_policy config_personas config_imageboot config_imageboot_img4 ]
# BSD_RELEASE = [ BSD_BASE no_printf_str no_kprintf_str secure_kernel ]
# PERF_DBG_RELEASE=[ PERF_DBG_BASE ist_kdebug ]
# PERF_DBG_DEV = [ PERF_DBG_BASE config_dtrace lock_stats zleaks kdp_interactive_debugging alternate_debugger interrupt_masked_debug ]
# PERF_DBG_DEBUG = [ PERF_DBG_BASE config_dtrace lock_stats zleaks kdp_interactive_debugging alternate_debugger interrupt_masked_debug ]
-# MACH_BASE = [ mach slidable config_ecc_logging vc_progress_white mdebug ipc_debug importance_inheritance config_atm config_coalitions config_iosched config_library_validation config_sysdiagnose config_telemetry config_mach_bridge_recv_time config_quiesce_counter phys_write_acct ]
+# MACH_BASE = [ mach slidable config_ecc_logging vc_progress_white mdebug ipc_debug importance_inheritance config_atm config_coalitions config_iosched config_library_validation config_sysdiagnose config_telemetry config_mach_bridge_recv_time config_quiesce_counter phys_write_acct config_io_compression_stats ]
# MACH_RELEASE = [ MACH_BASE config_skip_precise_user_kernel_time debugger_for_zone_info ]
# MACH_DEV = [ MACH_BASE task_zone_info config_io_accounting importance_trace config_ledger_interval_max ]
# MACH_DEBUG = [ MACH_BASE task_zone_info config_io_accounting importance_trace config_ledger_interval_max importance_debug ]
# Standard Apple OS Configurations:
# -------- ----- -- ---------------
#
-# KERNEL_BASE = [ arm64 xsmall msgb_large config_embedded config_enforce_signed_code config_requires_u32_munging config_zcache config_darkboot ARM_EXTRAS_BASE ]
+# KERNEL_BASE = [ arm64 xsmall msgb_small config_embedded config_enforce_signed_code config_requires_u32_munging config_darkboot ARM_EXTRAS_BASE ]
# KERNEL_RELEASE = [ KERNEL_BASE ]
-# KERNEL_DEV = [ KERNEL_BASE development mach_assert config_xnupost proc_ref_debug os_reason_debug pgtrace ]
+# KERNEL_DEV = [ KERNEL_BASE development mach_assert config_xnupost proc_ref_debug pgtrace ]
# KERNEL_DEBUG = [ KERNEL_BASE debug mach_assert config_xnupost config_ltable_stats config_ltable_debug config_waitq_stats config_workloop_debug config_waitq_debug pgtrace ]
# BSD_BASE = [ mach_bsd config_workqueue psynch config_proc_uuid_policy config_personas config_imageboot config_imageboot_img4 ]
# BSD_RELEASE = [ BSD_BASE no_printf_str no_kprintf_str secure_kernel ]
# Standard Apple OS Configurations:
# -------- ----- -- ---------------
#
-# KERNEL_BASE = [ arm64 medium msgb_large config_arrow config_requires_u32_munging config_zcache config_delay_idle_sleep config_proc_udata_storage ARM_EXTRAS_BASE ]
+# KERNEL_BASE = [ arm64 medium msgb_large config_arrow config_requires_u32_munging config_delay_idle_sleep config_proc_udata_storage config_uexc config_darkboot ARM_EXTRAS_BASE ]
# KERNEL_RELEASE = [ KERNEL_BASE ]
-# KERNEL_DEV = [ KERNEL_BASE development mach_assert config_xnupost proc_ref_debug os_reason_debug pgtrace ]
+# KERNEL_DEV = [ KERNEL_BASE development mach_assert config_xnupost proc_ref_debug pgtrace ]
# KERNEL_DEBUG = [ KERNEL_BASE debug mach_assert config_xnupost config_ltable_stats config_ltable_debug config_waitq_stats config_workloop_debug config_waitq_debug pgtrace ]
# BSD_BASE = [ mach_bsd sysv_sem sysv_msg sysv_shm config_netboot config_imageboot config_workqueue psynch config_proc_uuid_policy config_coredump pgo config_personas ]
# BSD_RELEASE = [ BSD_BASE ]
# VPN = [ ipsec flow_divert necp content_filter ]
# PF = [ pf pflog ]
# MULTIPATH = [ multipath mptcp ]
+#if defined(SOC_CONFIG_t8020)
# HIBERNATION = [ ]
+#else /*!(defined(SOC_CONFIG_t8020)*/
+# HIBERNATION = [ ]
+#endif /*!(defined(SOC_CONFIG_t8020)*/
# IOKIT_BASE = [ iokit iokitcpp no_kernel_hid config_sleep iokitstats HIBERNATION ]
# IOKIT_RELEASE = [ IOKIT_BASE ]
# IOKIT_DEV = [ IOKIT_BASE iotracking ]
# PERF_DBG_RELEASE=[ PERF_DBG_BASE ist_kdebug ]
# PERF_DBG_DEV = [ PERF_DBG_BASE lock_stats zleaks alternate_debugger interrupt_masked_debug ]
# PERF_DBG_DEBUG = [ PERF_DBG_BASE lock_stats zleaks alternate_debugger interrupt_masked_debug ]
-# MACH_BASE = [ mach slidable config_ecc_logging vc_progress_white mdebug ipc_debug importance_inheritance config_atm config_coalitions config_iosched config_sysdiagnose config_telemetry config_mach_bridge_recv_time config_quiesce_counter config_arm_pfz ]
+# MACH_BASE = [ mach slidable config_ecc_logging vc_progress_white mdebug ipc_debug importance_inheritance config_atm config_coalitions config_iosched config_sysdiagnose config_telemetry config_mach_bridge_recv_time config_quiesce_counter config_arm_pfz config_user_notification phys_write_acct ]
# MACH_RELEASE = [ MACH_BASE debugger_for_zone_info ]
# MACH_DEV = [ MACH_BASE task_zone_info config_io_accounting importance_trace config_ledger_interval_max ]
# MACH_DEBUG = [ MACH_BASE task_zone_info config_io_accounting importance_trace config_ledger_interval_max importance_debug ]
# -------- ----- -- ---------------
#
# ARM_EXTRAS_BASE = [ nos_arm_pmap nos_arm_asm ]
-# KERNEL_BASE = [ arm64 xsmall msgb_small config_embedded config_requires_u32_munging config_zcache ARM_EXTRAS_BASE ]
+# KERNEL_BASE = [ arm64 xsmall msgb_small config_embedded config_requires_u32_munging ARM_EXTRAS_BASE ]
# KERNEL_RELEASE = [ KERNEL_BASE ]
-# KERNEL_DEV = [ KERNEL_BASE development mach_assert config_xnupost proc_ref_debug os_reason_debug pgtrace ]
+# KERNEL_DEV = [ KERNEL_BASE development mach_assert config_xnupost proc_ref_debug pgtrace ]
# KERNEL_DEBUG = [ KERNEL_BASE debug mach_assert config_xnupost config_ltable_stats config_ltable_debug config_waitq_stats config_workloop_debug config_waitq_debug pgtrace ]
# BSD_BASE = [ mach_bsd config_workqueue psynch config_proc_uuid_policy config_personas ]
# BSD_RELEASE = [ BSD_BASE no_printf_str no_kprintf_str secure_kernel ]
# Standard Apple OS Configurations:
# -------- ----- -- ---------------
#
-# KERNEL_BASE = [ arm64 xsmall msgb_large config_embedded config_enforce_signed_code config_requires_u32_munging config_zcache config_darkboot ARM_EXTRAS_BASE ]
+# KERNEL_BASE = [ arm64 xsmall msgb_small config_embedded config_enforce_signed_code config_requires_u32_munging config_darkboot ARM_EXTRAS_BASE ]
# KERNEL_RELEASE = [ KERNEL_BASE ]
-# KERNEL_DEV = [ KERNEL_BASE development mach_assert config_xnupost proc_ref_debug os_reason_debug pgtrace ]
+# KERNEL_DEV = [ KERNEL_BASE development mach_assert config_xnupost proc_ref_debug pgtrace ]
# KERNEL_DEBUG = [ KERNEL_BASE debug mach_assert config_xnupost config_ltable_stats config_ltable_debug config_waitq_stats config_workloop_debug config_waitq_debug pgtrace ]
# BSD_BASE = [ mach_bsd config_workqueue psynch config_proc_uuid_policy config_personas config_imageboot config_imageboot_img4 ]
# BSD_RELEASE = [ BSD_BASE no_printf_str no_kprintf_str secure_kernel ]
# PERF_DBG_RELEASE=[ PERF_DBG_BASE ist_kdebug ]
# PERF_DBG_DEV = [ PERF_DBG_BASE config_dtrace lock_stats zleaks kdp_interactive_debugging alternate_debugger interrupt_masked_debug ]
# PERF_DBG_DEBUG = [ PERF_DBG_BASE config_dtrace lock_stats zleaks kdp_interactive_debugging alternate_debugger interrupt_masked_debug ]
-# MACH_BASE = [ mach slidable config_ecc_logging vc_progress_white mdebug ipc_debug importance_inheritance config_atm config_coalitions config_iosched config_library_validation config_sysdiagnose config_telemetry config_mach_bridge_recv_time config_quiesce_counter ]
+# MACH_BASE = [ mach slidable config_ecc_logging vc_progress_white mdebug ipc_debug importance_inheritance config_atm config_coalitions config_iosched config_library_validation config_sysdiagnose config_telemetry config_mach_bridge_recv_time config_quiesce_counter config_io_compression_stats phys_write_acct ]
# MACH_RELEASE = [ MACH_BASE config_skip_precise_user_kernel_time debugger_for_zone_info ]
# MACH_DEV = [ MACH_BASE task_zone_info config_io_accounting importance_trace config_ledger_interval_max ]
# MACH_DEBUG = [ MACH_BASE task_zone_info config_io_accounting importance_trace config_ledger_interval_max importance_debug ]
# Standard Apple OS Configurations:
# -------- ----- -- ---------------
#
-# KERNEL_BASE = [ intel medium msgb_large config_requires_u32_munging config_zcache config_delay_idle_sleep config_proc_udata_storage vsprintf ]
+# KERNEL_BASE = [ intel medium msgb_large config_requires_u32_munging config_delay_idle_sleep config_proc_udata_storage vsprintf ]
# KERNEL_RELEASE = [ KERNEL_BASE ]
-# KERNEL_DEV = [ KERNEL_BASE development mach_assert config_xnupost proc_ref_debug os_reason_debug ]
+# KERNEL_DEV = [ KERNEL_BASE development mach_assert config_xnupost proc_ref_debug ]
# KERNEL_DEBUG = [ KERNEL_BASE debug mach_assert config_xnupost config_ltable_stats config_ltable_debug config_waitq_stats config_workloop_debug config_waitq_debug ]
# BSD_BASE = [ mach_bsd sysv_sem sysv_msg sysv_shm config_netboot config_imageboot config_imageboot_chunklist config_workqueue psynch config_proc_uuid_policy config_coredump pgo config_32bit_telemetry config_personas ]
# BSD_RELEASE = [ BSD_BASE ]
# PERF_DBG_RELEASE=[ PERF_DBG_BASE ]
# PERF_DBG_DEV =[ PERF_DBG_BASE lock_stats ]
# PERF_DBG_DEBUG = [ PERF_DBG_BASE lock_stats ]
-# MACH_BASE = [ mach config_kext_basement mdebug ipc_debug config_mca config_vmx config_mtrr config_lapic config_telemetry importance_inheritance config_atm config_coalitions hypervisor config_iosched config_sysdiagnose config_mach_bridge_send_time copyout_shim phys_write_acct ]
+# MACH_BASE = [ mach config_kext_basement mdebug ipc_debug config_mca config_vmx config_mtrr config_lapic config_telemetry importance_inheritance config_atm config_coalitions hypervisor config_iosched config_sysdiagnose config_mach_bridge_send_time copyout_shim phys_write_acct config_user_notification ]
# MACH_RELEASE = [ MACH_BASE ]
# MACH_DEV = [ MACH_BASE task_zone_info importance_trace config_ledger_interval_max ]
# MACH_DEBUG = [ MACH_BASE task_zone_info importance_trace config_ledger_interval_max importance_debug ]
-20.3.0
+20.4.0
# The first line of this file contains the master version number for the kernel.
# All other instances of the kernel version in xnu are derived from this file.
_ml_get_wake_timebase
_ml_set_reset_time
_proc_getcdhash
+_ml_cpu_init_completed
_cpu_broadcast_xcall
_cpu_xcall
_cpu_broadcast_immediate_xcall
_sched_perfcontrol_update_callback_deadline
_thread_group_join_io_storage
_thread_group_join_perf_controller
+_ml_cpu_init_completed
_ml_cpu_signal
_ml_cpu_signal_deferred
_ml_cpu_signal_retract
_pmap_iommu_unmap
_pmap_iommu_iovmfree
_pmap_iommu_ioctl
-_pmap_iommu_grant_page
-_pmap_iommu_alloc_contiguous_pages
_nvme_ppl_get_desc
_sart_get_desc
_t8020dart_get_desc
__ZNK15IORegistryEntry18getIndexedPropertyEj
__ZN16IOPlatformExpert*
__ZNK16IOPlatformExpert*
-__ZN18IOMemoryDescriptor16setPreparationIDEv
__ZTV16IOPlatformExpert
__ZN18IODTPlatformExpert*
__ZNK18IODTPlatformExpert*
_bufattr_markmeta
_bufattr_markquickcomplete
_bufattr_meta
+_bufattr_markexpeditedmeta
+_bufattr_expeditedmeta
_bufattr_nocache
_bufattr_passive
_bufattr_quickcomplete
_kern_packet_get_next
_kern_packet_set_chain_counts
_kern_packet_get_chain_counts
+_kern_packet_trace_start
+_kern_packet_trace_end
+_kern_packet_is_traced
+_kern_packet_trace_event
_kern_pbufpool_alloc
_kern_pbufpool_alloc_batch
_kern_pbufpool_alloc_batch_callback
_kern_stack_snapshot_with_reason
_kernel_debug_string
_kext_receipt
+_kext_receipt_set_queried
_kmem_alloc_kobject:_kmem_alloc_kobject_external
_kmem_alloc_pageable:_kmem_alloc_pageable_external
_kx_qsort
_pmap_lockdown_image4_slab
_pmap_lookup_in_static_trust_cache
_pmap_lookup_in_loaded_trust_caches
+_pmap_set_compilation_service_cdhash
+_pmap_match_compilation_service_cdhash
_port_name_to_task
_port_name_to_thread
_post_sys_powersource
_proc_set_syscall_filter_index
_proc_set_syscall_filter_mask
_proc_selfcsflags
+_proc_skip_mtime_update
_proc_starttime
_proc_task
_proc_uniqueid
_throttle_lowpri_window
_throttle_set_thread_io_policy
_throttle_get_thread_effective_io_policy
+_throttle_thread_io_tier_above_metadata
_timeout
_timeout_with_leeway
_tk_nin
_utun_pkt_dtls_input
_vfs_context_bind
_vfs_context_can_resolve_triggers
+_vfs_context_dataless_materialization_is_prevented
_vfs_context_get_special_port
_vfs_context_set_special_port
_vfs_context_is_dataless_manipulator
_vfs_devvp
+_vfs_get_thread_fs_private
_vfs_getattr
_vfs_getbyid
_vfs_is_basesystem
_vfs_mount_id
_vfs_nativexattrs
_vfs_set_root_unmounted_cleanly
+_vfs_set_thread_fs_private
_vfs_setcompoundopen
_vfs_throttle_mask
_vfs_vnodecovered
_IOGetAPFSKeyStoreData
_IOSetAPFSKeyStoreData
_IOGetARVRootHashData
-_IOSetARVRootHashData
_IOGetARVManifestData
-_IOSetARVManifestData
+_IOGetBaseSystemARVRootHashData
+_IOGetBaseSystemARVManifestData
+_IOBaseSystemARVRootHashAvailable
__Z33IOSKCopyKextIdentifierWithAddressm
__ZN14IOPMrootDomain17requestUserActiveEP9IOServicePKc
__ZN14IOPMrootDomain20claimSystemBootEventEP9IOServicejPKcP8OSObject
_hv_disable
_hv_ept_pmap_create
_hv_get*
+_hv_io_notifier*
_hv_release*
_hv_set*
_hv_trace*
__ZN18IODTPlatformExpert28_RESERVEDIODTPlatformExpert5Ev
__ZN18IODTPlatformExpert28_RESERVEDIODTPlatformExpert6Ev
__ZN18IODTPlatformExpert28_RESERVEDIODTPlatformExpert7Ev
+_KUNCExecute
+_KUNCGetNotificationID
+_KUNCUserNotificationDisplayAlert
+_KUNCUserNotificationDisplayFromBundle
+_KUNCUserNotificationDisplayNotice
_Debugger
-_KUNCExecute
-_KUNCGetNotificationID
-_KUNCUserNotificationDisplayAlert
-_KUNCUserNotificationDisplayFromBundle
-_KUNCUserNotificationDisplayNotice
_NDR_record
_OSSpinLockTry
_OSSpinLockUnlock
__ZN5IORTC15_RESERVEDIORTC5Ev
__ZN5IORTC15_RESERVEDIORTC6Ev
__ZN5IORTC15_RESERVEDIORTC7Ev
+_KUNCExecute
+_KUNCGetNotificationID
+_KUNCUserNotificationDisplayAlert
+_KUNCUserNotificationDisplayFromBundle
+_KUNCUserNotificationDisplayNotice
fi
shift 2
+if [ $(egrep -c 'CFBundleIdentifier|OSBundleCompatibleVersion|CFBundleVersion' $PLIST) -lt 3 ]; then
+ echo "error: Invalid input Info.plist $PLIST" 1>&2
+ exit 1
+fi
+
printf \
'<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
- If your allocation size is of fixed size, of a sub-page size, and done with
the `Z_WAITOK` semantics (allocation can block), consider adding `Z_NOFAIL`,
-- If you `bzero` the memory on allocation, prefer passing `Z_ZERO` which can be
+- If you `bzero` the memory on allocation, instead pass `Z_ZERO` which can be
optimized away more often than not.
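
Taken together, a fixed-size, sub-page, blocking allocation can pass all three flags at once; a minimal sketch, assuming a hypothetical record type and the default kalloc heap:

```c
#include <kern/kalloc.h>        /* kheap_alloc/kheap_free, Z_* flags */

struct example_record {         /* hypothetical fixed-size, sub-page payload */
	uint64_t id;
	uint32_t flags;
};

static struct example_record *
example_record_alloc(void)
{
	/*
	 * Fixed size, well under a page, and allowed to block:
	 * Z_NOFAIL removes the need for a failure path, and Z_ZERO
	 * replaces a manual bzero() of the returned memory.
	 */
	return kheap_alloc(KHEAP_DEFAULT, sizeof(struct example_record),
	           Z_WAITOK | Z_ZERO | Z_NOFAIL);
}
```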
### Considerations for zones
There are several allocation wrappers in XNU, present for various reasons
ranging from additional accounting features (IOKit's `IONew`), conformance to
-langauge requirements (C++ various `new` operators) or organical historical
+language requirements (C++ various `new` operators) or organic historical
reasons.
`zalloc` and `kalloc` are considered the primitive allocation interfaces which
Rank 1: allocates the percpu memory, `percpu_foreach_base` and `percpu_foreach`
become usable.
+Rank 2: sets up static percpu counters.
+
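Rank 2 is what lets statically declared per-CPU counters be used early in boot; schematically, such a counter is declared and published the same way `vm_page_grab_count` is elsewhere in this change (the counter name below is hypothetical, and the header is assumed to be `<kern/counter.h>`):

```c
#include <kern/counter.h>       /* assumed home of SCALABLE_COUNTER_DECLARE */
#include <sys/sysctl.h>

/* Statically declared per-CPU counter; its definition lives elsewhere in
 * the counter machinery and becomes usable once the percpu ranks above
 * have run. */
SCALABLE_COUNTER_DECLARE(example_event_count);  /* hypothetical counter */

/* Read-only sysctl export, mirroring vm.pages_grabbed. */
SYSCTL_SCALABLE_COUNTER(_debug, example_event_count, example_event_count,
    "hypothetical per-cpu event counter");
```
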
`STARTUP_SUB_LOCKS`
-------------------
- Rank 1: `LCK_MTX_DECLARE`.
-
`STARTUP_SUB_CODESIGNING`
-------------------------
- Rank last: Final IPC initialization.
+`STARTUP_SUB_SYSCTL`
+-------------------------
+
+### Description
+
+Initializes the sysctl kernel subsystem.
+
+### Rank usage
+
+- Rank 1: automatic `SYSCTL_NODE` registration.
+- Rank 2: automatic `SYSCTL_OID` registration.
+- Middle: other manual early registrations.
+- Last: registrations of dummy nodes in the constant nodes to allow extension.
+
+
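In practice, ranks 1 and 2 cover the static `SYSCTL_NODE`/`SYSCTL_OID` declarations spread across the tree; a representative (hypothetical) pair looks like:

```c
#include <sys/sysctl.h>

/* Registered at rank 1: the node creates the debug.example namespace
 * (and the _debug_example parent list its children attach to). */
SYSCTL_NODE(_debug, OID_AUTO, example, CTLFLAG_RW | CTLFLAG_LOCKED, NULL,
    "hypothetical debug.example node");

/* Registered at rank 2: a leaf OID underneath it. */
static int example_value;
SYSCTL_INT(_debug_example, OID_AUTO, value, CTLFLAG_RW | CTLFLAG_LOCKED,
    &example_value, 0, "hypothetical tunable");
```
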
`STARTUP_SUB_EARLY_BOOT`
------------------------
### Rank usage
N/A.
-
-
virtual kern_return_t
Stop(IOService * provider) LOCAL;
+ /*! @function ClientCrashed
+ * @discussion Notification for kernel objects of a client crash.
+ * @param client Attached client.
+ * @param options No options are currently defined.
+ * @return kIOReturnSuccess on success. See IOReturn.h for error codes.
+ */
+ virtual kern_return_t
+ ClientCrashed(IOService * client, uint64_t options);
+
/*!
* @brief Obtain IOKit IORegistryEntryID.
* @param registryEntryID IORegistryEntryID for the IOKit object.
* @field scalarOutput Array of scalars to return to the caller.
* @field scalarOutputCount Count of scalars to return to the caller in scalarOutput.
* @field structureOutput An OSData to be returned to the caller as structure output.
- * A reference will be consumed by the caller. It is an error to set this field if
- * structureOutputDescriptor was passed in
+ * This field should be set by the driver to an OSData object it created with
+ * the data to be returned, and the OSData instance will be released by the OS.
+ * It is an error for the driver to set this field if structureOutputDescriptor was passed in
* @field structureOutputDescriptor A IOMemoryDescriptor specified by the caller for structure output.
* @field structureOutputMaximumSize Maximum size of structure output specified by caller
* or kIOUserClientVariableStructureSize.
// care.
#define kIONVRAMForceSyncNowPropertyKey "IONVRAM-FORCESYNCNOW-PROPERTY"
+// GUID to address variables for the system NVRAM region
+#define kIOKitSystemGUID "40A0DDD2-77F8-4392-B4A3-1E7304206516"
+#define kIOKitSystemGUIDPrefix (kIOKitSystemGUID ":")
+// Internal only key to give access to system region on internal builds
+#define kIONVRAMSystemInternalAllowKey "com.apple.private.iokit.system-nvram-internal-allow"
+
// clientHasPrivilege security token for kIOClientPrivilegeSecureConsoleProcess
typedef struct _IOUCProcessToken {
#define kIOPlatformFunctionHandlerSet "IOPlatformFunctionHandlerSet"
#define kIOPlatformFunctionHandlerMaxBusDelay "IOPlatformFunctionHandlerMaxBusDelay"
-#define kIOPlatformMaxBusDelay "IOPlatformMaxBusDelay"
+#define kIOPlatformMaxBusDelay "IOPlatformMaxBusDelay"
#if defined(__i386__) || defined(__x86_64__)
#define kIOPlatformFunctionHandlerMaxInterruptDelay "IOPlatformFunctionHandlerMaxInterruptDelay"
-#define kIOPlatformMaxInterruptDelay "IOPlatformMaxInterruptDelay"
+#define kIOPlatformMaxInterruptDelay "IOPlatformMaxInterruptDelay"
#endif /* defined(__i386__) || defined(__x86_64__) */
extern void iokit_retain_port( ipc_port_t port );
extern void iokit_release_port( ipc_port_t port );
+extern void iokit_make_port_send( ipc_port_t port );
extern void iokit_release_port_send( ipc_port_t port );
extern void iokit_lock_port(ipc_port_t port);
OSDeclareDefaultStructorsWithDispatch(IOMemoryMap);
#ifdef XNU_KERNEL_PRIVATE
public:
+ IOOptionBits fOptions;
OSPtr<IOMemoryDescriptor> fMemory;
OSPtr<IOMemoryMap> fSuperMap;
mach_vm_size_t fOffset;
mach_vm_size_t fLength;
task_t fAddressTask;
vm_map_t fAddressMap;
- IOOptionBits fOptions;
upl_t fRedirUPL;
- ipc_port_t fRedirEntry;
- IOMemoryDescriptor * fOwner;
uint8_t fUserClientUnmap;
#if IOTRACKING
IOTrackingUser fTracking;
OSDeclareDefaultStructors(IODTNVRAM);
private:
+ friend class IODTNVRAMVariables;
+
IONVRAMController *_nvramController;
OSPtr<const OSSymbol> _registryPropertiesKey;
UInt8 *_nvramImage;
- IOLock *_variableLock;
+ IORWLock *_variableLock;
IOLock *_controllerLock;
UInt32 _commonPartitionOffset;
UInt32 _commonPartitionSize;
IOReturn removePropertyInternal(const OSSymbol *aKey);
IOReturn chooseDictionary(IONVRAMOperation operation, const uuid_t *varGuid,
const char *variableName, OSDictionary **dict) const;
- bool handleSpecialVariables(const char *name, uuid_t *guid, OSObject *obj, IOReturn *error);
+ IOReturn flushDict(const uuid_t *guid, IONVRAMOperation op);
+ bool handleSpecialVariables(const char *name, const uuid_t *guid, const OSObject *obj, IOReturn *error);
+ OSSharedPtr<OSObject> copyPropertyWithGUIDAndName(const uuid_t *guid, const char *name) const;
+ IOReturn removePropertyWithGUIDAndName(const uuid_t *guid, const char *name);
+ IOReturn setPropertyWithGUIDAndName(const uuid_t *guid, const char *name, OSObject *anObject);
public:
virtual bool init(IORegistryEntry *old, const IORegistryPlane *plane) APPLE_KEXT_OVERRIDE;
#include <machine/machine_routines.h>
};
+#include <stdint.h>
#include <IOKit/IOService.h>
/*!
OSDeclareAbstractStructors(IOPMGR);
public:
+ /*!
+ * @function enableCPUCore
+ * @abstract Enable a single CPU core.
+ * @discussion Release a secondary CPU core from reset, and enable
+ * external IRQ delivery to the core. XNU will not
+ * invoke this method on the boot CPU's cpu_id.
+ * @param cpu_id Logical CPU ID of the core.
+ * @param entry_pa Physical address to use as the reset vector on the
+ * secondary CPU. Not all platforms will honor this
+ * parameter; on Apple Silicon RVBAR_EL1 is programmed
+ * by iBoot.
+ */
+ virtual void enableCPUCore(unsigned int cpu_id, uint64_t entry_pa);
+
/*!
* @function enableCPUCore
- * @abstract Enable a single CPU core.
- * @discussion Release a secondary CPU core from reset, and enable
- * external IRQ delivery to the core. XNU will not
- * invoke this method on the boot CPU's cpu_id.
- * @param cpu_id Logical CPU ID of the core.
+ * @abstract Deprecated - Enable a single CPU core.
*/
- virtual void enableCPUCore(unsigned int cpu_id) = 0;
+ virtual void enableCPUCore(unsigned int cpu_id);
/*!
* @function disableCPUCore
kPEPagingOff,
kPEPanicBegin,
kPEPanicEnd,
- kPEPanicRestartCPUNoCallouts
+ kPEPanicRestartCPUNoCallouts,
+ kPEPanicDiagnosticsDone,
};
/* Bitmask of details related to panic callouts */
enum {
kIOSystemShutdownNotificationStageProcessExit = 0,
kIOSystemShutdownNotificationStageRootUnmount = 1,
+ kIOSystemShutdownNotificationTerminateDEXTs = 2,
};
extern void IOSystemShutdownNotification(int stage);
static void initialize();
+ inline static bool
+ isEnabled()
+ {
+ return enabled;
+ }
+
static void onKextLoad(OSKext *kext, kmod_info_t *kmod_info);
static void onKextUnload(OSKext *kext);
static void onClassAdded(OSKext *parentKext, OSMetaClass *metaClass);
void serverRemove(IOUserServer * server);
void serverAck(IOUserServer * server);
bool serverSlept(void);
+void systemHalt(void);
};
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
void setDriverKitUUID(OSKext *kext);
void setCheckInToken(IOUserServerCheckInToken *token);
void systemPower(bool powerOff);
+ void systemHalt(void);
IOReturn setPowerState(unsigned long state, IOService * service) APPLE_KEXT_OVERRIDE;
IOReturn powerStateWillChangeTo(IOPMPowerFlags flags, unsigned long state, IOService * service) APPLE_KEXT_OVERRIDE;
IOReturn powerStateDidChangeTo(IOPMPowerFlags flags, unsigned long state, IOService * service) APPLE_KEXT_OVERRIDE;
kIOPSFamilyCodeExternal4 = iokit_family_err(sub_iokit_pmu, 4),
kIOPSFamilyCodeExternal5 = iokit_family_err(sub_iokit_pmu, 5),
kIOPSFamilyCodeExternal6 = iokit_family_err(sub_iokit_pmu, 6),
+ kIOPSFamilyCodeExternal7 = iokit_family_err(sub_iokit_pmu, 7),
};
// values for kIOPMPSAdapterDetailsErrorFlagsKey
#define kIOPMMessageRequestUserActive \
iokit_family_msg(sub_iokit_powermanagement, 0x460)
-#define kIOPMMessageRequestSystemShutdown \
- iokit_family_msg(sub_iokit_powermanagement, 0x470)
-
/* @enum SystemSleepReasons
* @abstract The potential causes for system sleep as logged in the system event record.
*/
{
kern_return_t kr;
vm_address_t vmaddr = 0;
- int options = 0;// KMA_LOMEM;
kr = kernel_memory_allocate(kernel_map, &vmaddr,
- page_size, 0, options, VM_KERN_MEMORY_IOKIT);
+ page_size, 0, KMA_NONE, VM_KERN_MEMORY_IOKIT);
if (KERN_SUCCESS != kr) {
vmaddr = 0;
OSSharedPtr<const OSSymbol> gIOProbeScoreKey;
OSSharedPtr<const OSSymbol> gIOModuleIdentifierKey;
OSSharedPtr<const OSSymbol> gIOModuleIdentifierKernelKey;
+OSSharedPtr<const OSSymbol> gIOHIDInterfaceClassName;
IORWLock * gIOCatalogLock;
#if PRAGMA_MARK
gIOProbeScoreKey = OSSymbol::withCStringNoCopy( kIOProbeScoreKey );
gIOModuleIdentifierKey = OSSymbol::withCStringNoCopy( kCFBundleIdentifierKey );
gIOModuleIdentifierKernelKey = OSSymbol::withCStringNoCopy( kCFBundleIdentifierKernelKey );
+ gIOHIDInterfaceClassName = OSSymbol::withCStringNoCopy( "IOHIDInterface" );
assert( array && gIOClassKey && gIOProbeScoreKey
{
IOReturn ret;
OSSharedPtr<OSDictionary> dict;
+ OSSharedPtr<OSKext> kext;
bool isLoaded = false;
+ bool isDext = false;
/* Check first if the kext currently has any linkage dependents;
* in such a case the unload would fail so let's not terminate any
goto finish;
}
}
+ kext = OSKext::lookupKextWithIdentifier(moduleName->getCStringNoCopy());
+ if (kext) {
+ isDext = kext->isDriverKit();
+ }
+
dict = OSDictionary::withCapacity(1);
if (!dict) {
ret = kIOReturnNoMemory;
ret = terminateDrivers(dict.get(), NULL);
- /* No goto between IOLock calls!
- */
- IORWLockWrite(lock);
- if (kIOReturnSuccess == ret) {
- ret = _removeDrivers(dict.get());
- }
+ if (isDext) {
+ /* Force rematching after removing personalities. Dexts are never considered to be "loaded" (from OSKext),
+ * so we can't call unloadModule() to remove personalities and start rematching. */
+ removeDrivers(dict.get(), true);
+ } else {
+ /* No goto between IOLock calls!
+ */
+ IORWLockWrite(lock);
+ if (kIOReturnSuccess == ret) {
+ ret = _removeDrivers(dict.get());
+ }
- // Unload the module itself.
- if (unload && isLoaded && ret == kIOReturnSuccess) {
- ret = unloadModule(moduleName);
+ // Unload the module itself.
+ if (unload && isLoaded && ret == kIOReturnSuccess) {
+ ret = unloadModule(moduleName);
+ }
+ IORWLockUnlock(lock);
}
- IORWLockUnlock(lock);
-
finish:
return ret;
}
IOCatalogue::startMatching( const OSSymbol * moduleName )
{
OSSharedPtr<OSOrderedSet> set;
+ OSSharedPtr<OSKext> kext;
+ OSSharedPtr<OSArray> servicesToTerminate;
if (!moduleName) {
return false;
IORWLockRead(lock);
+ kext = OSKext::lookupKextWithIdentifier(moduleName->getCStringNoCopy());
+ if (kext && kext->isDriverKit()) {
+ /* We're here because kernelmanagerd called IOCatalogueModuleLoaded after launching a dext.
+ * Determine what providers the dext would match against. If there's something already attached
+ * to the provider, terminate it.
+ *
+ * This is only safe to do for HID dexts.
+ */
+ OSSharedPtr<OSArray> dextPersonalities = kext->copyPersonalitiesArray();
+
+ if (!dextPersonalities) {
+ return false;
+ }
+
+ servicesToTerminate = OSArray::withCapacity(1);
+ if (!servicesToTerminate) {
+ return false;
+ }
+
+ dextPersonalities->iterateObjects(^bool (OSObject * obj) {
+ OSDictionary * personality = OSDynamicCast(OSDictionary, obj);
+ OSSharedPtr<OSIterator> iter;
+ IOService * provider;
+ OSSharedPtr<IOService> service;
+ const OSSymbol * category;
+
+ if (personality) {
+ category = OSDynamicCast(OSSymbol, personality->getObject(gIOMatchCategoryKey));
+ if (!category) {
+ category = gIODefaultMatchCategoryKey;
+ }
+ iter = IOService::getMatchingServices(personality);
+
+ while (iter && (provider = OSDynamicCast(IOService, iter->getNextObject()))) {
+ if (provider->metaCast(gIOHIDInterfaceClassName.get()) != NULL) {
+ service.reset(provider->copyClientWithCategory(category), OSNoRetain);
+ if (service) {
+ servicesToTerminate->setObject(service);
+ }
+ }
+ }
+ }
+
+ return false;
+ });
+ }
+
personalities->iterateObjects(^bool (const OSSymbol * key, OSObject * value) {
OSArray * array;
OSDictionary * dict;
return false;
});
+ if (servicesToTerminate) {
+ servicesToTerminate->iterateObjects(^bool (OSObject * obj) {
+ IOService * service = OSDynamicCast(IOService, obj);
+ if (service) {
+ IOOptionBits terminateOptions = kIOServiceRequired;
+ if (service->hasUserServer()) {
+ terminateOptions |= kIOServiceTerminateNeedWillTerminate;
+ }
+ if (!service->terminate(terminateOptions)) {
+ IOLog("%s: failed to terminate service %s-0x%qx with options %08llx for new dext %s\n", __FUNCTION__, service->getName(), service->getRegistryEntryID(), (long long)terminateOptions, moduleName->getCStringNoCopy());
+ }
+ }
+ return false;
+ });
+ }
+
// Start device matching.
if (set->getCount() > 0) {
IOService::catalogNewDrivers(set.get());
}
kr = vm_page_alloc_list(state->fCopyPageCount,
- KMA_LOMEM | KMA_NOPAGEWAIT, &mapBase);
+ (kma_flags_t)(KMA_LOMEM | KMA_NOPAGEWAIT), &mapBase);
if (KERN_SUCCESS != kr) {
DEBG("vm_page_alloc_list(%d) failed (%d)\n", state->fCopyPageCount, kr);
mapBase = NULL;
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
SYSCTL_STRING(_kern, OID_AUTO, hibernatefile,
- CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
gIOHibernateFilename, sizeof(gIOHibernateFilename), "");
SYSCTL_STRING(_kern, OID_AUTO, bootsignature,
- CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
gIOHibernateBootSignature, sizeof(gIOHibernateBootSignature), "");
SYSCTL_UINT(_kern, OID_AUTO, hibernatemode,
- CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
&gIOHibernateMode, 0, "");
SYSCTL_STRUCT(_kern, OID_AUTO, hibernatestatistics,
- CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED,
&_hibernateStats, hibernate_statistics_t, "");
-SYSCTL_STRING(_kern_bridge, OID_AUTO, bootsessionuuid,
- CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
- gIOHibernateBridgeBootSessionUUIDString, sizeof(gIOHibernateBridgeBootSessionUUIDString), "");
+SYSCTL_OID_MANUAL(_kern_bridge, OID_AUTO, bootsessionuuid,
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ gIOHibernateBridgeBootSessionUUIDString, sizeof(gIOHibernateBridgeBootSessionUUIDString),
+ sysctl_handle_string, "A", "");
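The bridge boot-session UUID is the one OID here that keeps `CTLFLAG_NOAUTO`, so it still has to be registered explicitly once its backing string is populated. A sketch of that call; the surrounding condition is assumed and not shown in this hunk:

```cpp
// Register the manually managed OID only once the UUID string is available.
if (gIOHibernateBridgeBootSessionUUIDString[0] != '\0') {
    sysctl_register_oid(&sysctl__kern_bridge_bootsessionuuid);
}
```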
SYSCTL_UINT(_kern, OID_AUTO, hibernategraphicsready,
- CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_ANYBODY,
+ CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_ANYBODY,
&_hibernateStats.graphicsReadyTime, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, hibernatewakenotification,
- CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_ANYBODY,
+ CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_ANYBODY,
&_hibernateStats.wakeNotificationTime, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, hibernatelockscreenready,
- CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_ANYBODY,
+ CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_ANYBODY,
&_hibernateStats.lockScreenReadyTime, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, hibernatehidready,
- CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_ANYBODY,
+ CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_ANYBODY,
&_hibernateStats.hidReadyTime, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, hibernatecount,
- CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_ANYBODY,
+ CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_ANYBODY,
&gIOHibernateCount, 0, "");
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
gIOHibernateFilename[0] = 0;
}
- sysctl_register_oid(&sysctl__kern_hibernatefile);
- sysctl_register_oid(&sysctl__kern_bootsignature);
- sysctl_register_oid(&sysctl__kern_hibernatemode);
- sysctl_register_oid(&sysctl__kern_hibernatestatistics);
- sysctl_register_oid(&sysctl__kern_hibernategraphicsready);
- sysctl_register_oid(&sysctl__kern_hibernatewakenotification);
- sysctl_register_oid(&sysctl__kern_hibernatelockscreenready);
- sysctl_register_oid(&sysctl__kern_hibernatehidready);
- sysctl_register_oid(&sysctl__kern_hibernatecount);
-
gIOChosenEntry = IORegistryEntry::fromPath("/chosen", gIODTPlane);
if (gIOChosenEntry
}
SYSCTL_PROC(_debug, OID_AUTO, iokit,
- CTLTYPE_QUAD | IODEBUG_CTLFLAGS | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ CTLTYPE_QUAD | IODEBUG_CTLFLAGS | CTLFLAG_KERN | CTLFLAG_LOCKED,
&gIOKitDebug, 0, sysctl_debug_iokit, "Q", "boot_arg io");
size_t debug_malloc_size;
extern OSSet * gIORemoveOnReadProperties;
+extern uint32_t gHaltTimeMaxLog;
+extern uint32_t gHaltTimeMaxPanic;
+
extern "C" void IOKitInitializeTime( void );
extern void IOMachPortInitialize(void);
extern const OSSymbol * gIOCreateEFIDevicePathSymbol;
extern "C" void IOSetKeyStoreData(LIBKERN_CONSUMED IOMemoryDescriptor * data);
extern "C" void IOSetAPFSKeyStoreData(LIBKERN_CONSUMED IOMemoryDescriptor* data);
-extern "C" void IOSetARVRootHashData(LIBKERN_CONSUMED IOMemoryDescriptor* arvData);
-extern "C" void IOSetARVManifestData(LIBKERN_CONSUMED IOMemoryDescriptor* arvData);
#endif
extern const OSSymbol * gAKSGetKey;
address = 0; /* overflow detected */
} else if (adjustedSize >= page_size) {
kr = kernel_memory_allocate(kernel_map, &address,
- size, alignMask, 0, IOMemoryTag(kernel_map));
+ size, alignMask, KMA_NONE, IOMemoryTag(kernel_map));
if (KERN_SUCCESS != kr) {
address = 0;
}
if (adjustedSize >= page_size) {
kr = kernel_memory_allocate(kernel_map, &allocationAddress,
- adjustedSize, 0, 0, IOMemoryTag(kernel_map));
+ adjustedSize, 0, KMA_NONE, IOMemoryTag(kernel_map));
if (KERN_SUCCESS != kr) {
allocationAddress = 0;
}
|| (alignment > page_size);
if (contiguous || maxPhys) {
- int options = 0;
+ kma_flags_t options = KMA_NONE;
vm_offset_t virt;
adjustedSize = size;
#endif
if (maxPhys <= 0xFFFFFFFF) {
maxPhys = 0;
- options |= KMA_LOMEM;
+ options = (kma_flags_t)(options | KMA_LOMEM);
} else if (gIOLastPage && (atop_64(maxPhys) > gIOLastPage)) {
maxPhys = 0;
}
}
if (contiguous || maxPhys) {
kr = kmem_alloc_contig(kernel_map, &virt, size,
- alignMask, (ppnum_t) atop(maxPhys), (ppnum_t) atop(alignMask), 0, IOMemoryTag(kernel_map));
+ alignMask, (ppnum_t) atop(maxPhys), (ppnum_t) atop(alignMask),
+ KMA_NONE, IOMemoryTag(kernel_map));
} else {
kr = kernel_memory_allocate(kernel_map, &virt,
size, alignMask, options, IOMemoryTag(kernel_map));
fMemory.reset();
}
- if (fOwner && (fOwner != fMemory)) {
- LOCK;
- fOwner->removeMapping(this);
- UNLOCK;
- }
-
if (fSuperMap) {
fSuperMap.reset();
}
#include <kern/debug.h>
#include <pexpert/boot.h>
#include <pexpert/pexpert.h>
+#include <sys/csr.h>
#define super IOService
// From Apple CHRP Spec
#define NVRAM_CHRP_SIG_SYSTEM 0x70
#define NVRAM_CHRP_SIG_CONFIG 0x71
-#define NVRAM_CHRP_SIG_FREESPACE 0x7F
-#define NVRAM_CHRP_PARTITION_NAME_COMMON "common"
-#define NVRAM_CHRP_PARTITION_NAME_SYSTEM "system"
-#define NVRAM_CHRP_PARTITION_NAME_SYSTEM_LEGACY "secure"
-#define NVRAM_CHRP_PARTITION_NAME_FREESPACE "\x77\x77\x77\x77\x77\x77\x77\x77\x77\x77\x77\x77"
+#define NVRAM_CHRP_PARTITION_NAME_COMMON_V1 "common"
+#define NVRAM_CHRP_PARTITION_NAME_SYSTEM_V1 "system"
+#define NVRAM_CHRP_PARTITION_NAME_COMMON_V2 "2common"
+#define NVRAM_CHRP_PARTITION_NAME_SYSTEM_V2 "2system"
#define NVRAM_CHRP_LENGTH_BLOCK_SIZE 0x10 // CHRP length field is in 16 byte blocks
IOLockUnlock(_controllerLock); \
})
-#define NVRAMLOCK() \
-({ \
+#define NVRAMREADLOCK() \
+({ \
if (preemption_enabled() && !panic_active()) \
- IOLockLock(_variableLock); \
+ IORWLockRead(_variableLock); \
})
-#define NVRAMUNLOCK() \
-({ \
+#define NVRAMWRITELOCK() \
+({ \
if (preemption_enabled() && !panic_active()) \
- IOLockUnlock(_variableLock); \
+ IORWLockWrite(_variableLock); \
})
-#define NVRAMLOCKASSERT() \
-({ \
- if (preemption_enabled() && !panic_active()) \
- IOLockAssert(_variableLock, kIOLockAssertOwned); \
+#define NVRAMUNLOCK() \
+({ \
+ if (preemption_enabled() && !panic_active()) \
+ IORWLockUnlock(_variableLock); \
+})
+
+#define NVRAMLOCKASSERTHELD() \
+({ \
+ if (preemption_enabled() && !panic_active()) \
+ IORWLockAssert(_variableLock, kIORWLockAssertHeld); \
})
+#define NVRAMLOCKASSERTEXCLUSIVE() \
+({ \
+ if (preemption_enabled() && !panic_active()) \
+ IORWLockAssert(_variableLock, kIORWLockAssertWrite); \
+})
+
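A sketch of the usage split these macros introduce (the dictionary access below is illustrative; later hunks in this change follow the same pattern): lookups take the shared lock, mutations take the exclusive lock.

```cpp
// Reader: shared lock around dictionary lookups.
NVRAMREADLOCK();
OSSharedPtr<OSObject> obj(dict->getObject(name), OSRetain);
NVRAMUNLOCK();

// Writer: exclusive lock around dictionary mutation.
NVRAMWRITELOCK();
dict->setObject(name, value.get());
NVRAMUNLOCK();
```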
+enum NVRAMPartitionType {
+ kIONVRAMPartitionSystem,
+ kIONVRAMPartitionCommon
+};
+
typedef struct {
- const char *name;
+ NVRAMPartitionType type;
UInt32 offset;
UInt32 size;
OSSharedPtr<OSDictionary> &dict;
UUID_DEFINE(gAppleNVRAMGuid, 0x7C, 0x43, 0x61, 0x10, 0xAB, 0x2A, 0x4B, 0xBB, 0xA8, 0x80, 0xFE, 0x41, 0x99, 0x5C, 0x9F, 0x82);
static bool gNVRAMLogging = false;
+static bool gInternalBuild = false;
// allowlist variables from macboot that need to be set/get from system region if present
static const char * const gNVRAMSystemList[] = {
- "adbe-tunable",
- "adbe-tunables",
- "adfe-tunables",
- "alamo-path",
- "alt-boot-volume",
- "ASMB",
- "atc0",
- "atc1",
+ "allow-root-hash-mismatch",
"auto-boot",
"auto-boot-halt-stage",
- "auto-boot-once",
- "auto-boot-usb",
- "auxkc-path",
- "backlight-level",
- "backlight-nits",
"base-system-path",
"boot-args",
- "boot-breadcrumbs",
"boot-command",
- "boot-device",
"boot-image",
- "boot-partition",
- "boot-path",
- "boot-ramdisk",
- "boot-script",
- "boot-volume",
"bootdelay",
- "bt1addr",
- "btaddr",
- "cam-use-ext-ldo",
- "CLCG_override",
"com.apple.System.boot-nonce",
- "com.apple.System.rtc-offset",
- "com.apple.System.tz0-size",
- "core-bin-offset",
- "cpu-bin-offset",
"darkboot",
- "DClr_override",
- "dcp-auto-boot",
- "debug-gg",
- "debug-soc",
- "debug-uarts",
- "diags-path",
- "disable-boot-wdt",
- "display-color-space",
- "display-timing",
- "display-vsh-comp",
- "dpcd-max-brightness",
- "dtdump",
- "dtdump-path",
- "e75",
"emu",
- "enable-auth-debug",
- "enable-jop",
- "enable-marconi",
- "enable-upgrade-fallback",
- "enforce-iuob",
- "eth1addr",
- "ethaddr",
- "failboot-breadcrumbs",
- "fixed-lcm-boost",
- "force-ctrr-lock",
- "force-upgrade-fail",
- "fuos-path",
- "hib-ui-force",
- "hibhack-test-hmac",
- "iboot-data",
- "iboot-failure-reason",
- "iboot-failure-reason-str",
- "iboot-failure-volume",
- "iboot1-precommitted",
- "idle-off",
- "is-tethered",
- "kaslr-off",
- "kaslr-slide",
- "kis-rsm",
- "knobs",
- "loadaddr",
- "memmapdump",
- "mipi-bridge-cmd-verify",
- "mipi-bridge-poll-cmd-fifo",
- "no-ctrr",
- "one-time-boot-command",
- "osenvironment",
- "ota-breadcrumbs",
- "ota-outcome",
- "panicmedic",
- "panicmedic-threshold",
- "panicmedic-timestamps",
- "phleet-path",
- "pinot-panel-id",
- "pintoaddr",
+ "one-time-boot-command", // Needed for diags customer install flows
"policy-nonce-digests",
- "preserve-debuggability",
"prevent-restores", // Keep for factory <rdar://problem/70476321>
"prev-lang:kbd",
- "ramrod-kickstart-aces",
- "rbdaddr0",
- "rbm-path",
- "reconfig-behavior",
- "reconfig-breakpoints",
- "recovery-boot-mode",
- "recovery-breadcrumbs",
- "restored-host-timeout",
"root-live-fs",
- "rtos-path",
- "soc-bin-offset",
- "StartupMute",
- "StartupMuteAccessibility",
- "storage-prev-assert",
- "storage-prev-assert-stored",
- "summit-panel-id",
"SystemAudioVolume",
"SystemAudioVolumeExtension",
"SystemAudioVolumeSaved",
- "tz0-size-override",
- "upgrade-fallback-boot-command",
- "upgrade-retry",
- "usb-enabled",
- "wifi1addr",
- "wifiaddr",
nullptr
};
.p.Bits.NeverAllowedToDelete = 1},
{"boot-image", .p.Bits.UserWrite = 1},
{"com.apple.System.fp-state", .p.Bits.KernelOnly = 1},
- {"policy-nonce-digests", .p.Bits.ResetNVRAMOnlyDelete = 1},
+ {"policy-nonce-digests", .p.Bits.ResetNVRAMOnlyDelete = 1}, // Deleting this via a user-triggered obliterate leaves J273a unable to boot
{"security-password", .p.Bits.RootRequired = 1},
#if !defined(__x86_64__)
{"acc-cm-override-count", .p.Bits.KernelOnly = 1},
{"acc-mb-ld-lifetime", .p.Bits.KernelOnly = 1},
{"backlight-level", .p.Bits.UserWrite = 1},
+ {"backlight-nits", .p.Bits.UserWrite = 1},
{"com.apple.System.boot-nonce", .p.Bits.KernelOnly = 1},
{"com.apple.System.sep.art", .p.Bits.KernelOnly = 1},
{"darkboot", .p.Bits.UserWrite = 1},
return true;
}
+#if defined(DEBUG) || defined(DEVELOPMENT)
+static const char *
+getNVRAMOpString(IONVRAMOperation op)
+{
+ switch (op) {
+ case kIONVRAMOperationRead:
+ return "Read";
+ case kIONVRAMOperationWrite:
+ return "Write";
+ case kIONVRAMOperationDelete:
+ return "Delete";
+ case kIONVRAMOperationObliterate:
+ return "Obliterate";
+ case kIONVRAMOperationReset:
+ return "Reset";
+ default:
+ return "Unknown";
+ }
+}
+#endif
+
static bool
verifyPermission(IONVRAMOperation op, const uuid_t *varGuid, const char *varName)
{
VariablePermission perm;
- bool kernel, admin, writeEntitled, readEntitled, allowList, systemGuid, systemEntitled;
+ bool kernel, admin, writeEntitled, readEntitled, allowList, systemGuid, systemEntitled, systemInternalEntitled, systemAllow;
+ bool ok = false;
perm = getVariablePermission(varName);
if (perm.Bits.KernelOnly) {
DEBUG_INFO("KernelOnly access for %s, kernel=%d\n", varName, kernel);
- return kernel;
+ ok = kernel;
+ goto exit;
}
- allowList = variableInAllowList(varName);
- systemGuid = uuid_compare(*varGuid, gAppleSystemVariableGuid) == 0;
- admin = IOUserClient::clientHasPrivilege(current_task(), kIONVRAMPrivilege) == kIOReturnSuccess;
- writeEntitled = IOTaskHasEntitlement(current_task(), kIONVRAMWriteAccessKey);
- readEntitled = IOTaskHasEntitlement(current_task(), kIONVRAMReadAccessKey);
- systemEntitled = IOTaskHasEntitlement(current_task(), kIONVRAMSystemAllowKey) || kernel;
+ allowList = variableInAllowList(varName);
+ systemGuid = uuid_compare(*varGuid, gAppleSystemVariableGuid) == 0;
+ admin = IOUserClient::clientHasPrivilege(current_task(), kIONVRAMPrivilege) == kIOReturnSuccess;
+ writeEntitled = IOTaskHasEntitlement(current_task(), kIONVRAMWriteAccessKey);
+ readEntitled = IOTaskHasEntitlement(current_task(), kIONVRAMReadAccessKey);
+ systemEntitled = IOTaskHasEntitlement(current_task(), kIONVRAMSystemAllowKey);
+ systemInternalEntitled = IOTaskHasEntitlement(current_task(), kIONVRAMSystemInternalAllowKey);
+
+ systemAllow = systemEntitled || (systemInternalEntitled && gInternalBuild) || kernel;
switch (op) {
case kIONVRAMOperationRead:
if (kernel || admin || readEntitled || perm.Bits.FullAccess) {
- return true;
+ ok = true;
}
break;
if (kernel || perm.Bits.UserWrite || admin || writeEntitled) {
if (systemGuid) {
if (allowList) {
- if (!systemEntitled) {
+ if (!systemAllow) {
DEBUG_ERROR("Allowed write to system region when NOT entitled for %s\n", varName);
}
- } else if (!systemEntitled) {
+ } else if (!systemAllow) {
DEBUG_ERROR("Not entitled for system region writes for %s\n", varName);
break;
}
}
- return true;
+ ok = true;
}
break;
} else if ((op == kIONVRAMOperationObliterate) && perm.Bits.ResetNVRAMOnlyDelete) {
DEBUG_INFO("Not allowed to obliterate %s\n", varName);
break;
+ } else if ((op == kIONVRAMOperationDelete) && perm.Bits.ResetNVRAMOnlyDelete) {
+ DEBUG_INFO("Only allowed to delete %s via NVRAM reset\n", varName);
+ break;
}
if (kernel || perm.Bits.UserWrite || admin || writeEntitled) {
if (systemGuid) {
if (allowList) {
- if (!systemEntitled) {
+ if (!systemAllow) {
DEBUG_ERROR("Allowed delete to system region when NOT entitled for %s\n", varName);
}
- } else if (!systemEntitled) {
+ } else if (!systemAllow) {
DEBUG_ERROR("Not entitled for system region deletes for %s\n", varName);
break;
}
}
- return true;
+ ok = true;
}
break;
}
- DEBUG_INFO("Permission for %s denied, kernel=%d, admin=%d, writeEntitled=%d, readEntitled=%d, systemGuid=%d, systemEntitled=%d\n",
- varName, kernel, admin, writeEntitled, readEntitled, systemGuid, systemEntitled);
- return false;
+exit:
+ DEBUG_INFO("Permission for %s of %s %s: kernel=%d, admin=%d, writeEntitled=%d, readEntitled=%d, systemGuid=%d, systemEntitled=%d, systemInternalEntitled=%d, UserWrite=%d\n",
+ getNVRAMOpString(op), varName, ok ? "granted" : "denied", kernel, admin, writeEntitled, readEntitled, systemGuid, systemEntitled, systemInternalEntitled, perm.Bits.UserWrite);
+ return ok;
}
static bool
{
uuid_string_t temp = {0};
size_t keyLen = strlen(key);
- bool result = false;
+ bool ok = false;
const char *name = key;
uuid_t guid;
if ((uuid_parse(temp, guid) == 0) &&
(key[sizeof(temp) - 1] == ':')) {
name = key + sizeof(temp);
- result = true;
+ ok = true;
}
}
if (guidResult) {
- result ? uuid_copy(*guidResult, guid) : uuid_copy(*guidResult, gAppleNVRAMGuid);
+ ok ? uuid_copy(*guidResult, guid) : uuid_copy(*guidResult, gAppleNVRAMGuid);
}
if (nameResult) {
*nameResult = name;
return false;
}
+static bool
+skipKey(const OSSymbol *aKey)
+{
+ return aKey->isEqualTo(kIOClassNameOverrideKey) ||
+ aKey->isEqualTo(kIOBSDNameKey) ||
+ aKey->isEqualTo(kIOBSDNamesKey) ||
+ aKey->isEqualTo(kIOBSDMajorKey) ||
+ aKey->isEqualTo(kIOBSDMinorKey) ||
+ aKey->isEqualTo(kIOBSDUnitKey);
+}
+
+// ************************** IODTNVRAMVariables ****************************
+
// Private IOService-based class used to publish each region's variables as a
// distinct registry entry for easy ioreg access, since serializeProperties is
// overloaded and used for variable access.
OSDeclareDefaultStructors(IODTNVRAMVariables)
private:
IODTNVRAM *_provider;
- OSDictionary *_properties;
+ OSDictionary *_variables;
uuid_t _guid;
public:
- bool init(const uuid_t *guid);
- virtual bool start(IOService * provider) APPLE_KEXT_OVERRIDE;
- virtual IOReturn setProperties(OSObject * properties) APPLE_KEXT_OVERRIDE;
- virtual bool serializeProperties(OSSerialize *s) const APPLE_KEXT_OVERRIDE;
+ bool init(const uuid_t *guid);
+ virtual bool start(IOService * provider) APPLE_KEXT_OVERRIDE;
+ virtual IOReturn setVariables(OSObject * properties);
+
+ virtual bool serializeProperties(OSSerialize *s) const APPLE_KEXT_OVERRIDE;
+ virtual OSPtr<OSObject> copyProperty(const OSSymbol *aKey) const APPLE_KEXT_OVERRIDE;
+ virtual OSObject *getProperty(const OSSymbol *aKey) const APPLE_KEXT_OVERRIDE;
+ virtual bool setProperty(const OSSymbol *aKey, OSObject *anObject) APPLE_KEXT_OVERRIDE;
+ virtual IOReturn setProperties(OSObject *properties) APPLE_KEXT_OVERRIDE;
+ virtual void removeProperty(const OSSymbol *aKey) APPLE_KEXT_OVERRIDE;
};
OSDefineMetaClassAndStructors(IODTNVRAMVariables, IOService)
bool
IODTNVRAMVariables::init(const uuid_t *guid)
{
- require(super::init(), error);
- require(guid, error);
+ if (!super::init()) {
+ return false;
+ }
+
+ if (guid == nullptr) {
+ return false;
+ }
uuid_copy(_guid, *guid);
return true;
-
-error:
- return false;
}
bool
IODTNVRAMVariables::start(IOService * provider)
{
- require(IOService::start(provider), error);
+ if (!IOService::start(provider)) {
+ goto error;
+ }
- require(_provider = OSDynamicCast(IODTNVRAM, provider), error);
+ _provider = OSDynamicCast(IODTNVRAM, provider);
+ if (_provider == nullptr) {
+ goto error;
+ }
registerService();
}
IOReturn
-IODTNVRAMVariables::setProperties(OSObject * properties)
+IODTNVRAMVariables::setVariables(OSObject * variables)
{
- if (OSDynamicCast(OSDictionary, properties)) {
- OSSafeReleaseNULL(_properties);
- _properties = OSDynamicCast(OSDictionary, properties);
- properties->retain();
+ if (OSDynamicCast(OSDictionary, variables)) {
+ OSSafeReleaseNULL(_variables);
+ _variables = OSDynamicCast(OSDictionary, variables);
+ variables->retain();
}
- return IOService::setProperties(properties);
+ return kIOReturnSuccess;
}
bool
const OSSymbol *key;
OSSharedPtr<OSDictionary> dict;
OSSharedPtr<OSCollectionIterator> iter;
- OSSharedPtr<OSDictionary> localProperties(_properties, OSRetain);
- bool result = false;
+ OSSharedPtr<OSDictionary> localVariables(_variables, OSRetain);
+ bool ok = false;
- require(localProperties != nullptr, exit);
+ if (localVariables == nullptr) {
+ goto exit;
+ }
- dict = OSDictionary::withCapacity(localProperties->getCount());
- require_action(dict, exit, DEBUG_ERROR("No dictionary\n"));
+ dict = OSDictionary::withCapacity(localVariables->getCount());
+ if (dict == nullptr) {
+ DEBUG_ERROR("No dictionary\n");
+ goto exit;
+ }
- iter = OSCollectionIterator::withCollection(localProperties.get());
- require_action(iter, exit, DEBUG_ERROR("failed to create iterator\n"));
+ iter = OSCollectionIterator::withCollection(localVariables.get());
+ if (iter == nullptr) {
+ DEBUG_ERROR("failed to create iterator\n");
+ goto exit;
+ }
while ((key = OSDynamicCast(OSSymbol, iter->getNextObject()))) {
if (verifyPermission(kIONVRAMOperationRead, &_guid, key)) {
- dict->setObject(key, localProperties->getObject(key));
+ dict->setObject(key, localVariables->getObject(key));
}
}
- result = dict->serialize(s);
+ ok = dict->serialize(s);
exit:
- DEBUG_INFO("result=%d\n", result);
- return result;
+ DEBUG_INFO("ok=%d\n", ok);
+ return ok;
+}
+
+OSPtr<OSObject>
+IODTNVRAMVariables::copyProperty(const OSSymbol *aKey) const
+{
+ if (_provider && !skipKey(aKey)) {
+ DEBUG_INFO("aKey=%s\n", aKey->getCStringNoCopy());
+
+ return _provider->copyPropertyWithGUIDAndName(&_guid, aKey->getCStringNoCopy());
+ } else {
+ return nullptr;
+ }
}
+OSObject *
+IODTNVRAMVariables::getProperty(const OSSymbol *aKey) const
+{
+ OSSharedPtr<OSObject> theObject = copyProperty(aKey);
+
+ return theObject.get();
+}
+
+bool
+IODTNVRAMVariables::setProperty(const OSSymbol *aKey, OSObject *anObject)
+{
+ if (_provider) {
+ return _provider->setPropertyWithGUIDAndName(&_guid, aKey->getCStringNoCopy(), anObject);
+ } else {
+ return false;
+ }
+}
+
+IOReturn
+IODTNVRAMVariables::setProperties(OSObject *properties)
+{
+ IOReturn ret = kIOReturnSuccess;
+ OSObject *object;
+ const OSSymbol *key;
+ OSDictionary *dict;
+ OSSharedPtr<OSCollectionIterator> iter;
+
+ if (_provider) {
+ dict = OSDynamicCast(OSDictionary, properties);
+ if (dict == nullptr) {
+ DEBUG_ERROR("Not a dictionary\n");
+ return kIOReturnBadArgument;
+ }
+
+ iter = OSCollectionIterator::withCollection(dict);
+ if (iter == nullptr) {
+ DEBUG_ERROR("Couldn't create iterator\n");
+ return kIOReturnBadArgument;
+ }
+
+ while (ret == kIOReturnSuccess) {
+ key = OSDynamicCast(OSSymbol, iter->getNextObject());
+ if (key == nullptr) {
+ break;
+ }
+
+ object = dict->getObject(key);
+ if (object == nullptr) {
+ continue;
+ }
+
+ ret = setProperty(key, object);
+ }
+ } else {
+ ret = kIOReturnNotReady;
+ }
+
+ DEBUG_INFO("ret=%#08x\n", ret);
+
+ return ret;
+}
+
+void
+IODTNVRAMVariables::removeProperty(const OSSymbol *aKey)
+{
+ if (_provider) {
+ _provider->removePropertyWithGUIDAndName(&_guid, aKey->getCStringNoCopy());
+ }
+}
+
+
+// **************************** IODTNVRAM *********************************
+
bool
IODTNVRAM::init(IORegistryEntry *old, const IORegistryPlane *plane)
{
return false;
}
- _variableLock = IOLockAlloc();
+ PE_parse_boot_argn("nvram-log", &gNVRAMLogging, sizeof(gNVRAMLogging));
+
+#if XNU_TARGET_OS_OSX
+#if CONFIG_CSR
+ gInternalBuild = (csr_check(CSR_ALLOW_APPLE_INTERNAL) == 0);
+#endif // CONFIG_CSR
+#endif // XNU_TARGET_OS_OSX
+
+ DEBUG_INFO("gInternalBuild = %d\n", gInternalBuild);
+
+ _variableLock = IORWLockAlloc();
if (!_variableLock) {
return false;
}
return false;
}
- PE_parse_boot_argn("nvram-log", &gNVRAMLogging, sizeof(gNVRAMLogging));
-
dict = OSDictionary::withCapacity(1);
if (dict == nullptr) {
return false;
DEBUG_INFO("setting controller\n");
+ CONTROLLERLOCK();
_nvramController = nvram;
+ CONTROLLERUNLOCK();
// <rdar://problem/9529235> race condition possible between
// IODTNVRAM and IONVRAMController (restore loses boot-args)
if (!_isProxied) {
- DEBUG_INFO("Proxied NVRAM data\n");
+ DEBUG_INFO("Reading non-proxied NVRAM data\n");
_nvramController->read(0, _nvramImage, _nvramSize);
initNVRAMImage();
}
no_common:
ret = serializeVariables();
- DEBUG_INFO("serializeVariables ret=0x%08x\n", ret);
+ DEBUG_INFO("serializeVariables ret=%#08x\n", ret);
}
void
while (currentOffset < _nvramSize) {
bool common_partition;
bool system_partition;
-
chrp_nvram_header_t * header = (chrp_nvram_header_t *)(_nvramImage + currentOffset);
+ const uint8_t common_v1_name[sizeof(header->name)] = {NVRAM_CHRP_PARTITION_NAME_COMMON_V1};
+ const uint8_t common_v2_name[sizeof(header->name)] = {NVRAM_CHRP_PARTITION_NAME_COMMON_V2};
+ const uint8_t system_v1_name[sizeof(header->name)] = {NVRAM_CHRP_PARTITION_NAME_SYSTEM_V1};
+ const uint8_t system_v2_name[sizeof(header->name)] = {NVRAM_CHRP_PARTITION_NAME_SYSTEM_V2};
currentLength = header->len * NVRAM_CHRP_LENGTH_BLOCK_SIZE;
break;
}
- common_partition = memcmp(header->name, NVRAM_CHRP_PARTITION_NAME_COMMON, strlen(NVRAM_CHRP_PARTITION_NAME_COMMON)) == 0;
- system_partition = (memcmp(header->name, NVRAM_CHRP_PARTITION_NAME_SYSTEM, strlen(NVRAM_CHRP_PARTITION_NAME_SYSTEM)) == 0) ||
- (memcmp(header->name, NVRAM_CHRP_PARTITION_NAME_SYSTEM_LEGACY, strlen(NVRAM_CHRP_PARTITION_NAME_SYSTEM_LEGACY)) == 0);
+ common_partition = (memcmp(header->name, common_v1_name, sizeof(header->name)) == 0) ||
+ (memcmp(header->name, common_v2_name, sizeof(header->name)) == 0);
+ system_partition = (memcmp(header->name, system_v1_name, sizeof(header->name)) == 0) ||
+ (memcmp(header->name, system_v2_name, sizeof(header->name)) == 0);
if (common_partition) {
_commonPartitionOffset = partitionOffset;
OSSharedPtr<OSNumber> partitionOffsetNumber, partitionLengthNumber;
// Construct the partition ID from the signature and name.
- snprintf(partitionID, sizeof(partitionID), "0x%02x,", header->sig);
- strncpy(partitionID + 5, header->name, sizeof(header->name));
+ snprintf(partitionID, sizeof(partitionID), "%#02x,", header->sig);
+ memcpy(partitionID + 5, header->name, sizeof(header->name));
partitionID[17] = '\0';
partitionOffsetNumber = OSNumber::withNumber(partitionOffset, 32);
_systemImage = _nvramImage + _systemPartitionOffset;
}
- DEBUG_ALWAYS("NVRAM : ofPartitionOffset - 0x%x, ofPartitionSize - 0x%x, systemPartitionOffset - 0x%x, systemPartitionSize - 0x%x\n",
+ DEBUG_ALWAYS("NVRAM : commonPartitionOffset - %#x, commonPartitionSize - %#x, systemPartitionOffset - %#x, systemPartitionSize - %#x\n",
(unsigned int) _commonPartitionOffset, (unsigned int) _commonPartitionSize, (unsigned int) _systemPartitionOffset, (unsigned int) _systemPartitionSize);
_lastDeviceSync = 0;
const OSSymbol *key;
OSSharedPtr<OSDictionary> systemDict, commonDict, dict;
OSSharedPtr<OSCollectionIterator> iter;
- bool result = false;
+ bool ok = false;
unsigned int totalCapacity = 0;
- NVRAMLOCK();
+ NVRAMREADLOCK();
if (_commonDict) {
commonDict = OSDictionary::withDictionary(_commonDict.get());
}
}
}
- result = dict->serialize(s);
+ ok = dict->serialize(s);
exit:
- DEBUG_INFO("result=%d\n", result);
+ DEBUG_INFO("ok=%d\n", ok);
- return result;
+ return ok;
}
IOReturn
DEBUG_INFO("Using common dictionary\n");
*dict = _commonDict.get();
}
- } else {
+ return kIOReturnSuccess;
+ } else if (_commonDict != nullptr) {
DEBUG_INFO("Defaulting to common dictionary\n");
*dict = _commonDict.get();
+ return kIOReturnSuccess;
}
- return kIOReturnSuccess;
+ return kIOReturnNotFound;
}
-bool
-IODTNVRAM::handleSpecialVariables(const char *name, uuid_t *guid, OSObject *obj, IOReturn *error)
+IOReturn
+IODTNVRAM::flushDict(const uuid_t *guid, IONVRAMOperation op)
{
IOReturn err = kIOReturnSuccess;
- bool special = false;
- NVRAMLOCKASSERT();
+ if ((_systemDict != nullptr) && (uuid_compare(*guid, gAppleSystemVariableGuid) == 0)) {
+ const OSSymbol *key;
+ OSSharedPtr<OSDictionary> newDict;
+ OSSharedPtr<OSCollectionIterator> iter;
- if (strcmp(name, "ResetNVRam") == 0) {
- DEBUG_INFO("%s requested\n", name);
+ newDict = OSDictionary::withCapacity(_systemDict->getCapacity());
+ iter = OSCollectionIterator::withCollection(_systemDict.get());
+ if ((newDict == nullptr) || (iter == nullptr)) {
+ err = kIOReturnNoMemory;
+ goto exit;
+ }
- if (uuid_compare(*guid, gAppleSystemVariableGuid) == 0) {
- if (_systemDict != nullptr) {
- _systemDict->flushCollection();
+ while ((key = OSDynamicCast(OSSymbol, iter->getNextObject()))) {
+ if (!verifyPermission(op, &gAppleSystemVariableGuid, key)) {
+ newDict->setObject(key, _systemDict->getObject(key));
}
-
- _commonDict->flushCollection();
- DEBUG_INFO("system & common dictionary flushed\n");
}
- special = true;
- } else if (strcmp(name, "ObliterateNVRam") == 0) {
- DEBUG_INFO("%s requested\n", name);
+ _systemDict = newDict;
- if ((_systemDict != nullptr) && (uuid_compare(*guid, gAppleSystemVariableGuid) == 0)) {
- const OSSymbol *key;
- OSSharedPtr<OSDictionary> newDict;
- OSSharedPtr<OSCollectionIterator> iter;
+ DEBUG_INFO("system dictionary flushed\n");
+ } else if ((_commonDict != nullptr) && (uuid_compare(*guid, gAppleNVRAMGuid) == 0)) {
+ const OSSymbol *key;
+ OSSharedPtr<OSDictionary> newDict;
+ OSSharedPtr<OSCollectionIterator> iter;
- newDict = OSDictionary::withCapacity(_systemDict->getCapacity());
- iter = OSCollectionIterator::withCollection(newDict.get());
- if ((newDict == nullptr) || (iter == nullptr)) {
- err = kIOReturnNoMemory;
- goto exit;
- }
-
- while ((key = OSDynamicCast(OSSymbol, iter->getNextObject()))) {
- const OSSymbol *key = OSDynamicCast(OSSymbol, iter->getNextObject());
- if (key == nullptr) {
- err = kIOReturnNoMemory;
- goto exit;
- }
+ newDict = OSDictionary::withCapacity(_commonDict->getCapacity());
+ iter = OSCollectionIterator::withCollection(_commonDict.get());
+ if ((newDict == nullptr) || (iter == nullptr)) {
+ err = kIOReturnNoMemory;
+ goto exit;
+ }
- if (!verifyPermission(kIONVRAMOperationObliterate, &gAppleSystemVariableGuid, key)) {
- newDict->setObject(key, _systemDict->getObject(key));
- }
+ while ((key = OSDynamicCast(OSSymbol, iter->getNextObject()))) {
+ if (!verifyPermission(op, &gAppleNVRAMGuid, key)) {
+ newDict->setObject(key, _commonDict->getObject(key));
}
+ }
- _systemDict = newDict;
+ _commonDict = newDict;
- DEBUG_INFO("system dictionary flushed\n");
- } else if (_commonDict != nullptr) {
- const OSSymbol *key;
- OSSharedPtr<OSDictionary> newDict;
- OSSharedPtr<OSCollectionIterator> iter;
+ DEBUG_INFO("common dictionary flushed\n");
+ }
- newDict = OSDictionary::withCapacity(_commonDict->getCapacity());
- iter = OSCollectionIterator::withCollection(newDict.get());
- if ((newDict == nullptr) || (iter == nullptr)) {
- err = kIOReturnNoMemory;
- goto exit;
- }
+exit:
+ return err;
+}
- while ((key = OSDynamicCast(OSSymbol, iter->getNextObject()))) {
- if (!verifyPermission(kIONVRAMOperationObliterate, &gAppleNVRAMGuid, key)) {
- newDict->setObject(key, _commonDict->getObject(key));
- }
- }
+bool
+IODTNVRAM::handleSpecialVariables(const char *name, const uuid_t *guid, const OSObject *obj, IOReturn *error)
+{
+ IOReturn err = kIOReturnSuccess;
+ bool special = false;
- _commonDict = newDict;
+ NVRAMLOCKASSERTEXCLUSIVE();
- DEBUG_INFO("common dictionary flushed\n");
+ // ResetNVRam flushes both the system and common regions in one call.
+ // ObliterateNVRam can flush either region separately.
+ if (strcmp(name, "ObliterateNVRam") == 0) {
+ err = flushDict(guid, kIONVRAMOperationObliterate);
+ } else if (strcmp(name, "ResetNVRam") == 0) {
+ err = flushDict(&gAppleSystemVariableGuid, kIONVRAMOperationReset);
+
+ if (err != kIOReturnSuccess) {
+ goto exit;
}
- special = true;
+ err = flushDict(&gAppleNVRAMGuid, kIONVRAMOperationReset);
}
exit:
}
OSSharedPtr<OSObject>
-IODTNVRAM::copyProperty(const OSSymbol *aKey) const
+IODTNVRAM::copyPropertyWithGUIDAndName(const uuid_t *guid, const char *name) const
{
IOReturn result;
- const char *variableName;
- uuid_t varGuid;
OSDictionary *dict;
OSSharedPtr<OSObject> theObject = nullptr;
- if (aKey->isEqualTo(kIOBSDNameKey) ||
- aKey->isEqualTo(kIOBSDNamesKey) ||
- aKey->isEqualTo(kIOBSDMajorKey) ||
- aKey->isEqualTo(kIOBSDMinorKey) ||
- aKey->isEqualTo(kIOBSDUnitKey)) {
- // These will never match.
- // Check here and exit to avoid logging spam
- return nullptr;
- }
- DEBUG_INFO("aKey=%s\n", aKey->getCStringNoCopy());
-
- parseVariableName(aKey->getCStringNoCopy(), &varGuid, &variableName);
-
- result = chooseDictionary(kIONVRAMOperationRead, &varGuid, variableName, &dict);
+ result = chooseDictionary(kIONVRAMOperationRead, guid, name, &dict);
if (result != kIOReturnSuccess) {
+ DEBUG_INFO("No dictionary\n");
goto exit;
}
- if (!verifyPermission(kIONVRAMOperationRead, &varGuid, variableName)) {
+ if (!verifyPermission(kIONVRAMOperationRead, guid, name)) {
DEBUG_INFO("Not privileged\n");
goto exit;
}
- NVRAMLOCK();
- theObject.reset(dict->getObject(variableName), OSRetain);
+ NVRAMREADLOCK();
+ theObject.reset(dict->getObject(name), OSRetain);
NVRAMUNLOCK();
if (theObject != nullptr) {
return theObject;
}
+OSSharedPtr<OSObject>
+IODTNVRAM::copyProperty(const OSSymbol *aKey) const
+{
+ const char *variableName;
+ uuid_t varGuid;
+
+ if (skipKey(aKey)) {
+ return nullptr;
+ }
+ DEBUG_INFO("aKey=%s\n", aKey->getCStringNoCopy());
+
+ parseVariableName(aKey->getCStringNoCopy(), &varGuid, &variableName);
+
+ return copyPropertyWithGUIDAndName(&varGuid, variableName);
+}
+
OSSharedPtr<OSObject>
IODTNVRAM::copyProperty(const char *aKey) const
{
}
IOReturn
-IODTNVRAM::setPropertyInternal(const OSSymbol *aKey, OSObject *anObject)
+IODTNVRAM::setPropertyWithGUIDAndName(const uuid_t *guid, const char *name, OSObject *anObject)
{
- IOReturn result = kIOReturnSuccess;
+ IOReturn ret = kIOReturnSuccess;
bool remove = false;
OSString *tmpString = nullptr;
OSSharedPtr<OSObject> propObject, oldObject;
OSSharedPtr<OSObject> sharedObject(anObject, OSRetain);
- const char *variableName;
- uuid_t varGuid;
OSDictionary *dict;
bool deletePropertyKey, syncNowPropertyKey, forceSyncNowPropertyKey;
bool ok;
size_t propDataSize = 0;
- DEBUG_INFO("aKey=%s\n", aKey->getCStringNoCopy());
-
- parseVariableName(aKey->getCStringNoCopy(), &varGuid, &variableName);
- deletePropertyKey = strncmp(variableName, kIONVRAMDeletePropertyKey, sizeof(kIONVRAMDeletePropertyKey)) == 0;
- syncNowPropertyKey = strncmp(variableName, kIONVRAMSyncNowPropertyKey, sizeof(kIONVRAMSyncNowPropertyKey)) == 0;
- forceSyncNowPropertyKey = strncmp(variableName, kIONVRAMForceSyncNowPropertyKey, sizeof(kIONVRAMForceSyncNowPropertyKey)) == 0;
+ deletePropertyKey = strncmp(name, kIONVRAMDeletePropertyKey, sizeof(kIONVRAMDeletePropertyKey)) == 0;
+ syncNowPropertyKey = strncmp(name, kIONVRAMSyncNowPropertyKey, sizeof(kIONVRAMSyncNowPropertyKey)) == 0;
+ forceSyncNowPropertyKey = strncmp(name, kIONVRAMForceSyncNowPropertyKey, sizeof(kIONVRAMForceSyncNowPropertyKey)) == 0;
if (deletePropertyKey) {
tmpString = OSDynamicCast(OSString, anObject);
if (tmpString != nullptr) {
+ const char *variableName;
+ uuid_t varGuid;
+
DEBUG_INFO("kIONVRAMDeletePropertyKey found\n");
- OSSharedPtr<const OSSymbol> sharedKey = OSSymbol::withString(tmpString);
- removeProperty(sharedKey.get());
+
+ parseVariableName(tmpString->getCStringNoCopy(), &varGuid, &variableName);
+ removePropertyWithGUIDAndName(&varGuid, variableName);
} else {
DEBUG_INFO("kIONVRAMDeletePropertyKey value needs to be an OSString\n");
- result = kIOReturnError;
+ ret = kIOReturnError;
}
goto exit;
} else if (syncNowPropertyKey || forceSyncNowPropertyKey) {
tmpString = OSDynamicCast(OSString, anObject);
- DEBUG_INFO("NVRAM sync key %s found\n", aKey->getCStringNoCopy());
+ DEBUG_INFO("NVRAM sync key %s found\n", name);
if (tmpString != nullptr) {
// We still want to throttle NVRAM commit rate for SyncNow. ForceSyncNow is provided as a really big hammer.
syncInternal(syncNowPropertyKey);
} else {
- DEBUG_INFO("%s value needs to be an OSString\n", variableName);
- result = kIOReturnError;
+ DEBUG_INFO("%s value needs to be an OSString\n", name);
+ ret = kIOReturnError;
}
goto exit;
}
- result = chooseDictionary(kIONVRAMOperationWrite, &varGuid, variableName, &dict);
- if (result != kIOReturnSuccess) {
+ ret = chooseDictionary(kIONVRAMOperationWrite, guid, name, &dict);
+ if (ret != kIOReturnSuccess) {
+ DEBUG_INFO("No dictionary\n");
goto exit;
}
- if (!verifyPermission(kIONVRAMOperationWrite, &varGuid, variableName)) {
+ if (!verifyPermission(kIONVRAMOperationWrite, guid, name)) {
DEBUG_INFO("Not privileged\n");
- result = kIOReturnNotPrivileged;
+ ret = kIOReturnNotPrivileged;
goto exit;
}
// Make sure the object is of the correct type.
- switch (getVariableType(variableName)) {
+ switch (getVariableType(name)) {
case kOFVariableTypeBoolean:
propObject = OSDynamicPtrCast<OSBoolean>(sharedObject);
break;
if (propObject != nullptr) {
propDataSize = (OSDynamicPtrCast<OSString>(propObject))->getLength();
- if (aKey->isEqualTo(kIONVRAMBootArgsKey) && (propDataSize >= BOOT_LINE_LENGTH)) {
+ if ((strncmp(name, kIONVRAMBootArgsKey, sizeof(kIONVRAMBootArgsKey)) == 0) && (propDataSize >= BOOT_LINE_LENGTH)) {
DEBUG_ERROR("boot-args size too large for BOOT_LINE_LENGTH, propDataSize=%zu\n", propDataSize);
- result = kIOReturnNoSpace;
+ ret = kIOReturnNoSpace;
goto exit;
}
}
if (propObject == nullptr) {
DEBUG_INFO("No property object\n");
- result = kIOReturnBadArgument;
+ ret = kIOReturnBadArgument;
goto exit;
}
- if (!verifyWriteSizeLimit(&varGuid, variableName, propDataSize)) {
- DEBUG_ERROR("Property data size of %zu too long for %s\n", propDataSize, variableName);
- result = kIOReturnNoSpace;
+ if (!verifyWriteSizeLimit(guid, name, propDataSize)) {
+ DEBUG_ERROR("Property data size of %zu too long for %s\n", propDataSize, name);
+ ret = kIOReturnNoSpace;
goto exit;
}
- NVRAMLOCK();
- ok = handleSpecialVariables(variableName, &varGuid, propObject.get(), &result);
+ NVRAMWRITELOCK();
+ ok = handleSpecialVariables(name, guid, propObject.get(), &ret);
NVRAMUNLOCK();
if (ok) {
goto exit;
}
- NVRAMLOCK();
- oldObject.reset(dict->getObject(variableName), OSRetain);
+ NVRAMREADLOCK();
+ oldObject.reset(dict->getObject(name), OSRetain);
+ NVRAMUNLOCK();
+
if (remove == false) {
DEBUG_INFO("Adding object\n");
- if (!dict->setObject(variableName, propObject.get())) {
- result = kIOReturnBadArgument;
+ NVRAMWRITELOCK();
+ if (!dict->setObject(name, propObject.get())) {
+ ret = kIOReturnBadArgument;
}
+ NVRAMUNLOCK();
} else {
DEBUG_INFO("Removing object\n");
// Check for existence so we can decide whether we need to sync variables
if (oldObject) {
- result = removePropertyInternal(aKey);
+ ret = removePropertyWithGUIDAndName(guid, name);
} else {
- result = kIOReturnNotFound;
+ ret = kIOReturnNotFound;
}
}
- NVRAMUNLOCK();
- if (result == kIOReturnSuccess) {
- result = serializeVariables();
- if (result != kIOReturnSuccess) {
- DEBUG_ERROR("serializeVariables failed, result=0x%08x\n", result);
+ if (ret == kIOReturnSuccess) {
+ ret = serializeVariables();
+ if (ret != kIOReturnSuccess) {
+ DEBUG_ERROR("serializeVariables failed, ret=%#08x\n", ret);
- NVRAMLOCK();
+ NVRAMWRITELOCK();
if (oldObject) {
- dict->setObject(variableName, oldObject.get());
+ dict->setObject(name, oldObject.get());
} else {
- dict->removeObject(variableName);
+ dict->removeObject(name);
}
NVRAMUNLOCK();
(void) serializeVariables();
- result = kIOReturnNoMemory;
+ ret = kIOReturnNoMemory;
}
}
}
exit:
- DEBUG_INFO("result=0x%08x\n", result);
+ DEBUG_INFO("ret=%#08x\n", ret);
- return result;
+ return ret;
+}
+
+IOReturn
+IODTNVRAM::setPropertyInternal(const OSSymbol *aKey, OSObject *anObject)
+{
+ const char *variableName;
+ uuid_t varGuid;
+
+ DEBUG_INFO("aKey=%s\n", aKey->getCStringNoCopy());
+
+ parseVariableName(aKey->getCStringNoCopy(), &varGuid, &variableName);
+
+ return setPropertyWithGUIDAndName(&varGuid, variableName, anObject);
}
bool
{
IOReturn ret;
- NVRAMLOCK();
ret = removePropertyInternal(aKey);
- NVRAMUNLOCK();
if (ret == kIOReturnSuccess) {
serializeVariables();
} else {
- DEBUG_INFO("removePropertyInternal failed, ret=0x%08x\n", ret);
+ DEBUG_INFO("removePropertyInternal failed, ret=%#08x\n", ret);
}
}
IOReturn
-IODTNVRAM::removePropertyInternal(const OSSymbol *aKey)
+IODTNVRAM::removePropertyWithGUIDAndName(const uuid_t *guid, const char *name)
{
- IOReturn result;
- const char *variableName;
- uuid_t varGuid;
+ IOReturn ret;
OSDictionary *dict;
+ bool removed = false;
- DEBUG_INFO("aKey=%s\n", aKey->getCStringNoCopy());
-
- NVRAMLOCKASSERT();
-
- parseVariableName(aKey->getCStringNoCopy(), &varGuid, &variableName);
+ DEBUG_INFO("name=%s\n", name);
- result = chooseDictionary(kIONVRAMOperationDelete, &varGuid, variableName, &dict);
- if (result != kIOReturnSuccess) {
+ ret = chooseDictionary(kIONVRAMOperationDelete, guid, name, &dict);
+ if (ret != kIOReturnSuccess) {
+ DEBUG_INFO("No dictionary\n");
goto exit;
}
- if (!verifyPermission(kIONVRAMOperationDelete, &varGuid, variableName)) {
+ if (!verifyPermission(kIONVRAMOperationDelete, guid, name)) {
DEBUG_INFO("Not priveleged\n");
- result = kIOReturnNotPrivileged;
+ ret = kIOReturnNotPrivileged;
goto exit;
}
+ NVRAMWRITELOCK();
+
// If the object exists, remove it from the dictionary.
- if (dict->getObject(variableName) != nullptr) {
- dict->removeObject(variableName);
+ if (dict->getObject(name) != nullptr) {
+ dict->removeObject(name);
+ removed = true;
+ } else {
+ DEBUG_INFO("%s not found\n", name);
+ }
+
+ NVRAMUNLOCK();
+
+ if (removed) {
+ ret = serializeVariables();
+ DEBUG_INFO("serializeVariables ret=0x%08x\n", ret);
}
exit:
- return result;
+ return ret;
+}
+
+IOReturn
+IODTNVRAM::removePropertyInternal(const OSSymbol *aKey)
+{
+ IOReturn ret;
+ const char *variableName;
+ uuid_t varGuid;
+
+ DEBUG_INFO("aKey=%s\n", aKey->getCStringNoCopy());
+
+ parseVariableName(aKey->getCStringNoCopy(), &varGuid, &variableName);
+
+ ret = removePropertyWithGUIDAndName(&varGuid, variableName);
+
+ return ret;
}
IOReturn
IODTNVRAM::setProperties(OSObject *properties)
{
- IOReturn result = kIOReturnSuccess;
+ IOReturn ret = kIOReturnSuccess;
OSObject *object;
const OSSymbol *key;
OSDictionary *dict;
return kIOReturnBadArgument;
}
- while (result == kIOReturnSuccess) {
+ while (ret == kIOReturnSuccess) {
key = OSDynamicCast(OSSymbol, iter->getNextObject());
if (key == nullptr) {
break;
continue;
}
- result = setPropertyInternal(key, object);
+ ret = setPropertyInternal(key, object);
}
- DEBUG_INFO("result=0x%08x\n", result);
+ DEBUG_INFO("ret=%#08x\n", ret);
- return result;
+ return ret;
}
IOReturn
OSSharedPtr<const OSSymbol> propSymbol;
OSSharedPtr<OSObject> propObject;
NVRAMRegionInfo *currentRegion;
- NVRAMRegionInfo variableRegions[] = { { NVRAM_CHRP_PARTITION_NAME_COMMON, _commonPartitionOffset, _commonPartitionSize, _commonDict, _commonImage},
- { NVRAM_CHRP_PARTITION_NAME_SYSTEM, _systemPartitionOffset, _systemPartitionSize, _systemDict, _systemImage} };
+ NVRAMRegionInfo variableRegions[] = { { kIONVRAMPartitionCommon, _commonPartitionOffset, _commonPartitionSize, _commonDict, _commonImage},
+ { kIONVRAMPartitionSystem, _systemPartitionOffset, _systemPartitionSize, _systemDict, _systemImage} };
DEBUG_INFO("...\n");
currentRegion->dict = OSDictionary::withCapacity(1);
- DEBUG_INFO("region = %s\n", currentRegion->name);
+ DEBUG_INFO("region = %d\n", currentRegion->type);
cnt = 0;
while (cnt < currentRegion->size) {
// Break if there is no name.
}
// Create the boot-args property if it is not in the dictionary.
- if (_commonDict->getObject(kIONVRAMBootArgsKey) == nullptr) {
- propObject = OSString::withCStringNoCopy("");
- if (propObject != nullptr) {
- _commonDict->setObject(kIONVRAMBootArgsKey, propObject.get());
+ if (_systemDict != nullptr) {
+ if (_systemDict->getObject(kIONVRAMBootArgsKey) == nullptr) {
+ propObject = OSString::withCStringNoCopy("");
+ if (propObject != nullptr) {
+ _systemDict->setObject(kIONVRAMBootArgsKey, propObject.get());
+ }
+ }
+ } else if (_commonDict != nullptr) {
+ if (_commonDict->getObject(kIONVRAMBootArgsKey) == nullptr) {
+ propObject = OSString::withCStringNoCopy("");
+ if (propObject != nullptr) {
+ _commonDict->setObject(kIONVRAMBootArgsKey, propObject.get());
+ }
}
}
- DEBUG_INFO("%s _commonDict=%p _systemDict=%p\n", __FUNCTION__, _commonDict.get(), _systemDict.get());
+ DEBUG_INFO("%s _commonDict=%p _systemDict=%p\n", __FUNCTION__, _commonDict ? _commonDict.get() : nullptr, _systemDict ? _systemDict.get() : nullptr);
return kIOReturnSuccess;
}
UInt32 commonUsed = 0;
OSSharedPtr<OSData> nvramImage;
NVRAMRegionInfo *currentRegion;
- NVRAMRegionInfo variableRegions[] = { { NVRAM_CHRP_PARTITION_NAME_COMMON, _commonPartitionOffset, _commonPartitionSize, _commonDict, _commonImage},
- { NVRAM_CHRP_PARTITION_NAME_SYSTEM, _systemPartitionOffset, _systemPartitionSize, _systemDict, _systemImage} };
+ NVRAMRegionInfo variableRegions[] = { { kIONVRAMPartitionCommon, _commonPartitionOffset, _commonPartitionSize, _commonDict, _commonImage},
+ { kIONVRAMPartitionSystem, _systemPartitionOffset, _systemPartitionSize, _systemDict, _systemImage} };
if (_systemPanicked) {
return kIOReturnNotReady;
DEBUG_INFO("...\n");
- NVRAMLOCK();
+ NVRAMREADLOCK();
for (regionIndex = 0; regionIndex < ARRAY_SIZE(variableRegions); regionIndex++) {
currentRegion = &variableRegions[regionIndex];
continue;
}
- DEBUG_INFO("region = %s\n", currentRegion->name);
+ DEBUG_INFO("region = %d\n", currentRegion->type);
buffer = tmpBuffer = IONew(UInt8, currentRegion->size);
if (buffer == nullptr) {
- return kIOReturnNoMemory;
+ ok = false;
+ ret = kIOReturnNoMemory;
+ break;
}
bzero(buffer, currentRegion->size);
IODelete(buffer, UInt8, currentRegion->size);
- if ((strncmp(currentRegion->name, NVRAM_CHRP_PARTITION_NAME_SYSTEM, strlen(NVRAM_CHRP_PARTITION_NAME_SYSTEM)) == 0) &&
+ if ((currentRegion->type == kIONVRAMPartitionSystem) &&
(_systemService != nullptr)) {
- _systemService->setProperties(_systemDict.get());
- systemUsed = maxLength;
- } else if ((strncmp(currentRegion->name, NVRAM_CHRP_PARTITION_NAME_COMMON, strlen(NVRAM_CHRP_PARTITION_NAME_COMMON)) == 0) &&
+ _systemService->setVariables(_systemDict.get());
+ systemUsed = (uint32_t)(tmpBuffer - buffer);
+ } else if ((currentRegion->type == kIONVRAMPartitionCommon) &&
(_commonService != nullptr)) {
- _commonService->setProperties(_commonDict.get());
- commonUsed = maxLength;
+ _commonService->setVariables(_commonDict.get());
+ commonUsed = (uint32_t)(tmpBuffer - buffer);
}
if (!ok) {
- return kIOReturnBadArgument;
+ ret = kIOReturnBadArgument;
+ break;
}
}
- nvramImage = OSData::withBytes(_nvramImage, _nvramSize);
-
NVRAMUNLOCK();
DEBUG_INFO("ok=%d\n", ok);
- CONTROLLERLOCK();
+ if (ok) {
+ nvramImage = OSData::withBytes(_nvramImage, _nvramSize);
+ CONTROLLERLOCK();
- if (_systemService) {
- sizeUsed = OSNumber::withNumber(systemUsed, 32);
- _nvramController->setProperty("SystemUsed", sizeUsed.get());
- sizeUsed.reset();
- }
+ if (_systemService) {
+ sizeUsed = OSNumber::withNumber(systemUsed, 32);
+ _nvramController->setProperty("SystemUsed", sizeUsed.get());
+ DEBUG_INFO("SystemUsed=%u\n", (unsigned int)systemUsed);
+ sizeUsed.reset();
+ }
- if (_commonService) {
- sizeUsed = OSNumber::withNumber(commonUsed, 32);
- _nvramController->setProperty("CommonUsed", sizeUsed.get());
- sizeUsed.reset();
- }
+ if (_commonService) {
+ sizeUsed = OSNumber::withNumber(commonUsed, 32);
+ _nvramController->setProperty("CommonUsed", sizeUsed.get());
+ DEBUG_INFO("CommonUsed=%u\n", (unsigned int)commonUsed);
+ sizeUsed.reset();
+ }
- ret = _nvramController->write(0, (uint8_t *)nvramImage->getBytesNoCopy(), nvramImage->getLength());
+ ret = _nvramController->write(0, (uint8_t *)nvramImage->getBytesNoCopy(), nvramImage->getLength());
- CONTROLLERUNLOCK();
+ CONTROLLERUNLOCK();
+ }
return ret;
}
{
const OSSymbol* propSymbolRaw = nullptr;
OSObject* propObjectRaw = nullptr;
- bool result = convertPropToObject(propName, propNameLength, propData, propDataLength,
+ bool ok = convertPropToObject(propName, propNameLength, propData, propDataLength,
&propSymbolRaw, &propObjectRaw);
propSymbol.reset(propSymbolRaw, OSNoRetain);
propObject.reset(propObjectRaw, OSNoRetain);
- return result;
+ return ok;
}
bool
} else if (tmpValue < 1000) {
snprintf((char *)buffer, remaining, "%d", (uint32_t)tmpValue);
} else {
- snprintf((char *)buffer, remaining, "0x%x", (uint32_t)tmpValue);
+ snprintf((char *)buffer, remaining, "%#x", (uint32_t)tmpValue);
}
}
break;
UInt32 resultValueLen = 0;
UInt8 byte;
- NVRAMLOCK();
+ NVRAMREADLOCK();
data = OSDynamicCast(OSData, _commonDict->getObject(_registryPropertiesKey.get()));
NVRAMUNLOCK();
// copy over existing properties for other entries
- NVRAMLOCK();
+ NVRAMWRITELOCK();
oldData.reset(OSDynamicCast(OSData, _commonDict->getObject(_registryPropertiesKey.get())), OSRetain);
if (oldData) {
if (ok) {
if (serializeVariables() != kIOReturnSuccess) {
- NVRAMLOCK();
+ NVRAMWRITELOCK();
if (oldData) {
_commonDict->setObject(_registryPropertiesKey.get(), oldData.get());
} else {
#define super IOService
OSDefineMetaClassAndAbstractStructors(IOPMGR, IOService);
+
+void
+IOPMGR::enableCPUCore(unsigned int cpu_id, uint64_t entry_pa)
+{
+ // Fall back to the legacy method if the subclass doesn't override the
+ // new method.
+ enableCPUCore(cpu_id);
+}
+
+void
+IOPMGR::enableCPUCore(unsigned int cpu_id)
+{
+ panic("enableCPUCore is unimplemented");
+}
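The new two-argument entry point keeps existing platform drivers working: the base class forwards to the legacy single-argument method until a subclass overrides it. A minimal sketch of a hypothetical subclass adopting the new signature (the class name and helper routines are assumptions for illustration, and the usual IOKit metaclass boilerplate is omitted):

class MyPlatformPMGR : public IOPMGR
{
public:
	// Preferred path: the caller supplies the physical address of the
	// reset vector the core should start executing from.
	virtual void
	enableCPUCore(unsigned int cpu_id, uint64_t entry_pa) override
	{
		programResetVector(cpu_id, entry_pa); // hypothetical helper
		powerOnCore(cpu_id);                  // hypothetical helper
	}

private:
	void programResetVector(unsigned int cpu_id, uint64_t entry_pa);
	void powerOnCore(unsigned int cpu_id);
};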
#include <IOKit/IOReportMacros.h>
#include <IOKit/IOLib.h>
#include <IOKit/IOKitKeys.h>
+#include <IOKit/IOUserServer.h>
#include "IOKitKernelInternal.h"
#if HIBERNATION
#include <IOKit/IOHibernatePrivate.h>
static UInt32 gPagingOff = 0;
static UInt32 gSleepWakeUUIDIsSet = false;
static uint32_t gAggressivesState = 0;
-static uint32_t gHaltTimeMaxLog;
-static uint32_t gHaltTimeMaxPanic;
+uint32_t gHaltTimeMaxLog;
+uint32_t gHaltTimeMaxPanic;
IOLock * gHaltLogLock;
static char * gHaltLog;
enum { kHaltLogSize = 2048 };
static bool gWakeReasonSysctlRegistered = false;
static bool gBootReasonSysctlRegistered = false;
static bool gShutdownReasonSysctlRegistered = false;
+static bool gWillShutdownSysctlRegistered = false;
static AbsoluteTime gUserActiveAbsTime;
static AbsoluteTime gUserInactiveAbsTime;
return;
}
+ if (kIOSystemShutdownNotificationTerminateDEXTs == stage) {
+ uint64_t nano, millis;
+ startTime = mach_absolute_time();
+ IOServicePH::systemHalt();
+ absolutetime_to_nanoseconds(mach_absolute_time() - startTime, &nano);
+ millis = nano / NSEC_PER_MSEC;
+ if (true || (gHaltTimeMaxLog && (millis >= gHaltTimeMaxLog))) {
+ printf("IOServicePH::systemHalt took %qd ms\n", millis);
+ }
+ return;
+ }
+
assert(kIOSystemShutdownNotificationStageProcessExit == stage);
IOLockLock(gHaltLogLock);
}
}
-
extern "C" int sync_internal(void);
/*
}
static SYSCTL_PROC(_kern, OID_AUTO, sleeptime,
- CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED,
&gIOLastUserSleepTime, 0, sysctl_sleepwaketime, "S,timeval", "");
static SYSCTL_PROC(_kern, OID_AUTO, waketime,
- CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED,
&gIOLastWakeTime, 0, sysctl_sleepwaketime, "S,timeval", "");
SYSCTL_QUAD(_kern, OID_AUTO, wake_abs_time, CTLFLAG_RD | CTLFLAG_LOCKED, &gIOLastWakeAbsTime, "");
SYSCTL_QUAD(_kern, OID_AUTO, userinactive_abs_time, CTLFLAG_RD | CTLFLAG_LOCKED, &gUserInactiveAbsTime, "");
static int
-sysctl_willshutdown
-(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+sysctl_willshutdown SYSCTL_HANDLER_ARGS
{
- int new_value, changed;
- int error = sysctl_io_number(req, gWillShutdown, sizeof(int), &new_value, &changed);
+ int new_value, changed, error;
+
+ if (!gWillShutdownSysctlRegistered) {
+ return ENOENT;
+ }
+
+ error = sysctl_io_number(req, gWillShutdown, sizeof(int), &new_value, &changed);
if (changed) {
if (!gWillShutdown && (new_value == 1)) {
IOPMRootDomainWillShutdown();
}
static SYSCTL_PROC(_kern, OID_AUTO, willshutdown,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
NULL, 0, sysctl_willshutdown, "I", "");
-extern struct sysctl_oid sysctl__kern_iokittest;
-extern struct sysctl_oid sysctl__debug_iokit;
-
#if defined(XNU_TARGET_OS_OSX)
static int
}
static SYSCTL_PROC(_kern, OID_AUTO, progressmeterenable,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
NULL, 0, sysctl_progressmeterenable, "I", "");
static SYSCTL_PROC(_kern, OID_AUTO, progressmeter,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
NULL, 0, sysctl_progressmeter, "I", "");
#endif /* defined(XNU_TARGET_OS_OSX) */
}
static SYSCTL_PROC(_kern, OID_AUTO, consoleoptions,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
NULL, 0, sysctl_consoleoptions, "I", "");
}
static SYSCTL_PROC(_kern, OID_AUTO, progressoptions,
- CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED | CTLFLAG_ANYBODY,
+ CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED | CTLFLAG_ANYBODY,
NULL, 0, sysctl_progressoptions, "S,vc_progress_user_options", "");
char wr[sizeof(gWakeReasonString)];
wr[0] = '\0';
- if (gRootDomain) {
+ if (gRootDomain && gWakeReasonSysctlRegistered) {
gRootDomain->copyWakeReasonString(wr, sizeof(wr));
+ } else {
+ return ENOENT;
}
return sysctl_io_string(req, wr, 0, 0, NULL);
}
SYSCTL_PROC(_kern, OID_AUTO, wakereason,
- CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED,
NULL, 0, sysctl_wakereason, "A", "wakereason");
-SYSCTL_STRING(_kern, OID_AUTO, bootreason,
- CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
- gBootReasonString, sizeof(gBootReasonString), "");
+static int
+sysctl_bootreason SYSCTL_HANDLER_ARGS
+{
+ if (!os_atomic_load(&gBootReasonSysctlRegistered, acquire)) {
+ return ENOENT;
+ }
+
+ return sysctl_io_string(req, gBootReasonString, 0, 0, NULL);
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, bootreason,
+ CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ NULL, 0, sysctl_bootreason, "A", "");
static int
sysctl_shutdownreason SYSCTL_HANDLER_ARGS
char sr[sizeof(gShutdownReasonString)];
sr[0] = '\0';
- if (gRootDomain) {
+ if (gRootDomain && gShutdownReasonSysctlRegistered) {
gRootDomain->copyShutdownReasonString(sr, sizeof(sr));
+ } else {
+ return ENOENT;
}
return sysctl_io_string(req, sr, 0, 0, NULL);
}
SYSCTL_PROC(_kern, OID_AUTO, shutdownreason,
- CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED,
NULL, 0, sysctl_shutdownreason, "A", "shutdownreason");
static int
}
SYSCTL_PROC(_hw, OID_AUTO, targettype,
- CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED,
NULL, 0, sysctl_targettype, "A", "targettype");
static SYSCTL_INT(_debug, OID_AUTO, noidle, CTLFLAG_RW, &gNoIdleFlag, 0, "");
}
static SYSCTL_PROC(_kern, OID_AUTO, aotmetrics,
- CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED | CTLFLAG_ANYBODY,
+ CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED | CTLFLAG_ANYBODY,
NULL, 0, sysctl_aotmetrics, "S,IOPMAOTMetrics", "");
}
static SYSCTL_PROC(_kern, OID_AUTO, aotmodebits,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
NULL, 0, sysctl_aotmodebits, "I", "");
static int
}
static SYSCTL_PROC(_kern, OID_AUTO, aotmode,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED | CTLFLAG_ANYBODY,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED | CTLFLAG_ANYBODY,
NULL, 0, sysctl_aotmode, "I", "");
//******************************************************************************
// read swd_panic boot-arg
PE_parse_boot_argn("swd_panic", &gSwdPanic, sizeof(gSwdPanic));
- sysctl_register_oid(&sysctl__kern_sleeptime);
- sysctl_register_oid(&sysctl__kern_waketime);
- sysctl_register_oid(&sysctl__kern_willshutdown);
- sysctl_register_oid(&sysctl__kern_iokittest);
- sysctl_register_oid(&sysctl__debug_iokit);
- sysctl_register_oid(&sysctl__hw_targettype);
-
-#if defined(XNU_TARGET_OS_OSX)
- sysctl_register_oid(&sysctl__kern_progressmeterenable);
- sysctl_register_oid(&sysctl__kern_progressmeter);
- sysctl_register_oid(&sysctl__kern_wakereason);
-#endif /* defined(XNU_TARGET_OS_OSX) */
- sysctl_register_oid(&sysctl__kern_consoleoptions);
- sysctl_register_oid(&sysctl__kern_progressoptions);
-
- sysctl_register_oid(&sysctl__kern_aotmode);
- sysctl_register_oid(&sysctl__kern_aotmodebits);
- sysctl_register_oid(&sysctl__kern_aotmetrics);
+ gWillShutdownSysctlRegistered = true;
#if HIBERNATION
#if defined(__arm64__)
// Until the platform driver can claim its wake reasons
strlcat(gWakeReasonString, wakeReason->getCStringNoCopy(),
sizeof(gWakeReasonString));
+ if (!gWakeReasonSysctlRegistered) {
+ gWakeReasonSysctlRegistered = true;
+ }
WAKEEVENT_UNLOCK();
}
_currentCapability, changeFlags,
request->getTag());
+
+#if defined(XNU_TARGET_OS_OSX) && !DISPLAY_WRANGLER_PRESENT
+ /*
+ * ASBM sends lowBattery notifications every 1 second until the device
+ * enters hibernation. This queues up multiple sleep requests.
+ * After the device wakes from hibernation, none of these previously
+ * queued sleep requests are valid.
+ * The lowBatteryCondition variable is set when ASBM notifies rootDomain
+ * and is cleared at the very last point in sleep.
+ * Any attempt to sleep with reason kIOPMSleepReasonLowPower without
+ * lowBatteryCondition set is invalid.
+ */
+ if (REQUEST_TAG_TO_REASON(request->getTag()) == kIOPMSleepReasonLowPower) {
+ if (!lowBatteryCondition) {
+ DLOG("Duplicate lowBattery sleep");
+ *inOutChangeFlags |= kIOPMNotDone;
+ return;
+ }
+ }
+#endif
+
if ((AOT_STATE == desiredPowerState) && (ON_STATE == currentPowerState)) {
// Assertion may have been taken in AOT leading to changePowerStateTo(AOT)
*inOutChangeFlags |= kIOPMNotDone;
return;
}
-#if defined(XNU_TARGET_OS_OSX) && !DISPLAY_WRANGLER_PRESENT
- if (lowBatteryCondition && (desiredPowerState < currentPowerState)) {
- // Reject sleep requests when lowBatteryCondition is TRUE to
- // avoid racing with the impending system shutdown.
- *inOutChangeFlags |= kIOPMNotDone;
- return;
- }
-#endif
-
if (desiredPowerState < currentPowerState) {
if (CAP_CURRENT(kIOPMSystemCapabilityGraphics)) {
// Root domain is dropping power state from ON->SLEEP.
* Power Emergency
*/
if (msg & kIOPMPowerEmergency) {
- DLOG("Low battery notification received\n");
-#if defined(XNU_TARGET_OS_OSX) && !DISPLAY_WRANGLER_PRESENT
- // Wait for the next low battery notification if the system state is
- // in transition.
- if ((_systemTransitionType == kSystemTransitionNone) &&
- CAP_CURRENT(kIOPMSystemCapabilityCPU) &&
- !systemBooting && !systemShutdown && !gWillShutdown) {
- // Setting lowBatteryCondition will prevent system sleep
- lowBatteryCondition = true;
-
- // Notify userspace to initiate system shutdown
- messageClients(kIOPMMessageRequestSystemShutdown);
- }
-#else
+ DLOG("Received kIOPMPowerEmergency");
lowBatteryCondition = true;
privateSleepSystem(kIOPMSleepReasonLowPower);
-#endif
}
/*
// Lazy registration until the platform driver stops registering
// the same name.
gWakeReasonSysctlRegistered = true;
-#if !defined(XNU_TARGET_OS_OSX)
- sysctl_register_oid(&sysctl__kern_wakereason);
-#endif /* !defined(XNU_TARGET_OS_OSX) */
}
if (addWakeReason) {
_systemWakeEventsArray->setObject(dict.get());
if (!gBootReasonSysctlRegistered) {
// Lazy sysctl registration after setting gBootReasonString
strlcat(gBootReasonString, reason, sizeof(gBootReasonString));
- sysctl_register_oid(&sysctl__kern_bootreason);
- gBootReasonSysctlRegistered = true;
+ os_atomic_store(&gBootReasonSysctlRegistered, true, release);
}
WAKEEVENT_UNLOCK();
}
}
strlcat(gShutdownReasonString, reason, sizeof(gShutdownReasonString));
- if (!gShutdownReasonSysctlRegistered) {
- sysctl_register_oid(&sysctl__kern_shutdownreason);
- gShutdownReasonSysctlRegistered = true;
- }
+ gShutdownReasonSysctlRegistered = true;
WAKEEVENT_UNLOCK();
}
IOCPURunPlatformPanicActions(type, details);
}
}
+ } else if (type == kPEPanicDiagnosticsDone) {
+ IOCPURunPlatformPanicActions(type, details);
}
skip_to_haltRestart:
uint32_t fSystemPowerAckRef;
uint8_t fSystemOff;
uint8_t fUserServerOff;
+uint8_t fWaitingUserServers;
void lock();
void unlock();
if (idx != -1U) {
fUserServers->removeObject(idx);
}
+
+ if (fWaitingUserServers) {
+ fWaitingUserServers = false;
+ IOLockWakeup(gJobsLock, &fWaitingUserServers, /* one-thread */ false);
+ }
+
unlock();
}
serverAck(NULL);
}
+
+void
+IOServicePH::systemHalt(void)
+{
+ OSArray * notifyServers;
+ uint64_t deadline;
+
+ lock();
+ notifyServers = OSArray::withArray(fUserServers);
+ unlock();
+
+ if (notifyServers) {
+ notifyServers->iterateObjects(^bool (OSObject * obj) {
+ IOUserServer * us;
+ us = (typeof(us))obj;
+ us->systemHalt();
+ return false;
+ });
+ OSSafeReleaseNULL(notifyServers);
+ }
+
+ lock();
+ clock_interval_to_deadline(1000, kMillisecondScale, &deadline);
+ while (0 < fUserServers->getCount()) {
+ fWaitingUserServers = true;
+ __assert_only int waitResult =
+ IOLockSleepDeadline(gJobsLock, &fWaitingUserServers, deadline, THREAD_UNINT);
+ assert((THREAD_AWAKENED == waitResult) || (THREAD_TIMED_OUT == waitResult));
+ if (THREAD_TIMED_OUT == waitResult) {
+ break;
+ }
+ }
+ unlock();
+}
+
bool
IOServicePH::serverSlept(void)
{
IOLibInit();
OSlibkernInit();
IOMachPortInitialize();
- devsw_init();
gIOProgressBackbufferKey = OSSymbol::withCStringNoCopy(kIOProgressBackbufferKey);
gIORemoveOnReadProperties = OSSet::withObjects((const OSObject **) &gIOProgressBackbufferKey, 1);
int error = EINVAL;
uint32_t request = arg2;
+ if (!IOStatistics::isEnabled()) {
+ return ENOENT;
+ }
+
switch (request) {
case kIOStatisticsGeneral:
error = IOStatistics::getStatistics(req);
SYSCTL_NODE(_debug, OID_AUTO, iokit_statistics, CTLFLAG_RW | CTLFLAG_LOCKED, NULL, "IOStatistics");
static SYSCTL_PROC(_debug_iokit_statistics, OID_AUTO, general,
- CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED,
NULL, kIOStatisticsGeneral, oid_sysctl, "S", "");
static SYSCTL_PROC(_debug_iokit_statistics, OID_AUTO, workloop,
- CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED,
NULL, kIOStatisticsWorkLoop, oid_sysctl, "S", "");
static SYSCTL_PROC(_debug_iokit_statistics, OID_AUTO, userclient,
- CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
NULL, kIOStatisticsUserClient, oid_sysctl, "S", "");
+
void
IOStatistics::initialize()
{
return;
}
- sysctl_register_oid(&sysctl__debug_iokit_statistics_general);
- sysctl_register_oid(&sysctl__debug_iokit_statistics_workloop);
- sysctl_register_oid(&sysctl__debug_iokit_statistics_userclient);
-
lock = IORWLockAlloc();
if (!lock) {
return;
return kIOReturnBadArgument;
}
+#if CONFIG_MACF
+ if (mac_iokit_check_open_service(kauth_cred_get(), service, connect_type) != 0) {
+ return kIOReturnNotPermitted;
+ }
+#endif
do{
if (properties) {
return kIOReturnUnsupported;
0, &message_moved);
} else {
assert(replySize >= (sizeof(IORPCMessageMach) + sizeof(IORPCMessage)));
- ret = kernel_mach_msg_rpc(&mach->msgh, sendSize, replySize, FALSE, &message_moved);
+ ret = kernel_mach_msg_rpc(&mach->msgh, sendSize, replySize, FALSE, FALSE, &message_moved);
}
ipc_port_release_send(sendPort);
IOReturn
IOUserServer::clientClose(void)
{
+ OSArray * services;
+
+ if (kIODKLogSetup & gIODKDebug) {
+ DKLOG("%s::clientClose(%p)\n", getName(), this);
+ }
+
+ services = NULL;
+ IOLockLock(fLock);
+ if (fServices) {
+ services = OSArray::withArray(fServices);
+ }
+ IOLockUnlock(fLock);
+
+ // if this was an expected exit, termination and stop should have detached at this
+ // point, so send any provider still attached and not owned by this user server
+ // the ClientCrashed() notification
+ if (services) {
+ services->iterateObjects(^bool (OSObject * obj) {
+ IOService * service;
+ IOService * provider;
+
+ service = (IOService *) obj;
+ if (service->isInactive()) {
+ return false;
+ }
+ provider = service->getProvider();
+ if (provider
+ && (!provider->reserved->uvars || (provider->reserved->uvars->userServer != this))) {
+ if (kIODKLogSetup & gIODKDebug) {
+ DKLOG(DKS "::ClientCrashed(" DKS ")\n", DKN(provider), DKN(service));
+ }
+ provider->ClientCrashed(service, 0);
+ }
+ return false;
+ });
+ services->release();
+ }
+
terminate();
return kIOReturnSuccess;
}
if (!(kIODKDisableEntitlementChecking & gIODKDebug)) {
bundleID = NULL;
- entitlements = NULL;
+ entitlements = IOUserClient::copyClientEntitlements(owningTask);
if (fEntitlements && fEntitlements->getObject(gIODriverKitUserClientEntitlementAllowAnyKey)) {
ok = true;
} else {
- entitlements = IOUserClient::copyClientEntitlements(owningTask);
bundleID = service->copyProperty(gIOModuleIdentifierKey);
ok = (entitlements
&& bundleID
}
+void
+IOUserServer::systemHalt(void)
+{
+ OSArray * services;
+
+ if (true || (kIODKLogPM & gIODKDebug)) {
+ DKLOG("%s::systemHalt()\n", getName());
+ }
+
+ IOLockLock(fLock);
+ services = OSArray::withArray(fServices);
+ IOLockUnlock(fLock);
+
+ if (services) {
+ services->iterateObjects(^bool (OSObject * obj) {
+ IOService * service;
+ IOService * provider;
+ IOOptionBits terminateOptions;
+ bool root;
+
+ service = (IOService *) obj;
+ provider = service->getProvider();
+ if (!provider) {
+ DKLOG("stale service " DKS " found, skipping termination\n", DKN(service));
+ return false;
+ }
+ root = (NULL == provider->getProperty(gIOUserServerNameKey, gIOServicePlane));
+ if (true || (kIODKLogPM & gIODKDebug)) {
+ DKLOG("%d: terminate(" DKS ")\n", root, DKN(service));
+ }
+ if (!root) {
+ return false;
+ }
+ terminateOptions = kIOServiceRequired | kIOServiceTerminateNeedWillTerminate;
+ if (!service->terminate(terminateOptions)) {
+ IOLog("failed to terminate service %s-0x%llx\n", service->getName(), service->getRegistryEntryID());
+ }
+ return false;
+ });
+ }
+ OSSafeReleaseNULL(services);
+}
IOReturn
IOUserServer::serviceStarted(IOService * service, IOService * provider, bool result)
pmProvider = pmProvider->getProvider();
}
if (pmProvider) {
+ IOService * entry;
OSObject * prop;
+ OSObject * nextProp;
OSString * str;
- prop = pmProvider->copyProperty("non-removable");
+
+ entry = pmProvider;
+ prop = NULL;
+ do {
+ nextProp = entry->copyProperty("non-removable");
+ if (nextProp) {
+ OSSafeReleaseNULL(prop);
+ prop = nextProp;
+ }
+ entry = entry->getProvider();
+ } while (entry);
if (prop) {
str = OSDynamicCast(OSString, prop);
if (str && str->isEqualTo("yes")) {
}
if (willTerminate) {
- if (IOServicePH::serverSlept()) {
+ if ((true) || IOServicePH::serverSlept()) {
client->Stop_async(provider);
ret = kIOReturnOffline;
} else {
}
}
+kern_return_t
+IOService::ClientCrashed_Impl(
+ IOService * client,
+ uint64_t options)
+{
+ return kIOReturnUnsupported;
+}
+
kern_return_t
IOService::Stop_Impl(
IOService * provider)
}
if (MACH_PORT_NULL != args->asyncWakePort) {
+ // this retain is for the OSAction to release
+ iokit_make_port_send(args->asyncWakePort);
kr = CreateActionKernelCompletion(sizeof(IOUserUserClientActionRef), &action);
assert(KERN_SUCCESS == kr);
ref = (typeof(ref))action->GetReference();
bcopy(args->asyncReference, &ref->asyncRef[0], args->asyncReferenceCount * sizeof(ref->asyncRef[0]));
-
kr = action->SetAbortedHandler(^(void) {
IOUserUserClientActionRef * ref;
IOReturn ret;
OSSafeReleaseNULL(action);
if (kIOReturnSuccess != kr) {
- if (ref) {
- // mig will destroy any async port, remove our pointer to it
- bzero(&ref->asyncRef[0], sizeof(ref->asyncRef));
- }
+ // mig will destroy any async port
return kr;
}
+ if (MACH_PORT_NULL != args->asyncWakePort) {
+ // this release is for the mig created send right
+ iokit_release_port_send(args->asyncWakePort);
+ }
+
if (structureOutput) {
if (args->structureVariableOutputData) {
*args->structureVariableOutputData = structureOutput;
#include <libkern/OSAtomic.h>
#include <libkern/c++/OSCollection.h>
+#include <IOKit/IODeviceTreeSupport.h>
#include <IOKit/IOLib.h>
#include <IOKit/IOPlatformActions.h>
#include <IOKit/IOPMGR.h>
gPMGR->updateCPUIdle(new_timeout_ticks);
}
+static OSDictionary *
+matching_dict_for_cpu_id(unsigned int cpu_id)
+{
+ // The cpu-id property in EDT doesn't necessarily match the dynamically
+ // assigned logical ID in XNU, so look up the cpu node by the physical
+ // (cluster/core) ID instead.
+ OSSymbolConstPtr cpuTypeSymbol = OSSymbol::withCString("cpu");
+ OSSymbolConstPtr cpuIdSymbol = OSSymbol::withCString("reg");
+ OSDataPtr cpuId = OSData::withBytes(&(topology_info->cpus[cpu_id].phys_id), sizeof(uint32_t));
+
+ OSDictionary *propMatch = OSDictionary::withCapacity(4);
+ propMatch->setObject(gIODTTypeKey, cpuTypeSymbol);
+ propMatch->setObject(cpuIdSymbol, cpuId);
+
+ OSDictionary *matching = IOService::serviceMatching("IOPlatformDevice");
+ matching->setObject(gIOPropertyMatchKey, propMatch);
+
+ propMatch->release();
+ cpuTypeSymbol->release();
+ cpuIdSymbol->release();
+ cpuId->release();
+
+ return matching;
+}
+
static void
register_aic_handlers(const ml_topology_cpu *cpu_info,
ipi_handler_t ipi_handler,
perfmon_interrupt_handler_func pmi_handler)
{
- const int n_irqs = 3;
- int i;
- IOInterruptVectorNumber irqlist[n_irqs] = {
- cpu_info->self_ipi_irq,
- cpu_info->other_ipi_irq,
- cpu_info->pmi_irq };
-
- IOService *fakeCPU = new IOService();
- if (!fakeCPU || !fakeCPU->init()) {
- panic("Can't initialize fakeCPU");
- }
+ OSDictionary *matching = matching_dict_for_cpu_id(cpu_info->cpu_id);
+ IOService *cpu = IOService::waitForMatchingService(matching, UINT64_MAX);
+ matching->release();
- IOInterruptSource source[n_irqs];
- for (i = 0; i < n_irqs; i++) {
- source[i].vectorData = OSData::withBytes(&irqlist[i], sizeof(irqlist[0]));
+ OSArray *irqs = (OSArray *) cpu->getProperty(gIOInterruptSpecifiersKey);
+ if (!irqs) {
+ panic("Error finding interrupts for CPU %d", cpu_info->cpu_id);
}
- fakeCPU->_interruptSources = source;
- if (cpu_info->self_ipi_irq && cpu_info->other_ipi_irq) {
+ unsigned int irqcount = irqs->getCount();
+
+ if (irqcount == 3) {
// Legacy configuration, for !HAS_IPI chips (pre-Skye).
- if (gAIC->registerInterrupt(fakeCPU, 0, NULL, (IOInterruptHandler)ipi_handler, NULL) != kIOReturnSuccess ||
- gAIC->enableInterrupt(fakeCPU, 0) != kIOReturnSuccess ||
- gAIC->registerInterrupt(fakeCPU, 1, NULL, (IOInterruptHandler)ipi_handler, NULL) != kIOReturnSuccess ||
- gAIC->enableInterrupt(fakeCPU, 1) != kIOReturnSuccess) {
+ if (cpu->registerInterrupt(0, NULL, (IOInterruptAction)ipi_handler, NULL) != kIOReturnSuccess ||
+ cpu->enableInterrupt(0) != kIOReturnSuccess ||
+ cpu->registerInterrupt(2, NULL, (IOInterruptAction)ipi_handler, NULL) != kIOReturnSuccess ||
+ cpu->enableInterrupt(2) != kIOReturnSuccess) {
panic("Error registering IPIs");
}
#if !defined(HAS_IPI)
aic_ipis = true;
#endif
}
+
// Conditional, because on Skye and later, we use an FIQ instead of an external IRQ.
- if (pmi_handler && cpu_info->pmi_irq) {
- if (gAIC->registerInterrupt(fakeCPU, 2, NULL, (IOInterruptHandler)pmi_handler, NULL) != kIOReturnSuccess ||
- gAIC->enableInterrupt(fakeCPU, 2) != kIOReturnSuccess) {
+ if (pmi_handler && irqcount == 1) {
+ if (cpu->registerInterrupt(1, NULL, (IOInterruptAction)pmi_handler, NULL) != kIOReturnSuccess ||
+ cpu->enableInterrupt(1) != kIOReturnSuccess) {
panic("Error registering PMI");
}
}
-
- for (i = 0; i < n_irqs; i++) {
- source[i].vectorData->release();
- }
}
static void
}
memset(machProcessors, 0, array_size);
- ml_cpu_init_state();
for (unsigned int cpu = 0; cpu < topology_info->num_cpus; cpu++) {
const ml_topology_cpu *cpu_info = &topology_info->cpus[cpu];
const unsigned int cpu_id = cpu_info->cpu_id;
panic("processor_start failed");
}
}
+ ml_cpu_init_completed();
IOService::publishResource(gIOAllCPUInitializedKey, kOSBooleanTrue);
}
unsigned int cpu_id = target_to_cpu_id(target);
if (cpu_id != boot_cpu) {
- gPMGR->enableCPUCore(cpu_id);
+ extern unsigned int LowResetVectorBase;
+ gPMGR->enableCPUCore(cpu_id, ml_vtophys((vm_offset_t)&LowResetVectorBase));
}
return KERN_SUCCESS;
}
IOMemoryDescriptor* IOGetAPFSKeyStoreData();
void IOSetAPFSKeyStoreData(IOMemoryDescriptor* data);
-static volatile UInt32 arvRootHashFetched = 0;
+static volatile UInt32 ARVRootHashFetched = 0;
static volatile UInt32 bsARVRootHashFetched = 0;
-static IOMemoryDescriptor* arvRootHashData = NULL;
-static IOMemoryDescriptor* bsARVRootHashData = NULL;
IOMemoryDescriptor* IOGetARVRootHashData(void);
-void IOSetARVRootHashData(IOMemoryDescriptor* arvData);
-
IOMemoryDescriptor* IOGetBaseSystemARVRootHashData(void);
-bool IOBaseSystemARVRootHashAvailable(void);
-void IOSetBaseSystemARVRootHashData(IOMemoryDescriptor* arvData);
+bool IOBaseSystemARVRootHashAvailable(void);
-static volatile UInt32 arvManifestFetched = 0;
-static IOMemoryDescriptor* arvManifestData = NULL;
+static volatile UInt32 ARVManifestFetched = 0;
+static volatile UInt32 bsARVManifestFetched = 0;
IOMemoryDescriptor* IOGetARVManifestData(void);
-void IOSetARVManifestData(IOMemoryDescriptor* arvData);
+IOMemoryDescriptor* IOGetBaseSystemARVManifestData(void);
__END_DECLS
// ARV Root Hash fetcher
-// Store in-memory Root Hash
-void
-IOSetARVRootHashData(IOMemoryDescriptor* arvData)
-{
- // Do not allow re-fetching of the boot_args root hash by passing NULL here.
- if (arvData) {
- arvRootHashData = arvData;
- arvRootHashFetched = 0;
- }
-}
-
-// Retrieve any root hash we may have (stored in boot_args or in-memory)
+// Retrieve any root hash we may have (stored in boot_args)
IOMemoryDescriptor*
IOGetARVRootHashData(void)
{
// Check if someone got the root hash before us
- if (!OSCompareAndSwap(0, 1, &arvRootHashFetched)) {
+ if (!OSCompareAndSwap(0, 1, &ARVRootHashFetched)) {
return NULL;
}
- // Do we have in-memory root hash?
- if (arvRootHashData) {
- IOMemoryDescriptor* arvData = arvRootHashData;
- arvRootHashData = NULL;
- return arvData;
- }
-
- // Looks like there was no in-memory root hash and it's the first call - try boot_args
boot_args* args = (boot_args*)PE_state.bootArgs;
DEBG("%s: data at address %llu size %llu\n", __func__, args->arvRootHashStart, args->arvRootHashSize);
return memoryDescriptor;
}
-// Base System Analogues
+// Base System Analogue
IOMemoryDescriptor*
IOGetBaseSystemARVRootHashData(void)
{
- //TBD!
- return NULL;
+ // Check if someone got the base system root hash before us
+ if (!OSCompareAndSwap(0, 1, &bsARVRootHashFetched)) {
+ return NULL;
+ }
+
+ boot_args* args = (boot_args*)PE_state.bootArgs;
+
+ DEBG("%s: data at address %llu size %llu\n", __func__, args->bsARVRootHashStart, args->bsARVRootHashSize);
+ if (args->bsARVRootHashStart == 0) {
+ return NULL;
+ }
+
+ // We have the base system root hash in the boot_args; create an IOMemoryDescriptor for the blob
+ IOAddressRange ranges;
+ ranges.address = args->bsARVRootHashStart;
+ ranges.length = args->bsARVRootHashSize;
+
+ const IOOptionBits options = kIODirectionInOut | kIOMemoryTypePhysical64 | kIOMemoryMapperNone;
+
+ IOMemoryDescriptor* memoryDescriptor = IOMemoryDescriptor::withOptions(&ranges, 1, 0, NULL, options);
+ DEBG("%s: memory descriptor %p\n", __func__, memoryDescriptor);
+ return memoryDescriptor;
}
bool
IOBaseSystemARVRootHashAvailable(void)
{
- // Check if someone got the root hash before us
- if (!OSCompareAndSwap(0, 1, &bsARVRootHashFetched)) {
+ boot_args* args = (boot_args*)PE_state.bootArgs;
+
+ if (args->bsARVRootHashStart == 0 || args->bsARVRootHashSize == 0) {
return false;
}
- // Do we have in-memory root hash?
- if (bsARVRootHashData) {
- return true;
+ if (args->bsARVManifestStart == 0 || args->bsARVManifestSize == 0) {
+ return false;
}
- return false;
-}
-
-void
-IOSetBaseSystemARVRootHashData(IOMemoryDescriptor* arvData)
-{
- return;
+ return true;
}
-
// ARV Manifest fetcher
-// Store in-memory Manifest
-void
-IOSetARVManifestData(IOMemoryDescriptor* arvData)
-{
- // Do not allow re-fetching of the boot_args manifest by passing NULL here.
- if (arvData) {
- arvManifestData = arvData;
- arvManifestFetched = 0;
- }
-}
-
-// Retrieve any manifest we may have (stored in boot_args or in-memory)
+// Retrieve any manifest we may have (stored in boot_args)
IOMemoryDescriptor*
IOGetARVManifestData(void)
{
// Check if someone got the manifest before us
- if (!OSCompareAndSwap(0, 1, &arvManifestFetched)) {
+ if (!OSCompareAndSwap(0, 1, &ARVManifestFetched)) {
return NULL;
}
- // Do we have in-memory manifest?
- if (arvManifestData) {
- IOMemoryDescriptor* arvData = arvManifestData;
- arvManifestData = NULL;
- return arvData;
- }
-
- // Looks like there was no in-memory manifest and it's the first call - try boot_args
boot_args* args = (boot_args*)PE_state.bootArgs;
DEBG("%s: data at address %llu size %llu\n", __func__, args->arvManifestStart, args->arvManifestSize);
DEBG("%s: memory descriptor %p\n", __func__, memoryDescriptor);
return memoryDescriptor;
}
+
+// Base System Analogue
+
+IOMemoryDescriptor*
+IOGetBaseSystemARVManifestData(void)
+{
+ // Check if someone got the base system manifest before us
+ if (!OSCompareAndSwap(0, 1, &bsARVManifestFetched)) {
+ return NULL;
+ }
+
+ boot_args* args = (boot_args*)PE_state.bootArgs;
+
+ DEBG("%s: data at address %llu size %llu\n", __func__, args->bsARVManifestStart, args->bsARVManifestSize);
+ if (args->bsARVManifestStart == 0) {
+ return NULL;
+ }
+
+ // We have the manifest in the boot_args; create an IOMemoryDescriptor for the blob
+ IOAddressRange ranges;
+ ranges.address = args->bsARVManifestStart;
+ ranges.length = args->bsARVManifestSize;
+
+ const IOOptionBits options = kIODirectionInOut | kIOMemoryTypePhysical64 | kIOMemoryMapperNone;
+
+ IOMemoryDescriptor* memoryDescriptor = IOMemoryDescriptor::withOptions(&ranges, 1, 0, NULL, options);
+ DEBG("%s: memory descriptor %p\n", __func__, memoryDescriptor);
+ return memoryDescriptor;
+}
}
SYSCTL_PROC(_kern, OID_AUTO, iokittest,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
NULL, 0, sysctl_iokittest, "I", "");
#endif // __clang_analyzer__
extern void mdevremoveall(void);
extern int mdevgetrange(int devid, uint64_t *base, uint64_t *size);
extern void di_root_ramfile(IORegistryEntry * entry);
+extern int IODTGetDefault(const char *key, void *infoAddr, unsigned int infoSize);
+extern boolean_t cpuid_vmm_present(void);
#define ROUNDUP(a, b) (((a) + ((b) - 1)) & (~((b) - 1)))
return true;
}
+int
+IOGetVMMPresent(void)
+{
+ int hv_vmm_present = 0;
+
+#if defined(__arm64__)
+ if (IODTGetDefault("vmm-present", &hv_vmm_present, sizeof(hv_vmm_present)) < 0) {
+ return 0;
+ }
+
+ if (hv_vmm_present != 0) {
+ hv_vmm_present = 1;
+ }
+#elif defined(__x86_64__)
+ hv_vmm_present = cpuid_vmm_present();
+#endif
+
+ return hv_vmm_present;
+}
+
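A minimal sketch of a read-only sysctl consumer for IOGetVMMPresent(); the sysctl name and handler are assumptions for illustration, not the registration that actually ships:

extern int IOGetVMMPresent(void);

static int
sysctl_hv_vmm_present SYSCTL_HANDLER_ARGS
{
	// 1 when a hypervisor was detected (device tree on arm64, CPUID on x86_64)
	int vmm_present = IOGetVMMPresent();

	return sysctl_io_number(req, vmm_present, sizeof(vmm_present), NULL, NULL);
}

static SYSCTL_PROC(_kern, OID_AUTO, hv_vmm_present,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED,
    NULL, 0, sysctl_hv_vmm_present, "I", "");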
kern_return_t
IOFindBSDRoot( char * rootName, unsigned int rootNameSize,
dev_t * root, u_int32_t * oflags )
#define STACKSHOT_KCTYPE_TASK_SNAPSHOT 0x905u /* task_snapshot_v2 */
#define STACKSHOT_KCTYPE_THREAD_SNAPSHOT 0x906u /* thread_snapshot_v2, thread_snapshot_v3 */
#define STACKSHOT_KCTYPE_DONATING_PIDS 0x907u /* int[] */
-#define STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO 0x908u /* same as KCDATA_TYPE_LIBRARY_LOADINFO64 */
+#define STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO 0x908u /* dyld_shared_cache_loadinfo */
#define STACKSHOT_KCTYPE_THREAD_NAME 0x909u /* char[] */
#define STACKSHOT_KCTYPE_KERN_STACKFRAME 0x90Au /* struct stack_snapshot_frame32 */
#define STACKSHOT_KCTYPE_KERN_STACKFRAME64 0x90Bu /* struct stack_snapshot_frame64 */
uuid_t imageUUID;
};
+/*
+ * N.B.: Newer kernels output dyld_shared_cache_loadinfo structures
+ * instead of this, since the field names match their contents better.
+ */
struct dyld_uuid_info_64_v2 {
uint64_t imageLoadAddress; /* XXX image slide */
uuid_t imageUUID;
/* end of version 1 of dyld_uuid_info_64. sizeof v1 was 24 */
- uint64_t imageSlidBaseAddress; /* slid base address of image */
+ uint64_t imageSlidBaseAddress; /* slid base address or slid first mapping of image */
+};
+
+/*
+ * This is the renamed version of dyld_uuid_info_64 with more accurate
+ * field names, for STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO. Any users
+ * must be aware of the dyld_uuid_info_64* version history and ensure
+ * the fields they are accessing are within the actual bounds.
+ *
+ * OLD_FIELD NEW_FIELD
+ * imageLoadAddress sharedCacheSlide
+ * imageUUID sharedCacheUUID
+ * imageSlidBaseAddress sharedCacheUnreliableSlidBaseAddress
+ * - sharedCacheSlidFirstMapping
+ */
+struct dyld_shared_cache_loadinfo {
+ uint64_t sharedCacheSlide; /* image slide value */
+ uuid_t sharedCacheUUID;
+ /* end of version 1 of dyld_uuid_info_64. sizeof v1 was 24 */
+ uint64_t sharedCacheUnreliableSlidBaseAddress; /* for backwards-compatibility; use sharedCacheSlidFirstMapping if available */
+ /* end of version 2 of dyld_uuid_info_64. sizeof v2 was 32 */
+ uint64_t sharedCacheSlidFirstMapping; /* slid base address of first mapping */
};
struct dyld_aot_cache_uuid_info {
- uint64_t x86SlidBaseAddress; /* slid base address of x86 shared cache */
+ uint64_t x86SlidBaseAddress; /* slid first mapping address of x86 shared cache */
uuid_t x86UUID; /* UUID of x86 shared cache */
- uint64_t aotSlidBaseAddress; /* slide base address of aot cache */
+ uint64_t aotSlidBaseAddress; /* slide first mapping address of aot cache */
uuid_t aotUUID; /* UUID of aot shared cache */
};
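Because older kernels emit shorter versions of the shared-cache record, consumers of STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO must size-check before touching the newer fields, as the dyld_shared_cache_loadinfo comment above notes. A minimal consumer-side sketch under that assumption (the helper name is illustrative and the struct definition above is assumed to be in scope):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

// Returns the slid first-mapping address when the record is new enough to
// carry it, falls back to the (unreliable) slid base address, and finally
// to 0 when even that field is absent.
static uint64_t
shared_cache_slid_base(const void *data, size_t size)
{
	struct dyld_shared_cache_loadinfo info = {0};

	memcpy(&info, data, size < sizeof(info) ? size : sizeof(info));

	if (size >= offsetof(struct dyld_shared_cache_loadinfo, sharedCacheSlidFirstMapping) + sizeof(uint64_t)) {
		return info.sharedCacheSlidFirstMapping;
	}
	if (size >= offsetof(struct dyld_shared_cache_loadinfo, sharedCacheUnreliableSlidBaseAddress) + sizeof(uint64_t)) {
		return info.sharedCacheUnreliableSlidBaseAddress;
	}
	return 0;
}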
kTaskIsDirtyTracked = 0x4000000,
kTaskAllowIdleExit = 0x8000000,
kTaskIsTranslated = 0x10000000,
+ kTaskSharedRegionNone = 0x20000000, /* task doesn't have a shared region */
+ kTaskSharedRegionSystem = 0x40000000, /* task is attached to system shared region */
+ kTaskSharedRegionOther = 0x80000000, /* task is attached to a different shared region */
};
enum thread_snapshot_flags {
uint64_t stackshot_duration_outer;
} __attribute__((packed));
+struct stackshot_duration_v2 {
+ uint64_t stackshot_duration;
+ uint64_t stackshot_duration_outer;
+ uint64_t stackshot_duration_prior;
+} __attribute__((packed));
+
struct stackshot_fault_stats {
uint32_t sfs_pages_faulted_in; /* number of pages faulted in using KDP fault path */
uint64_t sfs_time_spent_faulting; /* MATUs spent faulting */
case STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO: {
i = 0;
+ /*
+ * for backwards compatibility, we keep the old field names, but the
+ * new data is being put in dyld_shared_cache_loadinfo
+ */
_SUBTYPE(KC_ST_UINT64, struct dyld_uuid_info_64_v2, imageLoadAddress);
_SUBTYPE_ARRAY(KC_ST_UINT8, struct dyld_uuid_info_64_v2, imageUUID, 16);
_SUBTYPE(KC_ST_UINT64, struct dyld_uuid_info_64_v2, imageSlidBaseAddress);
+ _SUBTYPE(KC_ST_UINT64, struct dyld_shared_cache_loadinfo, sharedCacheSlidFirstMapping);
setup_type_definition(retval, type_id, i, "shared_cache_dyld_load_info");
break;
}
case STACKSHOT_KCTYPE_STACKSHOT_DURATION: {
i = 0;
- _SUBTYPE(KC_ST_UINT64, struct stackshot_duration, stackshot_duration);
- _SUBTYPE(KC_ST_UINT64, struct stackshot_duration, stackshot_duration_outer);
+ _SUBTYPE(KC_ST_UINT64, struct stackshot_duration_v2, stackshot_duration);
+ _SUBTYPE(KC_ST_UINT64, struct stackshot_duration_v2, stackshot_duration_outer);
+ _SUBTYPE(KC_ST_UINT64, struct stackshot_duration_v2, stackshot_duration_prior);
subtypes[0].kcs_flags |= KCS_SUBTYPE_FLAGS_MERGE;
subtypes[1].kcs_flags |= KCS_SUBTYPE_FLAGS_MERGE;
+ subtypes[2].kcs_flags |= KCS_SUBTYPE_FLAGS_MERGE;
setup_type_definition(retval, type_id, i, "stackshot_duration");
break;
}
int dt_symtab_size = 0;
int dt_result = 0;
- kernel_segment_command_t * seg_to_remove = NULL;
+ kernel_segment_command_t * seg_kld = NULL;
+ kernel_segment_command_t * seg_klddata = NULL;
+ kernel_segment_command_t * seg_linkedit = NULL;
const char __unused * dt_segment_name = NULL;
void __unused * segment_paddress = NULL;
}
/*****
- * KLD bootstrap segment.
+ * KLD & KLDDATA bootstrap segments.
*/
// xxx - should rename KLD segment
- seg_to_remove = getsegbyname("__KLD");
- if (seg_to_remove) {
- OSRuntimeUnloadCPPForSegment(seg_to_remove);
+ seg_kld = getsegbyname("__KLD");
+ seg_klddata = getsegbyname("__KLDDATA");
+ if (seg_klddata) {
+ // __mod_term_func is part of __KLDDATA
+ OSRuntimeUnloadCPPForSegment(seg_klddata);
}
#if __arm__ || __arm64__
- /* Free the memory that was set up by bootx.
+ /* Free the memory that was set up by iBoot.
+ */
+#if !defined(KERNEL_INTEGRITY_KTRR) && !defined(KERNEL_INTEGRITY_CTRR)
+ /* We cannot free the KLD segment with CTRR enabled as it contains text and
+ * is covered by the contiguous rorgn.
*/
dt_segment_name = "Kernel-__KLD";
if (0 == IODTGetLoaderInfo(dt_segment_name, &segment_paddress, &segment_size)) {
- /* We cannot free this with KTRR enabled, as we cannot
- * update the permissions on the KLD range this late
- * in the boot process.
- */
IODTFreeLoaderInfo(dt_segment_name, (void *)segment_paddress,
- (int)segment_size);
+ (int)segment_size); // calls ml_static_mfree
+ } else if (seg_kld && seg_kld->vmaddr && seg_kld->vmsize) {
+ /* With fileset KCs, the Kernel KLD segment is not recorded in the DT. */
+ ml_static_mfree(ml_static_ptovirt(seg_kld->vmaddr - gVirtBase + gPhysBase),
+ seg_kld->vmsize);
+ }
+#endif
+ dt_segment_name = "Kernel-__KLDDATA";
+ if (0 == IODTGetLoaderInfo(dt_segment_name, &segment_paddress, &segment_size)) {
+ IODTFreeLoaderInfo(dt_segment_name, (void *)segment_paddress,
+ (int)segment_size); // calls ml_static_mfree
+ } else if (seg_klddata && seg_klddata->vmaddr && seg_klddata->vmsize) {
+ /* With fileset KCs, the Kernel KLDDATA segment is not recorded in the DT. */
+ ml_static_mfree(ml_static_ptovirt(seg_klddata->vmaddr - gVirtBase + gPhysBase),
+ seg_klddata->vmsize);
}
#elif __i386__ || __x86_64__
/* On x86, use the mapping data from the segment load command to
- * unload KLD directly.
+ * unload KLD & KLDDATA directly.
* This may invalidate any assumptions about "avail_start"
* defining the lower bound for valid physical addresses.
*/
- if (seg_to_remove && seg_to_remove->vmaddr && seg_to_remove->vmsize) {
- bzero((void *)seg_to_remove->vmaddr, seg_to_remove->vmsize);
- ml_static_mfree(seg_to_remove->vmaddr, seg_to_remove->vmsize);
+ if (seg_kld && seg_kld->vmaddr && seg_kld->vmsize) {
+ bzero((void *)seg_kld->vmaddr, seg_kld->vmsize);
+ ml_static_mfree(seg_kld->vmaddr, seg_kld->vmsize);
+ }
+ if (seg_klddata && seg_klddata->vmaddr && seg_klddata->vmsize) {
+ bzero((void *)seg_klddata->vmaddr, seg_klddata->vmsize);
+ ml_static_mfree(seg_klddata->vmaddr, seg_klddata->vmsize);
}
#else
#error arch
#endif
- seg_to_remove = NULL;
-
/*****
* Prelinked kernel's symtab (if there is one).
*/
}
}
- seg_to_remove = (kernel_segment_command_t *)getsegbyname("__LINKEDIT");
+ seg_linkedit = (kernel_segment_command_t *)getsegbyname("__LINKEDIT");
/* kxld always needs the kernel's __LINKEDIT segment, but we can make it
* pageable, unless keepsyms is set. To do that, we have to copy it from
vm_map_offset_t seg_copy_offset = 0;
vm_map_size_t seg_length = 0;
- seg_data = (void *) seg_to_remove->vmaddr;
- seg_offset = (vm_map_offset_t) seg_to_remove->vmaddr;
- seg_length = (vm_map_size_t) seg_to_remove->vmsize;
+ seg_data = (void *) seg_linkedit->vmaddr;
+ seg_offset = (vm_map_offset_t) seg_linkedit->vmaddr;
+ seg_length = (vm_map_size_t) seg_linkedit->vmsize;
/* Allocate space for the LINKEDIT copy.
*/
}
#endif // VM_MAPPED_KEXTS
- seg_to_remove = NULL;
-
result = kOSReturnSuccess;
return result;
OSKext::setAutounloadEnabled(bool flag)
{
bool result = flags.autounloadEnabled ? true : false;
- flags.autounloadEnabled = flag ? 1 : 0;
+ flags.autounloadEnabled = flag ? (0 == flags.unloadUnsupported) : 0;
if (result != (flag ? true : false)) {
OSKextLog(this,
getPropertyForHostArch(kOSBundleAllowUserLoadKey) == kOSBooleanTrue);
if (shouldSaveSegments) {
flags.resetSegmentsFromImmutableCopy = 1;
+ } else {
+ flags.unloadUnsupported = 1;
}
break;
case KCKindPageable:
flags.resetSegmentsFromImmutableCopy = 1;
} else if (resetAuxKCSegmentOnUnload) {
flags.resetSegmentsFromVnode = 1;
+ } else {
+ flags.unloadUnsupported = 1;
}
break;
default:
if (aKext->countRequestCallbacks()) {
goto finish;
}
+ if (aKext->flags.unloadUnsupported) {
+ result = kOSKextReturnInUse;
+ OSKextLog(aKext,
+ kOSKextLogErrorLevel |
+ kOSKextLogKextBookkeepingFlag,
+ "Can't remove kext %s; unsupported by cache.",
+ aKext->getIdentifierCString());
+ goto finish;
+ }
/* If we are terminating, send the request to the IOCatalogue
* (which will actually call us right back but that's ok we have
getIdentifierCString(),
aClass->getClassName());
- flags.autounloadEnabled = 1;
+ flags.autounloadEnabled = (0 == flags.unloadUnsupported);
break;
}
}
allow_fileset_load = false;
#endif
+ /*
+ * Change with 70582300
+ */
+#if 0 || !defined(VM_MAPPED_KEXTS)
+ /*
+ * On platforms that don't support the SystemKC or a file-backed
+ * AuxKC, the kext receipt for 3rd party kexts loaded by the booter
+ * needs to be queried before we load any codeless kexts or release
+ * any 3rd party kexts to run. On platforms that support a file-backed
+ * AuxKC, this process is done via the kext audit mechanism.
+ */
+
+ printf("KextLog: waiting for kext receipt to be queried.\n");
+ while (!IOServiceWaitForMatchingResource(kOSKextReceiptQueried, UINT64_MAX)) {
+ IOSleep(30);
+ }
+#endif /* !VM_MAPPED_KEXTS */
+
/*
* Get the args from the request. Right now we need the file
* name for the pageable and the aux kext collection file sets.
OSDictionary *infoDict;
parsedXML = consumeDeferredKextCollection(KCKindAuxiliary);
infoDict = OSDynamicCast(OSDictionary, parsedXML.get());
+#if !defined(VM_MAPPED_KEXTS)
+ /*
+ * On platforms where we don't dynamically wire-down / page-in
+ * kext memory, we need to maintain the invariant that if the
+ * AuxKC in memory does not contain a kext receipt, then we
+ * should not load any of the kexts.
+ */
+ size_t receipt_sz = 0;
+ if (getsectdatafromheader(akc_mh, kReceiptInfoSegment, kAuxKCReceiptSection, &receipt_sz) == NULL || receipt_sz == 0) {
+ OSKextLog(/* kext */ NULL, kOSKextLogErrorLevel | kOSKextLogArchiveFlag,
+ "KextLog: WARNING: Failed to load AuxKC from memory: missing receipt");
+ ret = kOSKextReturnKCLoadFailure;
+ goto try_codeless;
+ }
+#endif
if (infoDict) {
bool added;
printf("KextLog: Adding kexts from in-memory AuxKC\n");
return kOSReturnSuccess;
}
+extern "C" kern_return_t
+OSKextSetReceiptQueried(void)
+{
+ OSKextLog(/* kext */ NULL,
+ kOSKextLogStepLevel | kOSKextLogGeneralFlag,
+ "Setting kext receipt as queried");
+
+ IOService::publishResource(kOSKextReceiptQueried, kOSBooleanTrue);
+ return KERN_SUCCESS;
+}
+
extern "C" const vm_allocation_site_t *
OSKextGetAllocationSiteForCaller(uintptr_t address)
{
}
#if defined(HAS_APPLE_PAC)
-static inline void
+#if !KASAN
+/*
+ * Place this function in __KLD,__text on non-kasan builds so it gets unmapped
+ * after CTRR lockdown.
+ */
+__attribute__((noinline, section("__KLD,__text")))
+#endif
+static void
OSRuntimeSignStructorsInSegment(kernel_segment_command_t *segment)
{
kernel_section_t * section;
libkern/uuid/uuid.c standard
libkern/os/log.c standard
+libkern/os/log_encode.c standard
+libkern/os/log_mem.c standard
libkern/os/object.c standard
libkern/os/internal.c standard
libkern/os/refcnt.c standard
libkern/crypto/corecrypto_rsa.c optional crypto
libkern/crypto/corecrypto_chacha20poly1305.c optional crypto
+libkern/coretrust/coretrust.c standard
+
libkern/img4/interface.c standard
libkern/stack_protector.c standard
--- /dev/null
+#include <libkern/libkern.h>
+#include <libkern/section_keywords.h>
+#include <libkern/coretrust/coretrust.h>
+
+#if defined(SECURITY_READ_ONLY_LATE)
+SECURITY_READ_ONLY_LATE(const coretrust_t *) coretrust = NULL;
+#else
+const coretrust_t *coretrust = NULL;
+#endif
+
+void
+coretrust_interface_register(const coretrust_t *ct)
+{
+ if (coretrust) {
+ panic("coretrust interface already set");
+ }
+ coretrust = ct;
+}
uint16_t fcr_length;
} *firehose_chunk_range_t;
+#if __has_include(<os/atomic_private.h>)
#if defined(KERNEL) || defined(OS_FIREHOSE_SPI)
OS_ALWAYS_INLINE
#endif // OS_ATOMIC_HAS_STARVATION_FREE_RMW || !OS_ATOMIC_CONFIG_STARVATION_FREE_ONLY
#endif // defined(KERNEL) || defined(OS_FIREHOSE_SPI)
+#endif // __has_include(<os/atomic_private.h>)
__END_DECLS
firehose_stream_memory_baseband = 6,
_firehose_stream_max,
+ _firehose_stream_disabled = (uint8_t)-1,
);
/*!
_firehose_tracepoint_flags_pc_style_main_plugin = 0x0003 << 1,
_firehose_tracepoint_flags_pc_style_absolute = 0x0004 << 1,
_firehose_tracepoint_flags_pc_style_uuid_relative = 0x0005 << 1,
- _firehose_tracepoint_flags_pc_style__unused6 = 0x0006 << 1,
+ _firehose_tracepoint_flags_pc_style_large_shared_cache = 0x0006 << 1,
_firehose_tracepoint_flags_pc_style__unused7 = 0x0007 << 1,
_firehose_tracepoint_flags_base_has_unique_pid = 0x0010,
+ _firehose_tracepoint_flags_base_has_large_offset = 0x0020,
);
/*
* @abstract
* Flags for Log tracepoints (namespace signpost).
*
- * When flags are shared with the log type, they should havethe same values.
+ * When flags are shared with the log type, they should have the same values.
*/
OS_OPTIONS(_firehose_tracepoint_flags_signpost, uint16_t,
+ // shared with log
_firehose_tracepoint_flags_signpost_has_private_data = 0x0100,
_firehose_tracepoint_flags_signpost_has_subsystem = 0x0200,
_firehose_tracepoint_flags_signpost_has_rules = 0x0400,
_firehose_tracepoint_flags_signpost_has_oversize = 0x0800,
_firehose_tracepoint_flags_signpost_has_context_data = 0x1000,
+
+ // specific to signpost
+ _firehose_tracepoint_flags_signpost_has_name = 0x8000,
);
/* MIG firehose push reply structure */
#if KERNEL
#include <atm/atm_internal.h>
#endif
+#if __has_include(<os/atomic_private.h>)
#include <os/atomic_private.h>
+#else
+#include <os/internal/internal_shared.h>
+#endif
#include "firehose_types_private.h"
OS_ASSUME_NONNULL_BEGIN
machine \
c++ \
crypto \
- img4
+ img4 \
+ coretrust
INSTINC_SUBDIRS_X86_64 = \
i386
INSTINC_SUBDIRS_X86_64H = \
#ifdef XNU_KERNEL_PRIVATE
+/*!
+ * @define kOSKextReceiptQueried
+ * @abstract Whether or not the kext receipt has been successfully queried.
+ */
+#define kOSKextReceiptQueried "OSKextReceiptQueried"
+
#if PRAGMA_MARK
#pragma mark -
/********************************************************************/
extern uint32_t OSKextGetKmodIDForSite(const vm_allocation_site_t * site,
char * name, vm_size_t namelen);
extern void OSKextFreeSite(vm_allocation_site_t * site);
+extern kern_return_t OSKextSetReceiptQueried(void);
#if CONFIG_IMAGEBOOT
extern int OSKextGetUUIDForName(const char *, uuid_t);
unsigned int CPPInitialized:1;
unsigned int jettisonLinkeditSeg:1;
unsigned int resetSegmentsFromImmutableCopy:1;
+ unsigned int unloadUnsupported:1;
} flags;
uint32_t matchingRefCount;
unsigned int flags:14,
length:18;
- char * OS_PTRAUTH_SIGNED_PTR("OSString.string") string;;
+ char * OS_PTRAUTH_SIGNED_PTR("OSString.string") string;
#else /* APPLE_KEXT_ALIGN_CONTAINERS */
protected:
- char * OS_PTRAUTH_SIGNED_PTR("OSString.string") string;;
+ char * OS_PTRAUTH_SIGNED_PTR("OSString.string") string;
unsigned int flags;
unsigned int length;
--- /dev/null
+export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
+export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
+export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
+export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
+
+include $(MakeInc_cmd)
+include $(MakeInc_def)
+
+DATAFILES =
+PRIVATE_DATAFILES =
+KERNELFILES =
+PRIVATE_KERNELFILES = coretrust.h
+
+INSTALL_MI_LIST = ${DATAFILES}
+INSTALL_MI_LCL_LIST = ${PRIVATE_DATAFILES}
+INSTALL_KF_MI_LIST = ${KERNELFILES}
+INSTALL_KF_MI_LCL_LIST = ${PRIVATE_KERNELFILES}
+EXPORT_MI_LIST = ${INSTALL_KF_MI_LCL_LIST}
+
+INSTALL_MI_DIR = libkern/coretrust
+EXPORT_MI_DIR = libkern/coretrust
+
+include $(MakeInc_rule)
+include $(MakeInc_dir)
\ No newline at end of file
--- /dev/null
+/*
+ * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef __CORETRUST_H
+#define __CORETRUST_H
+
+#include <os/base.h>
+#include <sys/cdefs.h>
+#include <sys/types.h>
+
+#if XNU_KERNEL_PRIVATE
+/*
+ * Only include this when building for XNU. CoreTrust will include its
+ * local copy of the header.
+ */
+#include <coretrust/CTEvaluate.h>
+#endif
+
+/*
+ * We add more definitions as the need for them arises. Please refer
+ * to <coretrust/CTEvaluate.h> for more information.
+ */
+
+typedef int (*coretrust_CTEvaluateAMFICodeSignatureCMS_t)(
+ const uint8_t *cms_data,
+ size_t cms_data_length,
+ const uint8_t *detached_data,
+ size_t detached_data_length,
+ bool allow_test_hierarchy,
+ const uint8_t **leaf_certificate,
+ size_t *leaf_certificate_length,
+ CoreTrustPolicyFlags *policy_flags,
+ CoreTrustDigestType *cms_digest_type,
+ CoreTrustDigestType *hash_agility_digest_type,
+ const uint8_t **digest_data,
+ size_t *digest_length
+ );
+
+typedef struct _coretrust {
+ coretrust_CTEvaluateAMFICodeSignatureCMS_t CTEvaluateAMFICodeSignatureCMS;
+} coretrust_t;
+
+__BEGIN_DECLS
+
+/*!
+ * @const coretrust
+ * The CoreTrust interface that was registered.
+ */
+extern const coretrust_t *coretrust;
+
+/*!
+ * @function coretrust_interface_register
+ * Registers the CoreTrust kext interface for use within the kernel proper.
+ *
+ * @param ct
+ * The interface to register.
+ *
+ * @discussion
+ * This routine may only be called once and must be called before late-const has
+ * been applied to kernel memory.
+ */
+OS_EXPORT OS_NONNULL1
+void
+coretrust_interface_register(const coretrust_t *ct);
+
+__END_DECLS
+
+#endif // __CORETRUST_H
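For context, a minimal sketch of the registering side, assuming the CoreTrust kext wraps its CMS evaluation entry point in a function named ct_evaluate_amfi_cs_cms and calls the registration routine from its start code; both symbol names are illustrative, since the kext itself is not part of this change:

#include <libkern/coretrust/coretrust.h>

// Hypothetical evaluation routine implemented inside the CoreTrust kext;
// the real symbol name is not shown in this change.
extern int ct_evaluate_amfi_cs_cms(
	const uint8_t *cms_data, size_t cms_data_length,
	const uint8_t *detached_data, size_t detached_data_length,
	bool allow_test_hierarchy,
	const uint8_t **leaf_certificate, size_t *leaf_certificate_length,
	CoreTrustPolicyFlags *policy_flags,
	CoreTrustDigestType *cms_digest_type,
	CoreTrustDigestType *hash_agility_digest_type,
	const uint8_t **digest_data, size_t *digest_length);

static const coretrust_t coretrust_interface = {
	.CTEvaluateAMFICodeSignatureCMS = ct_evaluate_amfi_cs_cms,
};

// Called once from the kext's start routine, early enough that late-const
// has not yet been applied to kernel memory.
static void
coretrust_kext_start(void)
{
	coretrust_interface_register(&coretrust_interface);
}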
*/
#if __has_feature(ptrauth_calls)
ptrauth_generic_signature_t
-ptrauth_utils_sign_blob_generic(void * ptr, size_t len_bytes, uint64_t data, int flags);
+ptrauth_utils_sign_blob_generic(const void * ptr, size_t len_bytes, uint64_t data, int flags);
#else
static inline ptrauth_generic_signature_t
-ptrauth_utils_sign_blob_generic(__unused void * ptr, __unused size_t len_bytes, __unused uint64_t data, __unused int flags)
+ptrauth_utils_sign_blob_generic(__unused const void * ptr, __unused size_t len_bytes, __unused uint64_t data, __unused int flags)
{
return 0;
}
*/
#if __has_feature(ptrauth_calls)
void
-ptrauth_utils_auth_blob_generic(void * ptr, size_t len_bytes, uint64_t data, int flags, ptrauth_generic_signature_t signature);
+ptrauth_utils_auth_blob_generic(const void * ptr, size_t len_bytes, uint64_t data, int flags, ptrauth_generic_signature_t signature);
#else
static inline void
-ptrauth_utils_auth_blob_generic(__unused void * ptr, __unused size_t len_bytes, __unused uint64_t data, __unused int flags, __unused ptrauth_generic_signature_t signature)
+ptrauth_utils_auth_blob_generic(__unused const void * ptr, __unused size_t len_bytes, __unused uint64_t data, __unused int flags, __unused ptrauth_generic_signature_t signature)
{
return;
}
#define _os_atomic_clang_op(p, v, m, o, op) ({ \
__auto_type _v = _os_atomic_value_cast(p, v); \
- __auto_type _r = _os_atomic_clang_op_orig(p, _v, m, o); \
- op(_r, _v); \
+ __auto_type _s = _os_atomic_clang_op_orig(p, _v, m, o); \
+ op(_s, _v); \
})
#if OS_ATOMIC_CONFIG_MEMORY_ORDER_DEPENDENCY
__BEGIN_DECLS
+static inline uint32_t
+os_hash_jenkins_update(const void *data, size_t length, uint32_t hash)
+{
+ const uint8_t *key = (const uint8_t *)data;
+
+ for (size_t i = 0; i < length; i++) {
+ hash += key[i];
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
+
+ return hash;
+}
+
+static inline uint32_t
+os_hash_jenkins_finish(uint32_t hash)
+{
+ hash += (hash << 3);
+ hash ^= (hash >> 11);
+ hash += (hash << 15);
+
+ return hash;
+}
+
/*!
* @function os_hash_jenkins
*
static inline uint32_t
os_hash_jenkins(const void *data, size_t length)
{
- const uint8_t *key = (const uint8_t *)data;
- uint32_t hash = 0;
-
- for (size_t i = 0; i < length; i++) {
- hash += key[i];
- hash += (hash << 10);
- hash ^= (hash >> 6);
- }
-
- hash += (hash << 3);
- hash ^= (hash >> 11);
- hash += (hash << 15);
-
- return hash;
+ return os_hash_jenkins_finish(os_hash_jenkins_update(data, length, 0));
}
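Splitting the Jenkins one-at-a-time hash into update and finish steps lets callers hash data that arrives in pieces and still obtain the same value as hashing the concatenated buffer. A brief usage sketch (the function and buffer names are illustrative):

// Hash "key" and "value" as if they had been concatenated into a single
// buffer: feed each piece to os_hash_jenkins_update(), then finalize once.
static uint32_t
hash_key_value(const void *key, size_t key_len, const void *value, size_t value_len)
{
	uint32_t h = 0;

	h = os_hash_jenkins_update(key, key_len, h);
	h = os_hash_jenkins_update(value, value_len, h);
	return os_hash_jenkins_finish(h);
}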
/*!
#include "trace_internal.h"
#include "log_encode.h"
+#include "log_mem.h"
struct os_log_s {
int a;
struct os_log_s _os_log_default;
struct os_log_s _os_log_replay;
+
+LOGMEM_STATIC_INIT(os_log_mem, 14, 9, 10);
+
extern vm_offset_t kernel_firehose_addr;
extern firehose_chunk_t firehose_boot_chunk;
extern void *OSKextKextForAddress(const void *);
/* Counters for persistence mode */
-uint32_t oslog_p_total_msgcount = 0;
-uint32_t oslog_p_metadata_saved_msgcount = 0;
-uint32_t oslog_p_metadata_dropped_msgcount = 0;
-uint32_t oslog_p_error_count = 0;
-uint32_t oslog_p_saved_msgcount = 0;
-uint32_t oslog_p_dropped_msgcount = 0;
-uint32_t oslog_p_boot_dropped_msgcount = 0;
-uint32_t oslog_p_coprocessor_total_msgcount = 0;
-uint32_t oslog_p_coprocessor_dropped_msgcount = 0;
+SCALABLE_COUNTER_DEFINE(oslog_p_total_msgcount);
+SCALABLE_COUNTER_DEFINE(oslog_p_metadata_saved_msgcount);
+SCALABLE_COUNTER_DEFINE(oslog_p_metadata_dropped_msgcount);
+SCALABLE_COUNTER_DEFINE(oslog_p_error_count);
+SCALABLE_COUNTER_DEFINE(oslog_p_saved_msgcount);
+SCALABLE_COUNTER_DEFINE(oslog_p_dropped_msgcount);
+SCALABLE_COUNTER_DEFINE(oslog_p_boot_dropped_msgcount);
+SCALABLE_COUNTER_DEFINE(oslog_p_coprocessor_total_msgcount);
+SCALABLE_COUNTER_DEFINE(oslog_p_coprocessor_dropped_msgcount);
+SCALABLE_COUNTER_DEFINE(oslog_p_unresolved_kc_msgcount);
/* Counters for streaming mode */
-uint32_t oslog_s_total_msgcount = 0;
-uint32_t oslog_s_error_count = 0;
-uint32_t oslog_s_metadata_msgcount = 0;
+SCALABLE_COUNTER_DEFINE(oslog_s_error_count);
+/* Protected by the stream lock */
+uint32_t oslog_s_total_msgcount;
+uint32_t oslog_s_metadata_msgcount;
/* Counters for msgbuf logging */
-uint32_t oslog_msgbuf_msgcount = 0;
-uint32_t oslog_msgbuf_dropped_msgcount = 0;
+SCALABLE_COUNTER_DEFINE(oslog_msgbuf_msgcount);
+SCALABLE_COUNTER_DEFINE(oslog_msgbuf_dropped_msgcount);
static bool oslog_boot_done = false;
_os_log_to_msgbuf_internal(const char *format, va_list args, bool safe, bool logging, bool addcr);
static void
-_os_log_to_log_internal(os_log_t oslog, os_log_type_t type,
- const char *format, va_list args, void *addr, void *dso, bool driverKit);
+_os_log_to_log_internal(os_log_type_t type, const char *format, va_list args, void *addr, void *dso, bool driverKit);
-
-static void
-_os_log_actual(os_log_t oslog, os_log_type_t type, const char *format, void
- *dso, void *addr, os_log_buffer_context_t context, bool driverKit);
+static bool
+os_log_turned_off(void)
+{
+ return atm_get_diagnostic_config() & (ATM_TRACE_DISABLE | ATM_TRACE_OFF);
+}
bool
os_log_info_enabled(os_log_t log __unused)
{
- return true;
+ return !os_log_turned_off();
}
bool
os_log_debug_enabled(os_log_t log __unused)
{
- return true;
+ return !os_log_turned_off();
}
-os_log_t
-os_log_create(const char *subsystem __unused, const char *category __unused)
+static bool
+os_log_disabled(void)
{
- return &_os_log_default;
+ return atm_get_diagnostic_config() & ATM_TRACE_DISABLE;
}
-bool
-_os_log_string_is_public(const char *str __unused)
+os_log_t
+os_log_create(const char *subsystem __unused, const char *category __unused)
{
- return true;
+ return &_os_log_default;
}
__attribute__((noinline, not_tail_called)) void
_os_log_with_args_internal(os_log_t oslog, os_log_type_t type,
const char *format, va_list args, void *addr, void *dso, bool driverKit, bool addcr)
{
- uint32_t logging_config = atm_get_diagnostic_config();
- boolean_t safe;
- boolean_t logging;
-
if (format[0] == '\0') {
return;
}
/* early boot can log to dmesg for later replay (27307943) */
- safe = (startup_phase < STARTUP_SUB_EARLY_BOOT || oslog_is_safe());
-
- if (logging_config & ATM_TRACE_DISABLE || logging_config & ATM_TRACE_OFF) {
- logging = false;
- } else {
- logging = true;
- }
+ bool safe = (startup_phase < STARTUP_SUB_EARLY_BOOT || oslog_is_safe());
+ bool logging = !os_log_turned_off();
if (oslog != &_os_log_replay) {
_os_log_to_msgbuf_internal(format, args, safe, logging, addcr);
}
if (safe && logging) {
- _os_log_to_log_internal(oslog, type, format, args, addr, dso, driverKit);
+ _os_log_to_log_internal(type, format, args, addr, dso, driverKit);
}
}
va_list args_copy;
if (!bsd_log_lock(safe)) {
- os_atomic_inc(&oslog_msgbuf_dropped_msgcount, relaxed);
+ counter_inc(&oslog_msgbuf_dropped_msgcount);
return;
}
bsd_log_unlock();
logwakeup(msgbufp);
- os_atomic_inc(&oslog_msgbuf_msgcount, relaxed);
+ counter_inc(&oslog_msgbuf_msgcount);
}
-static void
-_os_log_to_log_internal(os_log_t oslog, os_log_type_t type,
- const char *format, va_list args, void *addr, void *dso, bool driverKit)
+static firehose_stream_t
+firehose_stream(os_log_type_t type)
{
- kc_format_t kcformat = KCFormatUnknown;
- struct os_log_buffer_context_s context;
- unsigned char buffer_data[OS_LOG_BUFFER_MAX_SIZE] __attribute__((aligned(8)));
- os_log_buffer_t buffer = (os_log_buffer_t)buffer_data;
- uint8_t pubdata[OS_LOG_BUFFER_MAX_SIZE];
- va_list args_copy;
-
- if (addr == NULL) {
- return;
- }
-
- if (!PE_get_primary_kc_format(&kcformat)) {
- return;
- }
-
- if (kcformat == KCFormatStatic || kcformat == KCFormatKCGEN) {
- void *baseAddress = PE_get_kc_baseaddress(KCKindPrimary);
- if (!baseAddress) {
- return;
- }
- dso = baseAddress;
- } else if (kcformat == KCFormatDynamic || kcformat == KCFormatFileset) {
- if (dso == NULL) {
- dso = (void *) OSKextKextForAddress(format);
- if (dso == NULL) {
- return;
- }
- }
- if (!_os_trace_addr_in_text_segment(dso, format)) {
- return;
- }
- if (!driverKit) {
- void *dso_addr = (void *) OSKextKextForAddress(addr);
- if (dso != dso_addr) {
- return;
- }
- }
- }
-
- memset(&context, 0, sizeof(context));
- memset(buffer, 0, OS_LOG_BUFFER_MAX_SIZE);
+ return (type == OS_LOG_TYPE_INFO || type == OS_LOG_TYPE_DEBUG) ?
+ firehose_stream_memory : firehose_stream_persist;
+}
- context.shimmed = true;
- context.buffer = buffer;
- context.content_sz = OS_LOG_BUFFER_MAX_SIZE - sizeof(*buffer);
- context.pubdata = pubdata;
- context.pubdata_sz = sizeof(pubdata);
+static void
+_os_log_actual(os_log_type_t type, const char *format, void *dso, void *addr, uint8_t *logdata, size_t logdata_sz,
+ firehose_tracepoint_flags_t flags, bool driverKit)
+{
+ firehose_tracepoint_id_u trace_id;
- va_copy(args_copy, args);
+ firehose_stream_t stream = firehose_stream(type);
+ uint64_t timestamp = firehose_tracepoint_time(firehose_activity_flags_default);
- os_atomic_inc(&oslog_p_total_msgcount, relaxed);
- if (_os_log_encode(format, args_copy, 0, &context)) {
- _os_log_actual(oslog, type, format, dso, addr, &context, driverKit);
+ if (driverKit) {
+ // set FIREHOSE_TRACEPOINT_PC_DYNAMIC_BIT so logd will not try to find the format string in
+ // the executable text
+ trace_id.ftid_value = FIREHOSE_TRACE_ID_MAKE(firehose_tracepoint_namespace_log,
+ type, flags, (uint32_t)((uintptr_t)addr | FIREHOSE_TRACEPOINT_PC_DYNAMIC_BIT));
} else {
- os_atomic_inc(&oslog_p_error_count, relaxed);
+ // create trace_id after we've set additional flags
+ trace_id.ftid_value = FIREHOSE_TRACE_ID_MAKE(firehose_tracepoint_namespace_log,
+ type, flags, _os_trace_offset(dso, format, (_firehose_tracepoint_flags_activity_t)flags));
}
- va_end(args_copy);
+
+ _firehose_trace(stream, trace_id, timestamp, logdata, logdata_sz, true);
}
-static inline size_t
-_os_trace_write_location_for_address(uint8_t buf[static sizeof(uint64_t)],
- void *dso, const void *address, firehose_tracepoint_flags_t *flags, __unused bool driverKit)
+static void *
+resolve_dso(const char *fmt, void *dso, void *addr, bool driverKit)
{
- uintptr_t shift_addr = (uintptr_t)address - (uintptr_t)dso;
-
kc_format_t kcformat = KCFormatUnknown;
- __assert_only bool result = PE_get_primary_kc_format(&kcformat);
- assert(result);
- if (kcformat == KCFormatStatic || kcformat == KCFormatKCGEN) {
- *flags = _firehose_tracepoint_flags_pc_style_shared_cache;
- memcpy(buf, (uint32_t[]){ (uint32_t)shift_addr }, sizeof(uint32_t));
- return sizeof(uint32_t);
- } else {
- kernel_mach_header_t *mh = dso;
-
- /*
- * driverKit will have the dso set as MH_EXECUTE
- * (it is logging from a syscall in the kernel)
- * but needs logd to parse the address as an
- * absolute pc.
- */
- if (mh->filetype == MH_EXECUTE && !driverKit) {
- *flags = _firehose_tracepoint_flags_pc_style_main_exe;
- memcpy(buf, (uint32_t[]){ (uint32_t)shift_addr }, sizeof(uint32_t));
- return sizeof(uint32_t);
- } else {
- *flags = _firehose_tracepoint_flags_pc_style_absolute;
- if (!driverKit) {
- shift_addr = VM_KERNEL_UNSLIDE(address);
- } else {
- shift_addr = (uintptr_t) address;
- }
- memcpy(buf, (uintptr_t[]){ shift_addr }, sizeof(uintptr_t));
-#if __LP64__
- return 6; // 48 bits are enough
-#else
- return sizeof(uintptr_t);
-#endif
- }
+ if (!PE_get_primary_kc_format(&kcformat)) {
+ return NULL;
}
-}
-
-OS_ALWAYS_INLINE
-static inline size_t
-_os_log_buffer_pack(uint8_t *buffdata, size_t buffdata_sz,
- os_log_buffer_context_t ctx)
-{
- os_log_buffer_t buffer = ctx->buffer;
- size_t buffer_sz = sizeof(*ctx->buffer) + ctx->content_sz;
- size_t total_sz = buffer_sz + ctx->pubdata_sz;
-
- if (total_sz > buffdata_sz) {
- return 0;
+ switch (kcformat) {
+ case KCFormatStatic:
+ case KCFormatKCGEN:
+ dso = PE_get_kc_baseaddress(KCKindPrimary);
+ break;
+ case KCFormatDynamic:
+ case KCFormatFileset:
+ if (!dso && (dso = (void *)OSKextKextForAddress(fmt)) == NULL) {
+ return NULL;
+ }
+ if (!_os_trace_addr_in_text_segment(dso, fmt)) {
+ return NULL;
+ }
+ if (!driverKit && (dso != (void *)OSKextKextForAddress(addr))) {
+ return NULL;
+ }
+ break;
+ default:
+ panic("unknown KC format type");
}
- memcpy(buffdata, buffer, buffer_sz);
- memcpy(&buffdata[buffer_sz], ctx->pubdata, ctx->pubdata_sz);
- return total_sz;
+ return dso;
}
static void
-_os_log_actual(os_log_t oslog __unused, os_log_type_t type, const char *format,
- void *dso, void *addr, os_log_buffer_context_t context, bool driverKit)
+_os_log_to_log_internal(os_log_type_t type, const char *fmt, va_list args, void *addr, void *dso, bool driverKit)
{
- firehose_stream_t stream;
- firehose_tracepoint_flags_t flags = 0;
- firehose_tracepoint_id_u trace_id;
- uint8_t buffdata[OS_LOG_BUFFER_MAX_SIZE];
- size_t addr_len = 0, buffdata_sz;
- uint64_t timestamp;
- uint64_t thread_id;
-
- // dso == the start of the binary that was loaded
- addr_len = _os_trace_write_location_for_address(buffdata, dso, addr, &flags, driverKit);
- buffdata_sz = _os_log_buffer_pack(buffdata + addr_len,
- sizeof(buffdata) - addr_len, context);
- if (buffdata_sz == 0) {
+ counter_inc(&oslog_p_total_msgcount);
+
+ if (addr == NULL) {
+ counter_inc(&oslog_p_unresolved_kc_msgcount);
return;
}
- buffdata_sz += addr_len;
- timestamp = firehose_tracepoint_time(firehose_activity_flags_default);
- thread_id = thread_tid(current_thread());
-
- if (driverKit) {
- // set FIREHOSE_TRACEPOINT_PC_DYNAMIC_BIT so logd will not try to find the format string in
- // the executable text
- trace_id.ftid_value = FIREHOSE_TRACE_ID_MAKE(firehose_tracepoint_namespace_log,
- type, flags, (uint32_t)((uintptr_t)addr | FIREHOSE_TRACEPOINT_PC_DYNAMIC_BIT));
- } else {
- // create trace_id after we've set additional flags
- trace_id.ftid_value = FIREHOSE_TRACE_ID_MAKE(firehose_tracepoint_namespace_log,
- type, flags, _os_trace_offset(dso, format, (_firehose_tracepoint_flags_activity_t)flags));
+ if ((dso = resolve_dso(fmt, dso, addr, driverKit)) == NULL) {
+ counter_inc(&oslog_p_unresolved_kc_msgcount);
+ return;
}
- if (type == OS_LOG_TYPE_INFO || type == OS_LOG_TYPE_DEBUG) {
- stream = firehose_stream_memory;
+ uint8_t buffer[OS_LOG_BUFFER_MAX_SIZE] __attribute__((aligned(8))) = { 0 };
+ struct os_log_context_s ctx;
+
+ os_log_context_init(&ctx, &os_log_mem, buffer, sizeof(buffer));
+
+ if (os_log_context_encode(&ctx, fmt, args, addr, dso, driverKit)) {
+ _os_log_actual(type, fmt, dso, addr, ctx.ctx_buffer, ctx.ctx_content_sz,
+ ctx.ctx_ft_flags, driverKit);
} else {
- stream = firehose_stream_persist;
+ counter_inc(&oslog_p_error_count);
}
- _firehose_trace(stream, trace_id, timestamp, buffdata, buffdata_sz, true);
+
+ os_log_context_free(&ctx);
}
bool
{
firehose_tracepoint_id_u trace_id;
firehose_tracepoint_id_t return_id = 0;
- firehose_stream_t stream;
uint8_t pubdata[OS_LOG_BUFFER_MAX_SIZE];
size_t wr_pos = 0;
+ if (os_log_turned_off()) {
+ return false;
+ }
+
if (buff_len + 16 + sizeof(uint32_t) > OS_LOG_BUFFER_MAX_SIZE) {
return false;
}
+ firehose_stream_t stream = firehose_stream(type);
// unlike kext, where pc is used to find uuid, in coprocessor logs the uuid is passed as part of the tracepoint
firehose_tracepoint_flags_t flags = _firehose_tracepoint_flags_pc_style_uuid_relative;
trace_id.ftid_value = FIREHOSE_TRACE_ID_MAKE(firehose_tracepoint_namespace_log,
type, flags, offset);
- if (type == OS_LOG_TYPE_INFO || type == OS_LOG_TYPE_DEBUG) {
- stream = firehose_stream_memory;
- } else {
- stream = firehose_stream_persist;
- }
-
- os_atomic_inc(&oslog_p_coprocessor_total_msgcount, relaxed);
+ counter_inc(&oslog_p_coprocessor_total_msgcount);
// send firehose tracepoint containing os log to firehose buffer
return_id = _firehose_trace(stream, trace_id, timestamp, pubdata,
buff_len + wr_pos, stream_log);
if (return_id == 0) {
- os_atomic_inc(&oslog_p_coprocessor_dropped_msgcount, relaxed);
+ counter_inc(&oslog_p_coprocessor_dropped_msgcount);
return false;
}
return true;
if (slowpath(ft_size + publen > _firehose_chunk_payload_size)) {
// We'll need to have some handling here. For now - return 0
- os_atomic_inc(&oslog_p_error_count, relaxed);
+ counter_inc(&oslog_p_error_count);
return 0;
}
if (!fastpath(ft)) {
if (oslog_boot_done) {
if (stream == firehose_stream_metadata) {
- os_atomic_inc(&oslog_p_metadata_dropped_msgcount, relaxed);
+ counter_inc(&oslog_p_metadata_dropped_msgcount);
} else {
// If we run out of space in the persistence buffer we're
// dropping the message.
- os_atomic_inc(&oslog_p_dropped_msgcount, relaxed);
+ counter_inc(&oslog_p_dropped_msgcount);
}
return 0;
}
offset = firehose_chunk_tracepoint_try_reserve(fbc, stamp,
firehose_stream_persist, 0, (uint16_t)publen, 0, NULL);
if (offset <= 0) {
- os_atomic_inc(&oslog_p_boot_dropped_msgcount, relaxed);
+ counter_inc(&oslog_p_boot_dropped_msgcount);
return 0;
}
thread_tid(current_thread()), offset);
memcpy(ft->ft_data, pubdata, publen);
firehose_chunk_tracepoint_end(fbc, ft, ftid);
- os_atomic_inc(&oslog_p_saved_msgcount, relaxed);
+ counter_inc(&oslog_p_saved_msgcount);
return ftid.ftid_value;
}
if (!oslog_boot_done) {
__firehose_buffer_tracepoint_flush(ft, ftid);
if (stream == firehose_stream_metadata) {
- os_atomic_inc(&oslog_p_metadata_saved_msgcount, relaxed);
+ counter_inc(&oslog_p_metadata_saved_msgcount);
} else {
- os_atomic_inc(&oslog_p_saved_msgcount, relaxed);
+ counter_inc(&oslog_p_saved_msgcount);
}
return ftid.ftid_value;
}
char path[PATH_MAX + sizeof(struct firehose_trace_uuid_info_s)];
} buf;
+ if (os_log_disabled()) {
+ return;
+ }
+
if (path_size > PATH_MAX) {
return;
}
{
oslog_stream_buf_entry_t m_entry = NULL;
+ if (os_log_disabled()) {
+ return;
+ }
+
// If streaming mode is not on, only log the metadata
// in the persistence buffer
m_entry = oslog_stream_create_buf_entry(oslog_stream_link_type_metadata, ftid,
stamp, pubdata, publen);
if (!m_entry) {
- os_atomic_inc(&oslog_s_error_count, relaxed);
+ counter_inc(&oslog_s_error_count);
goto finish;
}
T_ASSERT_EQ_INT(TRUE, os_log_debug_enabled(log_handle), "os_log_debug is enabled");
T_ASSERT_EQ_PTR(&_os_log_default, OS_LOG_DEFAULT, "ensure OS_LOG_DEFAULT is _os_log_default");
- total_msg = oslog_p_total_msgcount;
- saved_msg = oslog_p_saved_msgcount;
- dropped_msg = oslog_p_dropped_msgcount;
+ total_msg = counter_load(&oslog_p_total_msgcount);
+ saved_msg = counter_load(&oslog_p_saved_msgcount);
+ dropped_msg = counter_load(&oslog_p_dropped_msgcount);
T_LOG("oslog internal counters total %u , saved %u, dropped %u", total_msg, saved_msg, dropped_msg);
T_LOG("Validating with uniqid %u u64 %llu", uniqid, a);
}
/* for enabled logging printfs should be saved in oslog as well */
- T_EXPECT_GE_UINT((oslog_p_total_msgcount - total_msg), 2, "atleast 2 msgs should be seen by oslog system");
+ T_EXPECT_GE_UINT((counter_load(&oslog_p_total_msgcount) - total_msg), 2, "at least 2 msgs should be seen by oslog system");
a = mach_absolute_time();
total_seqno = 1;
seqno = 1;
- total_msg = oslog_p_total_msgcount;
- saved_msg = oslog_p_saved_msgcount;
- dropped_msg = oslog_p_dropped_msgcount;
+ total_msg = counter_load(&oslog_p_total_msgcount);
+ saved_msg = counter_load(&oslog_p_saved_msgcount);
+ dropped_msg = counter_load(&oslog_p_dropped_msgcount);
datalen = scnprintf(databuffer, sizeof(databuffer), TESTOSLOGFMT("oslog_info"), uniqid, seqno, total_seqno);
checksum = crc32(0, databuffer, datalen);
os_log_info(log_handle, TESTOSLOG("oslog_info") "mat%llu", checksum, uniqid, seqno, total_seqno, a);
- T_EXPECT_GE_UINT((oslog_p_total_msgcount - total_msg), 1, "total message count in buffer");
+ T_EXPECT_GE_UINT((counter_load(&oslog_p_total_msgcount) - total_msg), 1, "total message count in buffer");
datalen = scnprintf(databuffer, sizeof(databuffer), "kernel^0^test^oslog_info#mat%llu", a);
match_count = find_pattern_in_buffer(databuffer, datalen, total_seqno);
T_EXPECT_EQ_ULONG(match_count, total_seqno, "verify oslog_info does not go to systemlog buffer");
- total_msg = oslog_p_total_msgcount;
+ total_msg = counter_load(&oslog_p_total_msgcount);
test_oslog_info_helper(uniqid, 10);
- T_EXPECT_GE_UINT(oslog_p_total_msgcount - total_msg, 10, "test_oslog_info_helper: Should have seen 10 msgs");
+ T_EXPECT_GE_UINT(counter_load(&oslog_p_total_msgcount) - total_msg, 10, "test_oslog_info_helper: Should have seen 10 msgs");
- total_msg = oslog_p_total_msgcount;
+ total_msg = counter_load(&oslog_p_total_msgcount);
test_oslog_debug_helper(uniqid, 10);
- T_EXPECT_GE_UINT(oslog_p_total_msgcount - total_msg, 10, "test_oslog_debug_helper:Should have seen 10 msgs");
+ T_EXPECT_GE_UINT(counter_load(&oslog_p_total_msgcount) - total_msg, 10, "test_oslog_debug_helper:Should have seen 10 msgs");
- total_msg = oslog_p_total_msgcount;
+ total_msg = counter_load(&oslog_p_total_msgcount);
test_oslog_error_helper(uniqid, 10);
- T_EXPECT_GE_UINT(oslog_p_total_msgcount - total_msg, 10, "test_oslog_error_helper:Should have seen 10 msgs");
+ T_EXPECT_GE_UINT(counter_load(&oslog_p_total_msgcount) - total_msg, 10, "test_oslog_error_helper:Should have seen 10 msgs");
- total_msg = oslog_p_total_msgcount;
+ total_msg = counter_load(&oslog_p_total_msgcount);
test_oslog_default_helper(uniqid, 10);
- T_EXPECT_GE_UINT(oslog_p_total_msgcount - total_msg, 10, "test_oslog_default_helper:Should have seen 10 msgs");
+ T_EXPECT_GE_UINT(counter_load(&oslog_p_total_msgcount) - total_msg, 10, "test_oslog_default_helper:Should have seen 10 msgs");
- total_msg = oslog_p_total_msgcount;
+ total_msg = counter_load(&oslog_p_total_msgcount);
test_oslog_fault_helper(uniqid, 10);
- T_EXPECT_GE_UINT(oslog_p_total_msgcount - total_msg, 10, "test_oslog_fault_helper:Should have seen 10 msgs");
+ T_EXPECT_GE_UINT(counter_load(&oslog_p_total_msgcount) - total_msg, 10, "test_oslog_fault_helper:Should have seen 10 msgs");
- T_LOG("oslog internal counters total %u , saved %u, dropped %u", oslog_p_total_msgcount, oslog_p_saved_msgcount,
- oslog_p_dropped_msgcount);
+ T_LOG("oslog internal counters total %u , saved %u, dropped %u", counter_load(&oslog_p_total_msgcount), counter_load(&oslog_p_saved_msgcount),
+ counter_load(&oslog_p_dropped_msgcount));
return KERN_SUCCESS;
}
kern_return_t kr;
uint32_t uniqid = RandomULong();
- printf("oslog internal counters total %u , saved %u, dropped %u", oslog_p_total_msgcount, oslog_p_saved_msgcount,
- oslog_p_dropped_msgcount);
+ printf("oslog internal counters total %lld , saved %lld, dropped %lld", counter_load(&oslog_p_total_msgcount), counter_load(&oslog_p_saved_msgcount),
+ counter_load(&oslog_p_dropped_msgcount));
kr = kernel_thread_start(_test_log_loop, NULL, &thread[0]);
T_ASSERT_EQ_INT(kr, KERN_SUCCESS, "kernel_thread_start returned successfully");
thread_deallocate(thread[0]);
thread_deallocate(thread[1]);
- T_LOG("oslog internal counters total %u , saved %u, dropped %u", oslog_p_total_msgcount, oslog_p_saved_msgcount,
- oslog_p_dropped_msgcount);
+ T_LOG("oslog internal counters total %lld , saved %lld, dropped %lld", counter_load(&oslog_p_total_msgcount), counter_load(&oslog_p_saved_msgcount),
+ counter_load(&oslog_p_dropped_msgcount));
T_PASS("parallel_logging tests is now complete");
return KERN_SUCCESS;
case 1:
{
/* send out counters */
- out[1] = oslog_p_total_msgcount;
- out[2] = oslog_p_saved_msgcount;
- out[3] = oslog_p_dropped_msgcount;
+ out[1] = counter_load(&oslog_p_total_msgcount);
+ out[2] = counter_load(&oslog_p_saved_msgcount);
+ out[3] = counter_load(&oslog_p_dropped_msgcount);
out[0] = KERN_SUCCESS;
break;
}
test_stresslog_dropmsg(uint32_t uniqid)
{
uint32_t total, saved, dropped;
- total = oslog_p_total_msgcount;
- saved = oslog_p_saved_msgcount;
- dropped = oslog_p_dropped_msgcount;
+ total = counter_load(&oslog_p_total_msgcount);
+ saved = counter_load(&oslog_p_saved_msgcount);
+ dropped = counter_load(&oslog_p_dropped_msgcount);
uniqid = RandomULong();
test_oslog_debug_helper(uniqid, 100);
- while ((oslog_p_dropped_msgcount - dropped) == 0) {
+ while ((counter_load(&oslog_p_dropped_msgcount) - dropped) == 0) {
test_oslog_debug_helper(uniqid, 100);
}
- printf("test_stresslog_dropmsg: logged %u msgs, saved %u and caused a drop of %u msgs. \n", oslog_p_total_msgcount - total,
- oslog_p_saved_msgcount - saved, oslog_p_dropped_msgcount - dropped);
+ printf("test_stresslog_dropmsg: logged %lld msgs, saved %lld and caused a drop of %lld msgs. \n", counter_load(&oslog_p_total_msgcount) - total,
+ counter_load(&oslog_p_saved_msgcount) - saved, counter_load(&oslog_p_dropped_msgcount) - dropped);
return KERN_SUCCESS;
}
--- /dev/null
+/*
+ * Copyright (c) 2015-2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <stdbool.h>
+#include <firehose/tracepoint_private.h>
+#include <kern/assert.h>
+#include <kern/counter.h>
+#include <kern/locks.h>
+#include <pexpert/pexpert.h>
+#include <sys/param.h>
+
+#if __has_feature(ptrauth_calls)
+#include <mach/vm_param.h>
+#include <ptrauth.h>
+#endif /* __has_feature(ptrauth_calls) */
+
+#include "log_encode.h"
+#include "log_mem.h"
+
+#define isdigit(ch) (((ch) >= '0') && ((ch) <= '9'))
+#define log_context_cursor(ctx) &(ctx)->ctx_hdr->hdr_data[(ctx)->ctx_content_off]
+
+extern boolean_t doprnt_hide_pointers;
+
+SCALABLE_COUNTER_DEFINE(oslog_p_fmt_invalid_msgcount);
+SCALABLE_COUNTER_DEFINE(oslog_p_fmt_max_args_msgcount);
+SCALABLE_COUNTER_DEFINE(oslog_p_truncated_msgcount);
+
+static bool
+is_kernel_pointer(void *arg, size_t arg_len)
+{
+ if (arg_len < sizeof(void *)) {
+ return false;
+ }
+
+ unsigned long long value = 0;
+ assert(arg_len <= sizeof(value));
+ (void) memcpy(&value, arg, arg_len);
+
+#if __has_feature(ptrauth_calls)
+ /**
+ * Strip out the pointer authentication code before
+ * checking whether the pointer is a kernel address.
+ */
+ value = (unsigned long long)VM_KERNEL_STRIP_PTR(value);
+#endif /* __has_feature(ptrauth_calls) */
+
+ return value >= VM_MIN_KERNEL_AND_KEXT_ADDRESS && value <= VM_MAX_KERNEL_ADDRESS;
+}
+
+static void
+log_context_cursor_advance(os_log_context_t ctx, size_t amount)
+{
+ ctx->ctx_content_off += amount;
+ assert(log_context_cursor(ctx) <= (ctx->ctx_buffer + ctx->ctx_buffer_sz));
+}
+
+static bool
+log_fits(os_log_context_t ctx, size_t data_size)
+{
+ return (ctx->ctx_content_off + data_size) <= ctx->ctx_content_sz;
+}
+
+static bool
+log_fits_cmd(os_log_context_t ctx, size_t data_size)
+{
+ return log_fits(ctx, sizeof(*ctx->ctx_hdr) + data_size);
+}
+
+static void
+log_range_update(os_log_fmt_range_t range, uint16_t offset, uint16_t length)
+{
+ range->offset = offset;
+ /*
+ * Truncated flag may have already been set earlier, hence do not
+ * overwrite it blindly.
+ */
+ if (length < range->length) {
+ range->truncated = true;
+ }
+ range->length = length;
+}
+
+/*
+ * Stores a command in the main section. The value itself is wrapped in
+ * the os_log_fmt_cmd_t struct.
+ */
+static void
+log_add_cmd(os_log_context_t ctx, os_log_fmt_cmd_type_t type, uint8_t flags,
+ void *arg, size_t arg_size)
+{
+ os_log_fmt_cmd_t cmd;
+ const size_t cmd_sz = sizeof(*cmd) + arg_size;
+
+ assert(log_fits_cmd(ctx, cmd_sz));
+ assert(arg_size <= UINT8_MAX);
+
+ cmd = (os_log_fmt_cmd_t)log_context_cursor(ctx);
+ cmd->cmd_type = type;
+ cmd->cmd_flags = flags;
+ cmd->cmd_size = (uint8_t)arg_size;
+ (void) memcpy(cmd->cmd_data, arg, cmd->cmd_size);
+
+ assert(cmd_sz == sizeof(*cmd) + cmd->cmd_size);
+ log_context_cursor_advance(ctx, cmd_sz);
+}
+
+/*
+ * Collects details about an argument that needs to be stored in the pubdata
+ * section.
+ */
+static void
+log_collect_public_range_data(os_log_context_t ctx, os_log_fmt_range_t range, void *arg)
+{
+ ctx->ctx_pubdata[ctx->ctx_pubdata_cnt++] = (char *)arg;
+ ctx->ctx_pubdata_sz += range->length;
+}
+
+static void
+log_add_range_data(os_log_context_t ctx, os_log_fmt_range_t range, void *arg)
+{
+ assert(log_fits(ctx, range->length));
+ (void) memcpy(log_context_cursor(ctx), arg, range->length);
+ log_context_cursor_advance(ctx, range->length);
+}
+
+static struct os_log_fmt_range_s
+log_create_range(os_log_context_t ctx, size_t arg_len)
+{
+ const size_t final_arg_len = MIN(arg_len, UINT16_MAX);
+
+ return (struct os_log_fmt_range_s) {
+ .offset = ctx->ctx_pubdata_sz,
+ .length = (uint16_t)final_arg_len,
+ .truncated = (final_arg_len < arg_len)
+ };
+}
+
+static int
+log_add_range_arg(os_log_context_t ctx, os_log_fmt_cmd_type_t type, os_log_fmt_cmd_flags_t flags,
+ void *arg, size_t arg_len)
+{
+ struct os_log_fmt_range_s range;
+
+ if (!log_fits_cmd(ctx, sizeof(range))) {
+ return ENOMEM;
+ }
+
+ range = log_create_range(ctx, arg_len);
+
+ if (flags == OSLF_CMD_FLAG_PUBLIC) {
+ if (ctx->ctx_pubdata_cnt == OS_LOG_MAX_PUB_ARGS) {
+ return ENOMEM;
+ }
+ assert(ctx->ctx_pubdata_cnt < OS_LOG_MAX_PUB_ARGS);
+ log_collect_public_range_data(ctx, &range, arg);
+ }
+ log_add_cmd(ctx, type, flags, &range, sizeof(range));
+ ctx->ctx_hdr->hdr_cmd_cnt++;
+
+ return 0;
+}
+
+/*
+ * Adds a scalar argument value to the main section.
+ */
+static int
+log_add_arg(os_log_context_t ctx, os_log_fmt_cmd_type_t type, void *arg, size_t arg_len)
+{
+ assert(type == OSLF_CMD_TYPE_COUNT || type == OSLF_CMD_TYPE_SCALAR);
+ assert(arg_len < UINT16_MAX);
+
+ if (log_fits_cmd(ctx, arg_len)) {
+ log_add_cmd(ctx, type, OSLF_CMD_FLAG_PUBLIC, arg, arg_len);
+ ctx->ctx_hdr->hdr_cmd_cnt++;
+ return 0;
+ }
+
+ return ENOMEM;
+}
+
+static void
+log_encode_public_data(os_log_context_t ctx)
+{
+ const uint16_t orig_content_off = ctx->ctx_content_off;
+ os_log_fmt_hdr_t const hdr = ctx->ctx_hdr;
+ os_log_fmt_cmd_t cmd = (os_log_fmt_cmd_t)hdr->hdr_data;
+
+ assert(ctx->ctx_pubdata_cnt <= hdr->hdr_cmd_cnt);
+
+ for (int i = 0, pub_i = 0; i < hdr->hdr_cmd_cnt; i++, cmd = (os_log_fmt_cmd_t)(cmd->cmd_data + cmd->cmd_size)) {
+ if (cmd->cmd_type != OSLF_CMD_TYPE_STRING) {
+ continue;
+ }
+
+ os_log_fmt_range_t const range __attribute__((aligned(8))) = (os_log_fmt_range_t)&cmd->cmd_data;
+
+ // Fix offset and length of the argument data in the hdr.
+ log_range_update(range, ctx->ctx_content_off - orig_content_off,
+ MIN(range->length, ctx->ctx_content_sz - ctx->ctx_content_off));
+
+ if (range->truncated) {
+ ctx->ctx_truncated = true;
+ }
+
+ assert(pub_i < ctx->ctx_pubdata_cnt);
+ log_add_range_data(ctx, range, ctx->ctx_pubdata[pub_i++]);
+ }
+}
+
+static bool
+log_expand(os_log_context_t ctx, size_t new_size)
+{
+ assert(new_size > ctx->ctx_buffer_sz);
+
+ if (!oslog_is_safe()) {
+ return false;
+ }
+
+ size_t final_size = new_size;
+
+ void *buf = logmem_alloc(ctx->ctx_logmem, &final_size);
+ if (!buf) {
+ return false;
+ }
+ assert(final_size >= new_size);
+
+ // address length header + already stored data
+ const size_t hdr_size = (uint8_t *)ctx->ctx_hdr - ctx->ctx_buffer;
+ const size_t copy_size = hdr_size + sizeof(*ctx->ctx_hdr) + ctx->ctx_content_sz;
+ assert(copy_size <= new_size);
+ (void) memcpy(buf, ctx->ctx_buffer, copy_size);
+
+ if (ctx->ctx_allocated) {
+ logmem_free(ctx->ctx_logmem, ctx->ctx_buffer, ctx->ctx_buffer_sz);
+ }
+
+ ctx->ctx_buffer = buf;
+ ctx->ctx_buffer_sz = final_size;
+ ctx->ctx_content_sz = (uint16_t)(ctx->ctx_buffer_sz - hdr_size - sizeof(*ctx->ctx_hdr));
+ ctx->ctx_hdr = (os_log_fmt_hdr_t)&ctx->ctx_buffer[hdr_size];
+ ctx->ctx_allocated = true;
+
+ return true;
+}
+
+static int
+log_encode_fmt_arg(void *arg, size_t arg_len, os_log_fmt_cmd_type_t type, os_log_context_t ctx)
+{
+ int rc = 0;
+
+ switch (type) {
+ case OSLF_CMD_TYPE_COUNT:
+ case OSLF_CMD_TYPE_SCALAR:
+ // Scrub kernel pointers.
+ if (doprnt_hide_pointers && is_kernel_pointer(arg, arg_len)) {
+ rc = log_add_range_arg(ctx, type, OSLF_CMD_FLAG_PRIVATE, NULL, 0);
+ ctx->ctx_hdr->hdr_flags |= OSLF_HDR_FLAG_HAS_PRIVATE;
+ } else {
+ rc = log_add_arg(ctx, type, arg, arg_len);
+ }
+ break;
+ case OSLF_CMD_TYPE_STRING:
+ rc = log_add_range_arg(ctx, type, OSLF_CMD_FLAG_PUBLIC, arg, arg_len);
+ ctx->ctx_hdr->hdr_flags |= OSLF_HDR_FLAG_HAS_NON_SCALAR;
+ break;
+ default:
+ panic("Unsupported log value type");
+ }
+
+ return rc;
+}
+
+static int
+log_encode_fmt(os_log_context_t ctx, const char *format, va_list args)
+{
+ const char *percent = strchr(format, '%');
+
+ while (percent != NULL) {
+ ++percent;
+
+ if (percent[0] == '%') {
+ percent = strchr(percent + 1, '%'); // Find next format after %%
+ continue;
+ }
+
+ struct os_log_format_value_s value;
+ int type = OST_INT;
+ int prec = 0;
+ char ch;
+
+ for (bool done = false; !done; percent++) {
+ int err = 0;
+
+ switch (ch = percent[0]) {
+ /* type of types or other */
+ case 'l': // longer
+ type++;
+ break;
+
+ case 'h': // shorter
+ type--;
+ break;
+
+ case 'z':
+ type = OST_SIZE;
+ break;
+
+ case 'j':
+ type = OST_INTMAX;
+ break;
+
+ case 't':
+ type = OST_PTRDIFF;
+ break;
+
+ case 'q':
+ type = OST_LONGLONG;
+ break;
+
+ case '.': // precision
+ if ((percent[1]) == '*') {
+ prec = va_arg(args, int);
+ err = log_encode_fmt_arg(&prec, sizeof(prec), OSLF_CMD_TYPE_COUNT, ctx);
+ if (slowpath(err)) {
+ return err;
+ }
+ percent++;
+ continue;
+ } else {
+ // we have to read the precision and do the right thing
+ const char *fmt = percent + 1;
+ prec = 0;
+ while (isdigit(ch = *fmt++)) {
+ prec = 10 * prec + (ch - '0');
+ }
+
+ if (prec > 1024) {
+ prec = 1024;
+ }
+
+ err = log_encode_fmt_arg(&prec, sizeof(prec), OSLF_CMD_TYPE_COUNT, ctx);
+ }
+ break;
+
+ case '-': // left-align
+ case '+': // force sign
+ case ' ': // prefix non-negative with space
+ case '#': // alternate
+ case '\'': // group by thousands
+ break;
+
+ /* fixed types */
+ case 'd': // integer
+ case 'i': // integer
+ case 'o': // octal
+ case 'u': // unsigned
+ case 'x': // hex
+ case 'X': // upper-hex
+ switch (type) {
+ case OST_CHAR:
+ value.type.ch = (char) va_arg(args, int);
+ err = log_encode_fmt_arg(&value.type.ch, sizeof(value.type.ch), OSLF_CMD_TYPE_SCALAR, ctx);
+ break;
+
+ case OST_SHORT:
+ value.type.s = (short) va_arg(args, int);
+ err = log_encode_fmt_arg(&value.type.s, sizeof(value.type.s), OSLF_CMD_TYPE_SCALAR, ctx);
+ break;
+
+ case OST_INT:
+ value.type.i = va_arg(args, int);
+ err = log_encode_fmt_arg(&value.type.i, sizeof(value.type.i), OSLF_CMD_TYPE_SCALAR, ctx);
+ break;
+
+ case OST_LONG:
+ value.type.l = va_arg(args, long);
+ err = log_encode_fmt_arg(&value.type.l, sizeof(value.type.l), OSLF_CMD_TYPE_SCALAR, ctx);
+ break;
+
+ case OST_LONGLONG:
+ value.type.ll = va_arg(args, long long);
+ err = log_encode_fmt_arg(&value.type.ll, sizeof(value.type.ll), OSLF_CMD_TYPE_SCALAR, ctx);
+ break;
+
+ case OST_SIZE:
+ value.type.z = va_arg(args, size_t);
+ err = log_encode_fmt_arg(&value.type.z, sizeof(value.type.z), OSLF_CMD_TYPE_SCALAR, ctx);
+ break;
+
+ case OST_INTMAX:
+ value.type.im = va_arg(args, intmax_t);
+ err = log_encode_fmt_arg(&value.type.im, sizeof(value.type.im), OSLF_CMD_TYPE_SCALAR, ctx);
+ break;
+
+ case OST_PTRDIFF:
+ value.type.pd = va_arg(args, ptrdiff_t);
+ err = log_encode_fmt_arg(&value.type.pd, sizeof(value.type.pd), OSLF_CMD_TYPE_SCALAR, ctx);
+ break;
+
+ default:
+ return EINVAL;
+ }
+ done = true;
+ break;
+
+ case 'p': // pointer
+ value.type.p = va_arg(args, void *);
+ err = log_encode_fmt_arg(&value.type.p, sizeof(value.type.p), OSLF_CMD_TYPE_SCALAR, ctx);
+ done = true;
+ break;
+
+ case 'c': // char
+ value.type.ch = (char) va_arg(args, int);
+ err = log_encode_fmt_arg(&value.type.ch, sizeof(value.type.ch), OSLF_CMD_TYPE_SCALAR, ctx);
+ done = true;
+ break;
+
+ case 's': // string
+ value.type.pch = va_arg(args, char *);
+ if (prec == 0 && value.type.pch) {
+ prec = (int) strlen(value.type.pch) + 1;
+ }
+ err = log_encode_fmt_arg(value.type.pch, prec, OSLF_CMD_TYPE_STRING, ctx);
+ prec = 0;
+ done = true;
+ break;
+
+ case 'm':
+ value.type.i = 0; // Does %m make sense in the kernel?
+ err = log_encode_fmt_arg(&value.type.i, sizeof(value.type.i), OSLF_CMD_TYPE_SCALAR, ctx);
+ done = true;
+ break;
+
+ default:
+ if (isdigit(ch)) { // [0-9]
+ continue;
+ }
+ return EINVAL;
+ }
+
+ if (slowpath(err)) {
+ return err;
+ }
+
+ if (done) {
+ percent = strchr(percent, '%'); // Find next format
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
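As an aside (not part of the patch), a worked example of what the parser above emits may make the command layout concrete; the format string and argument values below are purely illustrative.

/*
 * Illustration: for a kernel call such as
 *
 *     os_log(OS_LOG_DEFAULT, "%s: %d", name, 42);
 *
 * log_encode_fmt() emits, in order:
 *
 *   1. an OSLF_CMD_TYPE_STRING command whose payload is an os_log_fmt_range_s
 *      recording where the bytes of `name` will land in the public-data
 *      section (and OSLF_HDR_FLAG_HAS_NON_SCALAR is set on the header), then
 *   2. an OSLF_CMD_TYPE_SCALAR command carrying the 4-byte int value 42
 *      inline in its payload.
 *
 * The string bytes themselves are appended later by log_encode_public_data(),
 * which also fixes up the range offset and length.
 */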
+static inline size_t
+write_address_location(uint8_t buf[static sizeof(uint64_t)],
+ void *dso, const void *address, firehose_tracepoint_flags_t *flags, bool driverKit)
+{
+ uintptr_t shift_addr = (uintptr_t)address - (uintptr_t)dso;
+
+ kc_format_t kcformat = KCFormatUnknown;
+ __assert_only bool result = PE_get_primary_kc_format(&kcformat);
+ assert(result);
+
+ if (kcformat == KCFormatStatic || kcformat == KCFormatKCGEN) {
+ *flags = _firehose_tracepoint_flags_pc_style_shared_cache;
+ memcpy(buf, (uint32_t[]){ (uint32_t)shift_addr }, sizeof(uint32_t));
+ return sizeof(uint32_t);
+ }
+
+ /*
+ * driverKit will have the dso set as MH_EXECUTE (it is logging from a
+ * syscall in the kernel) but needs logd to parse the address as an
+ * absolute pc.
+ */
+ kernel_mach_header_t *mh = dso;
+ if (mh->filetype == MH_EXECUTE && !driverKit) {
+ *flags = _firehose_tracepoint_flags_pc_style_main_exe;
+ memcpy(buf, (uint32_t[]){ (uint32_t)shift_addr }, sizeof(uint32_t));
+ return sizeof(uint32_t);
+ }
+
+ *flags = _firehose_tracepoint_flags_pc_style_absolute;
+ shift_addr = driverKit ? (uintptr_t)address : VM_KERNEL_UNSLIDE(address);
+ size_t len = sizeof(uintptr_t);
+
+#if __LP64__
+ len = 6; // 48 bits are enough
+#endif
+ memcpy(buf, (uintptr_t[]){ shift_addr }, len);
+
+ return len;
+}
+
+static void
+os_log_encode_location(os_log_context_t ctx, void *addr, void *dso, bool driverKit,
+ firehose_tracepoint_flags_t *ft_flags)
+{
+ const size_t hdr_size = write_address_location(ctx->ctx_buffer, dso, addr, ft_flags, driverKit);
+ ctx->ctx_hdr = (os_log_fmt_hdr_t)&ctx->ctx_buffer[hdr_size];
+ ctx->ctx_content_sz = (uint16_t)(ctx->ctx_buffer_sz - hdr_size - sizeof(*ctx->ctx_hdr));
+}
+
+/*
+ * Encodes argument (meta)data into a format consumed by libtrace. Metadata for
+ * all arguments is stored first; scalar argument values are embedded directly
+ * in their metadata. A second step then appends data that is encoded separately
+ * from its metadata (such as strings).
+ */
+bool
+os_log_context_encode(os_log_context_t ctx, const char *fmt, va_list args, void *addr, void *dso, bool driverKit)
+{
+ os_log_encode_location(ctx, addr, dso, driverKit, &ctx->ctx_ft_flags);
+
+ va_list args_copy;
+ va_copy(args_copy, args);
+
+ int rc = log_encode_fmt(ctx, fmt, args);
+
+ va_end(args_copy);
+
+ switch (rc) {
+ case EINVAL:
+ // Bogus/Unsupported fmt string
+ counter_inc(&oslog_p_fmt_invalid_msgcount);
+ return false;
+ case ENOMEM:
+ /*
+ * The format string contains an unreasonable number of arguments (> 32)
+ * and we ran out of space. We could call log_expand() here and retry.
+ * However, such format strings look more like a misuse of the logging
+ * system, hence we treat this as an error.
+ */
+ counter_inc(&oslog_p_fmt_max_args_msgcount);
+ return false;
+ case 0:
+ break;
+ default:
+ panic("unhandled return value");
+ }
+
+ if (ctx->ctx_pubdata_sz == 0) {
+ goto finish;
+ }
+
+ if (!log_fits(ctx, ctx->ctx_pubdata_sz)) {
+ size_t space_needed = log_context_cursor(ctx) + ctx->ctx_pubdata_sz - ctx->ctx_buffer;
+ space_needed = MIN(space_needed, logmem_max_size(ctx->ctx_logmem));
+ (void) log_expand(ctx, space_needed);
+ }
+
+ log_encode_public_data(ctx);
+
+ if (ctx->ctx_truncated) {
+ counter_inc(&oslog_p_truncated_msgcount);
+ }
+finish:
+ ctx->ctx_content_sz = (uint16_t)(log_context_cursor(ctx) - ctx->ctx_buffer);
+ ctx->ctx_content_off = 0;
+ return true;
+}
+
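For orientation: the buffer handed to _os_log_actual() starts with the tracepoint location written by write_address_location(), followed by the os_log_fmt_hdr_s header, the per-argument commands, and finally any separately encoded public data. Below is a minimal sketch of walking that layout; the helper name is hypothetical and not part of the patch.

/*
 * Hypothetical debugging helper, illustrative only. loc_size is the length
 * returned by write_address_location() for this tracepoint (4 bytes, or 6 for
 * absolute 64-bit PCs).
 */
static void
os_log_context_dump(uint8_t *buffer, size_t loc_size)
{
	os_log_fmt_hdr_t hdr = (os_log_fmt_hdr_t)(buffer + loc_size);
	os_log_fmt_cmd_t cmd = (os_log_fmt_cmd_t)hdr->hdr_data;

	for (uint8_t i = 0; i < hdr->hdr_cmd_cnt; i++) {
		/* Each command header is followed by cmd_size bytes of inline payload. */
		printf("cmd %u: type %u flags %u size %u\n", (unsigned)i,
		    (unsigned)cmd->cmd_type, (unsigned)cmd->cmd_flags, (unsigned)cmd->cmd_size);
		cmd = (os_log_fmt_cmd_t)(cmd->cmd_data + cmd->cmd_size);
	}
	/* Range-typed payloads (e.g. string bytes) follow the last command. */
}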
+void
+os_log_context_init(os_log_context_t ctx, logmem_t *logmem, uint8_t *buffer, size_t buffer_sz)
+{
+ assert(logmem);
+ assert(buffer);
+ assert(buffer_sz > 0);
+
+ bzero(ctx, sizeof(*ctx));
+ ctx->ctx_logmem = logmem;
+ ctx->ctx_buffer = buffer;
+ ctx->ctx_buffer_sz = buffer_sz;
+}
+
+void
+os_log_context_free(os_log_context_t ctx)
+{
+ if (ctx->ctx_allocated) {
+ logmem_free(ctx->ctx_logmem, ctx->ctx_buffer, ctx->ctx_buffer_sz);
+ }
+}
/*
- * Copyright (c) 2015-2016 Apple Inc. All rights reserved.
+ * Copyright (c) 2015-2020 Apple Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
#define log_encode_h
#include "log_encode_types.h"
-#include <sys/param.h>
-#if __has_feature(ptrauth_calls)
-#include <mach/vm_param.h>
-#include <ptrauth.h>
-#endif /* __has_feature(ptrauth_calls) */
-
-#ifdef KERNEL
-#define isdigit(ch) (((ch) >= '0') && ((ch) <= '9'))
-extern boolean_t doprnt_hide_pointers;
-#endif
-
-static bool
-_encode_data(os_log_buffer_value_t content, const void *arg, size_t arg_len, os_log_buffer_context_t context)
-{
- struct os_log_arginfo_s arginfo;
- void *databuf;
-
- arg_len = MIN(arg_len, UINT16_MAX);
-
- if (content->flags & OS_LOG_CONTENT_FLAG_PRIVATE) {
- databuf = context->privdata + context->privdata_off;
- arginfo.length = MIN((uint16_t)arg_len, (context->privdata_sz - context->privdata_off));
- arginfo.offset = context->privdata_off;
- } else {
- databuf = context->pubdata + context->pubdata_off;
- arginfo.length = MIN((uint16_t)arg_len, (context->pubdata_sz - context->pubdata_off));
- arginfo.offset = context->pubdata_off;
- }
-
- if (context->arg_content_sz > 0) {
- arginfo.length = MIN((uint16_t)context->arg_content_sz, arginfo.length);
- }
-
- memcpy(content->value, &arginfo, sizeof(arginfo));
- content->size = sizeof(arginfo);
-
- if (arginfo.length) {
- if (content->type == OS_LOG_BUFFER_VALUE_TYPE_STRING
-#ifndef KERNEL
- || content->type == OS_LOG_BUFFER_VALUE_TYPE_OBJECT
-#endif
- ) {
- strlcpy(databuf, arg, arginfo.length);
- } else {
- memcpy(databuf, arg, arginfo.length);
- }
- }
-
- if (content->flags & OS_LOG_CONTENT_FLAG_PRIVATE) {
- context->privdata_off += arginfo.length;
- } else {
- context->pubdata_off += arginfo.length;
- }
-
- context->content_off += sizeof(*content) + content->size;
- context->arg_content_sz = 0;
-
- return true;
-}
-
-#ifndef KERNEL
-static void
-_os_log_parse_annotated(char *annotated, const char **visibility, const char **library, const char **type)
-{
- char *values[3] = { NULL };
- int cnt = 0;
- int idx = 0;
-
- for (; cnt < 3;) {
- char *token = strsep(&annotated, ", {}");
- if (token == NULL) {
- break;
- }
-
- if (*token == '\0') {
- continue;
- }
-
- values[cnt++] = token;
- }
-
- if ((cnt > 0) && (!strcmp(values[0], "public") || !strcmp(values[0], "private"))) {
- if (visibility != NULL) {
- (*visibility) = values[0];
- }
-
- idx++;
- }
-
- if (idx < cnt && (library != NULL) && (type != NULL)) {
- char *decoder = values[idx];
-
- for (cnt = 0; cnt < 3;) {
- char *token = strsep(&decoder, ": {}");
- if (token == NULL) {
- break;
- }
-
- if (*token == '\0') {
- continue;
- }
-
- values[cnt++] = token;
- }
-
- if (cnt == 2) {
- (*library) = values[0];
- (*type) = values[1];
- }
-
- if (cnt == 1) {
- (*library) = "builtin";
- (*type) = values[0];
- }
- }
-}
-#endif /* !KERNEL */
-
-OS_ALWAYS_INLINE
-static inline bool
-_os_log_encode_arg(void *arg, size_t arg_len, os_log_value_type_t ctype, bool is_private, os_log_buffer_context_t context)
-{
- os_log_buffer_value_t content = (os_log_buffer_value_t) &context->buffer->content[context->content_off];
- size_t content_sz = sizeof(*content) + arg_len;
- char tempString[OS_LOG_BUFFER_MAX_SIZE] = {};
-#ifndef KERNEL
- bool obj_private = true;
-#endif
-
-#ifdef KERNEL
- /* scrub kernel pointers */
- if (doprnt_hide_pointers &&
- ctype == OS_LOG_BUFFER_VALUE_TYPE_SCALAR &&
- arg_len >= sizeof(void *)) {
- unsigned long long value = 0;
- memcpy(&value, arg, arg_len);
-
-#if __has_feature(ptrauth_calls)
- /**
- * Strip out the pointer authentication code before
- * checking whether the pointer is a kernel address.
- */
- value = (unsigned long long)VM_KERNEL_STRIP_PTR(value);
-#endif /* __has_feature(ptrauth_calls) */
-
- if (value >= VM_MIN_KERNEL_AND_KEXT_ADDRESS && value <= VM_MAX_KERNEL_ADDRESS) {
- is_private = true;
- bzero(arg, arg_len);
- }
- }
-#endif
-
- content->type = ctype;
- content->flags = (is_private ? OS_LOG_CONTENT_FLAG_PRIVATE : 0);
-
-#ifndef KERNEL
- if (context->annotated != NULL) {
- const char *visibility = NULL;
-
- _os_log_parse_annotated(context->annotated, &visibility, NULL, NULL);
- if (visibility) {
- if (!strcasecmp(visibility, "private")) {
- content->flags |= OS_LOG_CONTENT_FLAG_PRIVATE;
- } else if (!strcasecmp(visibility, "public")) {
- content->flags &= ~OS_LOG_CONTENT_FLAG_PRIVATE;
- }
- }
-
- context->annotated = NULL;
- }
-#endif /* !KERNEL */
-
- switch (ctype) {
- case OS_LOG_BUFFER_VALUE_TYPE_COUNT:
- case OS_LOG_BUFFER_VALUE_TYPE_SCALAR:
- if (is_private) {
- _encode_data(content, tempString, strlen(tempString) + 1, context);
- } else {
- if ((context->content_off + content_sz) > context->content_sz) {
- return false;
- }
-
- memcpy(content->value, arg, arg_len);
- content->size = (uint8_t)arg_len;
- context->content_off += content_sz;
- }
- break;
-
- case OS_LOG_BUFFER_VALUE_TYPE_STRING:
- context->buffer->flags |= OS_LOG_BUFFER_HAS_NON_SCALAR;
- if (_os_log_string_is_public(arg)) {
- content->flags &= ~OS_LOG_CONTENT_FLAG_PRIVATE;
- }
-
- _encode_data(content, arg, arg_len, context);
- break;
-
-#ifndef KERNEL
- case OS_LOG_BUFFER_VALUE_TYPE_POINTER:
- context->buffer->flags |= OS_LOG_BUFFER_HAS_NON_SCALAR;
- _encode_data(content, arg, arg_len, context);
- break;
-
- case OS_LOG_BUFFER_VALUE_TYPE_OBJECT:
- context->buffer->flags |= OS_LOG_BUFFER_HAS_NON_SCALAR;
- if (!_NSCF2data(arg, tempString, sizeof(tempString), &obj_private)) {
- tempString[0] = '\0';
- }
-
- if (!obj_private) {
- content->flags &= ~OS_LOG_CONTENT_FLAG_PRIVATE;
- }
-
- _encode_data(content, tempString, strlen(tempString) + 1, context);
- break;
-#endif /* !KERNEL */
- }
-
- if (content->flags & OS_LOG_CONTENT_FLAG_PRIVATE) {
- context->buffer->flags |= OS_LOG_BUFFER_HAS_PRIVATE;
- }
-
- context->arg_idx++;
-
- return true;
-}
-
-static bool
-_os_log_encode(const char *format, va_list args, int saved_errno, os_log_buffer_context_t context)
-{
- const char *percent = strchr(format, '%');
-#ifndef KERNEL
- char annotated[256];
-#endif
-
- while (percent != NULL) {
- ++percent;
- if (percent[0] != '%') {
- struct os_log_format_value_s value;
- int type = OST_INT;
-#ifndef KERNEL
- bool long_double = false;
-#endif
- int prec = 0;
- char ch;
-
- for (bool done = false; !done; percent++) {
- switch (ch = percent[0]) {
- /* type of types or other */
- case 'l': // longer
- type++;
- break;
-
- case 'h': // shorter
- type--;
- break;
-
- case 'z':
- type = OST_SIZE;
- break;
-
- case 'j':
- type = OST_INTMAX;
- break;
-
- case 't':
- type = OST_PTRDIFF;
- break;
-
- case '.': // precision
- if ((percent[1]) == '*') {
- prec = va_arg(args, int);
- _os_log_encode_arg(&prec, sizeof(prec), OS_LOG_BUFFER_VALUE_TYPE_COUNT, false, context);
- percent++;
- continue;
- } else {
- // we have to read the precision and do the right thing
- const char *fmt = percent + 1;
- prec = 0;
- while (isdigit(ch = *fmt++)) {
- prec = 10 * prec + (ch - '0');
- }
-
- if (prec > 1024) {
- prec = 1024;
- }
-
- _os_log_encode_arg(&prec, sizeof(prec), OS_LOG_BUFFER_VALUE_TYPE_COUNT, false, context);
- }
- break;
-
- case '-': // left-align
- case '+': // force sign
- case ' ': // prefix non-negative with space
- case '#': // alternate
- case '\'': // group by thousands
- break;
-
- /* fixed types */
- case 'd': // integer
- case 'i': // integer
- case 'o': // octal
- case 'u': // unsigned
- case 'x': // hex
- case 'X': // upper-hex
- switch (type) {
- case OST_CHAR:
- value.type.ch = (char) va_arg(args, int);
- _os_log_encode_arg(&value.type.ch, sizeof(value.type.ch), OS_LOG_BUFFER_VALUE_TYPE_SCALAR, false, context);
- break;
-
- case OST_SHORT:
- value.type.s = (short) va_arg(args, int);
- _os_log_encode_arg(&value.type.s, sizeof(value.type.s), OS_LOG_BUFFER_VALUE_TYPE_SCALAR, false, context);
- break;
-
- case OST_INT:
- value.type.i = va_arg(args, int);
- _os_log_encode_arg(&value.type.i, sizeof(value.type.i), OS_LOG_BUFFER_VALUE_TYPE_SCALAR, false, context);
- break;
-
- case OST_LONG:
- value.type.l = va_arg(args, long);
- _os_log_encode_arg(&value.type.l, sizeof(value.type.l), OS_LOG_BUFFER_VALUE_TYPE_SCALAR, false, context);
- break;
-
- case OST_LONGLONG:
- value.type.ll = va_arg(args, long long);
- _os_log_encode_arg(&value.type.ll, sizeof(value.type.ll), OS_LOG_BUFFER_VALUE_TYPE_SCALAR, false, context);
- break;
-
- case OST_SIZE:
- value.type.z = va_arg(args, size_t);
- _os_log_encode_arg(&value.type.z, sizeof(value.type.z), OS_LOG_BUFFER_VALUE_TYPE_SCALAR, false, context);
- break;
-
- case OST_INTMAX:
- value.type.im = va_arg(args, intmax_t);
- _os_log_encode_arg(&value.type.im, sizeof(value.type.im), OS_LOG_BUFFER_VALUE_TYPE_SCALAR, false, context);
- break;
-
- case OST_PTRDIFF:
- value.type.pd = va_arg(args, ptrdiff_t);
- _os_log_encode_arg(&value.type.pd, sizeof(value.type.pd), OS_LOG_BUFFER_VALUE_TYPE_SCALAR, false, context);
- break;
-
- default:
- return false;
- }
- done = true;
- break;
-
-#ifndef KERNEL
- case '{':
- // we do not support this for shimmed code
- if (context->shimmed) {
- return false;
- }
-
- for (const char *curr2 = percent + 1; (ch = (*curr2)) != NUL; curr2++) {
- if (ch == '}') {
- strlcpy(annotated, percent, MIN(curr2 - (percent + 1), sizeof(annotated)));
- context->annotated = annotated;
- percent = curr2;
- break;
- }
- }
- break;
-#endif /* !KERNEL */
-
- case 'p': // pointer
- value.type.p = va_arg(args, void *);
- _os_log_encode_arg(&value.type.p, sizeof(value.type.p), OS_LOG_BUFFER_VALUE_TYPE_SCALAR, false, context);
- done = true;
- break;
-
-#ifndef KERNEL
- case 'P': // pointer data
- if (context->shimmed) { // we do not support this for shimmed code
- return false;
- }
-
- context->buffer->flags |= OS_LOG_BUFFER_HAS_NON_SCALAR;
- value.type.p = va_arg(args, void *);
-
- // capture the string pointer to generate a symptom
- if (context->log && context->log->generate_symptoms && context->arg_idx == 1 && value.type.pch && prec) {
- context->symptom_ptr = value.type.p;
- context->symptom_ptr_len = prec;
- }
-
- _os_log_encode_arg(value.type.p, prec, OS_LOG_BUFFER_VALUE_TYPE_POINTER, false, context);
- prec = 0;
- done = true;
- break;
-#endif /* !KERNEL */
-
-#ifndef KERNEL
- case 'L': // long double
- long_double = true;
- break;
-
- case 'a': case 'A': case 'e': case 'E': // floating types
- case 'f': case 'F': case 'g': case 'G':
- if (long_double) {
- value.type.ld = va_arg(args, long double);
- _os_log_encode_arg(&value.type.ld, sizeof(value.type.ld), OS_LOG_BUFFER_VALUE_TYPE_SCALAR, false, context);
- } else {
- value.type.d = va_arg(args, double);
- _os_log_encode_arg(&value.type.d, sizeof(value.type.d), OS_LOG_BUFFER_VALUE_TYPE_SCALAR, false, context);
- }
- done = true;
- break;
-#endif /* !KERNEL */
-
- case 'c': // char
- value.type.ch = (char) va_arg(args, int);
- _os_log_encode_arg(&value.type.ch, sizeof(value.type.ch), OS_LOG_BUFFER_VALUE_TYPE_SCALAR, false, context);
- done = true;
- break;
-
-#ifndef KERNEL
- case 'C': // wide-char
- value.type.wch = va_arg(args, wint_t);
- _os_log_encode_arg(&value.type.wch, sizeof(value.type.wch), OS_LOG_BUFFER_VALUE_TYPE_SCALAR, false, context);
- done = true;
- break;
-#endif /* !KERNEL */
-
- case 's': // string
- value.type.pch = va_arg(args, char *);
- if (!prec && value.type.pch != NULL) {
- prec = (int) strlen(value.type.pch) + 1;
- }
-
-#ifndef KERNEL
- // capture the string pointer to generate a symptom
- if (context->log && context->log->generate_symptoms && context->arg_idx == 0 && value.type.pch) {
- context->symptom_str = value.type.pch;
- }
-#endif
-
- context->buffer->flags |= OS_LOG_BUFFER_HAS_NON_SCALAR;
- _os_log_encode_arg(value.type.pch, prec, OS_LOG_BUFFER_VALUE_TYPE_STRING, false, context);
- prec = 0;
- done = true;
- break;
-
-#ifndef KERNEL
- case 'S': // wide-string
- value.type.pwch = va_arg(args, wchar_t *);
- if (!prec && value.type.pwch != NULL) {
- prec = (int) wcslen(value.type.pwch) + 1;
- }
-
- context->buffer->flags |= OS_LOG_BUFFER_HAS_NON_SCALAR;
- _os_log_encode_arg(value.type.pwch, prec, OS_LOG_BUFFER_VALUE_TYPE_STRING, false, context);
- prec = 0;
- done = true;
- break;
-#endif /* !KERNEL */
-
-#ifndef KERNEL
- case '@': // CFTypeRef aka NSObject *
- context->buffer->flags |= OS_LOG_BUFFER_HAS_NON_SCALAR;
- _os_log_encode_arg(va_arg(args, void *), 0, OS_LOG_BUFFER_VALUE_TYPE_OBJECT, false, context);
- done = true;
- break;
-#endif /* !KERNEL */
-
- case 'm':
- value.type.i = saved_errno;
- _os_log_encode_arg(&value.type.i, sizeof(value.type.i), OS_LOG_BUFFER_VALUE_TYPE_SCALAR, false, context);
- done = true;
- break;
-
- default:
- if (isdigit(ch)) { // [0-9]
- continue;
- }
- return false;
- }
-
- if (done) {
- percent = strchr(percent, '%'); // Find next format
- break;
- }
- }
- } else {
- percent = strchr(percent + 1, '%'); // Find next format after %%
- }
- }
-
- context->buffer->arg_cnt = context->arg_idx;
- context->content_sz = context->content_off;
- context->pubdata_sz = context->pubdata_off;
- context->privdata_sz = context->privdata_off;
- context->arg_idx = context->content_off = context->pubdata_off = context->privdata_off = 0;
-
- return true;
-}
+void os_log_context_init(os_log_context_t, logmem_t *, uint8_t *, size_t);
+void os_log_context_free(os_log_context_t);
+bool os_log_context_encode(os_log_context_t, const char *, va_list, void *, void *, bool);
#endif /* log_encode_h */
/*
- * Copyright (c) 2015-2016 Apple Inc. All rights reserved.
+ * Copyright (c) 2015-2020 Apple Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
#include <stdbool.h>
#include <stddef.h>
+#include "log_mem.h"
+
#pragma mark - buffer support structures, enums
-OS_ENUM(os_log_value_type, uint8_t,
- OS_LOG_BUFFER_VALUE_TYPE_SCALAR = 0,
- OS_LOG_BUFFER_VALUE_TYPE_COUNT = 1,
- OS_LOG_BUFFER_VALUE_TYPE_STRING = 2,
-#ifndef KERNEL
- OS_LOG_BUFFER_VALUE_TYPE_POINTER = 3,
- OS_LOG_BUFFER_VALUE_TYPE_OBJECT = 4,
-#endif
+OS_ENUM(os_log_fmt_hdr_flags, uint8_t,
+ OSLF_HDR_FLAG_HAS_PRIVATE = 0x01,
+ OSLF_HDR_FLAG_HAS_NON_SCALAR = 0x02,
+ );
+
+OS_ENUM(os_log_fmt_cmd_type, uint8_t,
+ OSLF_CMD_TYPE_SCALAR = 0, // %u, %lld, %x, %p, %g, ...
+ OSLF_CMD_TYPE_COUNT = 1, // %.16P, %.*s
+ OSLF_CMD_TYPE_STRING = 2, // %s
+ OSLF_CMD_TYPE_POINTER = 3, // %P
+ OSLF_CMD_TYPE_OBJECT = 4, // %@
+ OSLF_CMD_TYPE_WIDE_STRING = 5, // %S
+ OSLF_CMD_TYPE_ERRNO = 6, // %m
+ OSLF_CMD_TYPE_MASK = 7, // %{mask.foo}...
);
-OS_ENUM(os_log_value_subtype, uint8_t,
- OS_LOG_BUFFER_VALUE_SUBTYPE_NONE = 0,
- OS_LOG_BUFFER_VALUE_SUBTYPE_INTEGER = 1,
-#ifndef KERNEL
- OS_LOG_BUFFER_VALUE_SUBTYPE_FLOAT = 2,
-#endif
+OS_ENUM(os_log_fmt_cmd_flags, uint8_t,
+ OSLF_CMD_FLAG_PRIVATE = 0x1,
+ OSLF_CMD_FLAG_PUBLIC = 0x2,
+ OSLF_CMD_FLAG_SENSITIVE = 0x4 | OSLF_CMD_FLAG_PRIVATE,
);
enum os_log_int_types_t {
OST_PTRDIFF = 5,
};
-union os_log_format_types_u {
+union os_log_fmt_types_u {
uint16_t u16;
uint32_t u32;
uint64_t u64;
int i;
void *p;
char *pch;
-#ifndef KERNEL
- wchar_t wch;
- wchar_t *pwch;
-#endif
size_t z;
intmax_t im;
ptrdiff_t pd;
long l;
long long ll;
-#ifndef KERNEL
- double d;
- float f;
- long double ld;
-#endif
};
typedef struct os_log_format_value_s {
- union os_log_format_types_u type;
- os_log_value_type_t ctype;
+ union os_log_fmt_types_u type;
+ os_log_fmt_cmd_type_t ctype;
uint16_t size;
} *os_log_format_value_t;
-#define OST_FORMAT_MAX_ARGS 48
-#ifdef KERNEL
-#define OST_FORMAT_MAX_STRING_SIZE 512
-#else
-#define OST_FORMAT_MAX_STRING_SIZE 1024
-#endif
-
-#define OST_FORMAT_NON_STATIC ~0
-
-typedef struct os_log_buffer_value_s {
-#define OS_LOG_CONTENT_FLAG_PRIVATE 0x1
- uint8_t flags : 4;
- os_log_value_type_t type : 4;
- uint8_t size;
- uint8_t value[];
-} *os_log_buffer_value_t;
-
-typedef struct os_log_buffer_s {
-#define OS_LOG_BUFFER_HAS_PRIVATE 0x1
-#define OS_LOG_BUFFER_HAS_NON_SCALAR 0x2
- uint8_t flags;
- uint8_t arg_cnt;
- uint8_t content[];
-} *os_log_buffer_t;
-
-typedef struct os_log_buffer_context_s {
- os_log_t log;
- os_log_buffer_t buffer;
- uint8_t *pubdata;
- uint8_t *privdata;
-
- // composed string
- char *comp;
- size_t comp_off;
- size_t comp_sz;
-
- // sizes and offsets
- uint16_t content_off; // offset into buffer->content
- uint16_t content_sz; // size not including the header
- uint16_t pubdata_off;
- uint16_t pubdata_sz;
- uint16_t privdata_off;
- uint16_t privdata_sz;
-
- uint8_t arg_idx;
-
- // if argument content was limited with %.* or %.#
-
-#ifndef KERNEL
- const char *symptom_str;
- const void *symptom_ptr;
- uint16_t symptom_ptr_len;
- char *annotated;
-#endif
- int arg_content_sz;
- bool need_size;
- bool shimmed;
-} *os_log_buffer_context_t;
-
-typedef struct os_log_arginfo_s {
- uint16_t offset;
- uint16_t length;
-} *os_log_arginfo_t;
-
-/* Clients of these interfaces/structures may be expected to provide implementations of the following functions */
+typedef struct os_log_fmt_hdr_s {
+ os_log_fmt_hdr_flags_t hdr_flags;
+ uint8_t hdr_cmd_cnt;
+ uint8_t hdr_data[];
+} *os_log_fmt_hdr_t;
-#ifndef KERNEL
-extern bool
-_NSCF2data(const void *obj, char *string_value, size_t string_sz, bool *is_private);
-#endif
+typedef struct os_log_fmt_cmd_s {
+ os_log_fmt_cmd_flags_t cmd_flags : 4;
+ os_log_fmt_cmd_type_t cmd_type : 4;
+ uint8_t cmd_size;
+ uint8_t cmd_data[];
+} *os_log_fmt_cmd_t;
-extern bool
-_os_log_string_is_public(const char *str);
+typedef struct os_log_fmt_range_s {
+ uint16_t offset;
+ uint16_t length : 15;
+ uint16_t truncated : 1;
+} *os_log_fmt_range_t;
+
+#define OS_LOG_MAX_PUB_ARGS (32)
+
+typedef struct os_log_context_s {
+ logmem_t *ctx_logmem;
+ uint8_t *ctx_buffer;
+ size_t ctx_buffer_sz;
+ os_log_fmt_hdr_t ctx_hdr;
+ char *ctx_pubdata[OS_LOG_MAX_PUB_ARGS];
+ uint16_t ctx_content_off; // offset into buffer->hdr_data
+ uint16_t ctx_content_sz; // size not including the header
+ uint16_t ctx_pubdata_sz;
+ uint16_t ctx_pubdata_cnt;
+ firehose_tracepoint_flags_t ctx_ft_flags;
+ uint8_t ctx_truncated : 1;
+ uint8_t ctx_allocated : 1;
+} *os_log_context_t;
#endif /* log_encode_types_h */
--- /dev/null
+/*
+ * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <kern/assert.h>
+#include <kern/locks.h>
+#include <os/atomic_private.h>
+
+#include "log_mem.h"
+
+#define BLOCK_INVALID ((size_t)-1)
+#define BLOCK_LEVEL_BASE(level) ((1 << (level)) - 1)
+#define BLOCK_SIZE(level) (1 << (level))
+#define BLOCK_PARENT(b) (((b) % 2 == 0) ? ((b) >> 1) - 1 : ((b) >> 1))
+#define BLOCK_LCHILD(b) (((b) << 1) + 1)
+#define BLOCK_BUDDY(b) (((b) & 0x1) ? (b) + 1 : (b) - 1)
+#define BLOCK_INDEX(lm, l, a, s) \
+ (BLOCK_LEVEL_BASE(l) + ((uintptr_t)(a) - (uintptr_t)(lm)->lm_mem) / (s))
+
+#define BITMAP_BUCKET_SIZE (8 * sizeof(((logmem_t *)0)->lm_mem_map[0]))
+#define BITMAP_BUCKET(i) ((i) / BITMAP_BUCKET_SIZE)
+#define BITMAP_BIT(i) (1 << (BITMAP_BUCKET_SIZE - ((i) % BITMAP_BUCKET_SIZE) - 1))
+
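To sanity-check the index arithmetic above, here are a few illustrative compile-time assertions (not part of the patch) for a hypothetical 1 KiB arena, i.e. cap order 10, serving 32- to 256-byte blocks:

/* Illustrative only: a 64-byte request maps to level 10 - 6 = 4 of the block tree. */
_Static_assert(BLOCK_LEVEL_BASE(4) == 15, "level 4 starts at tree index 15");
_Static_assert(BLOCK_SIZE(6) == 64, "order-6 blocks are 64 bytes");
_Static_assert(BLOCK_LCHILD(8) == 17, "block 8 splits into blocks 17 and 18");
_Static_assert(BLOCK_BUDDY(17) == 18 && BLOCK_PARENT(17) == 8,
    "blocks 17 and 18 are buddies under parent 8");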
+static bool
+bitmap_get(logmem_t *lm, size_t block)
+{
+ return lm->lm_mem_map[BITMAP_BUCKET(block)] & BITMAP_BIT(block);
+}
+
+static void
+bitmap_set(logmem_t *lm, size_t block)
+{
+ lm->lm_mem_map[BITMAP_BUCKET(block)] |= BITMAP_BIT(block);
+}
+
+static void
+bitmap_clear(logmem_t *lm, size_t block)
+{
+ lm->lm_mem_map[BITMAP_BUCKET(block)] &= ~BITMAP_BIT(block);
+}
+
+static void
+bitmap_reserve_root(logmem_t *lm, size_t block)
+{
+ const size_t top_block = BLOCK_LEVEL_BASE(lm->lm_cap_order - lm->lm_max_order);
+
+ for (ssize_t next = BLOCK_PARENT(block); next >= top_block; next = BLOCK_PARENT(next)) {
+ /*
+ * If the rest of the root path is already marked as
+ * allocated we are done.
+ */
+ if (bitmap_get(lm, next)) {
+ break;
+ }
+ bitmap_set(lm, next);
+ }
+}
+
+static void
+bitmap_release_root(logmem_t *lm, size_t block)
+{
+ const size_t top_block = BLOCK_LEVEL_BASE(lm->lm_cap_order - lm->lm_max_order);
+ int buddy_allocated = 0;
+
+ while (block > top_block) {
+ buddy_allocated = bitmap_get(lm, BLOCK_BUDDY(block));
+ block = BLOCK_PARENT(block);
+ /*
+ * If there is another allocation within the parent subtree
+ * in place we cannot mark the rest of the root path as free.
+ */
+ if (buddy_allocated) {
+ break;
+ }
+ bitmap_clear(lm, block);
+ }
+}
+
+static void
+bitmap_update_subtree(logmem_t *lm, size_t level, size_t block, void (*fun)(logmem_t *, size_t))
+{
+ const size_t lcount = lm->lm_cap_order - lm->lm_min_order - level + 1;
+
+ for (size_t l = 0, n = 1; l < lcount; l++, n <<= 1) {
+ for (int i = 0; i < n; i++) {
+ fun(lm, block + i);
+ }
+ block = BLOCK_LCHILD(block);
+ }
+}
+
+static void
+bitmap_release_subtree(logmem_t *lm, size_t level, size_t block)
+{
+ bitmap_update_subtree(lm, level, block, bitmap_clear);
+}
+
+static void
+bitmap_reserve_subtree(logmem_t *lm, size_t level, size_t block)
+{
+ bitmap_update_subtree(lm, level, block, bitmap_set);
+}
+
+static size_t
+block_size_level(logmem_t *lm, size_t amount)
+{
+ for (size_t l = lm->lm_min_order; l <= lm->lm_max_order; l++) {
+ if (amount <= BLOCK_SIZE(l)) {
+ return lm->lm_cap_order - l;
+ }
+ }
+ return BLOCK_INVALID;
+}
+
+static size_t
+block_locate(logmem_t *lm, void *addr, size_t amount, size_t *block)
+{
+ size_t level = block_size_level(lm, amount);
+ if (level != BLOCK_INVALID) {
+ *block = BLOCK_INDEX(lm, level, addr, amount);
+ }
+ return level;
+}
+
+static size_t
+block_reserve(logmem_t *lm, size_t level)
+{
+ assert(level != BLOCK_INVALID);
+
+ const size_t base = BLOCK_LEVEL_BASE(level);
+ const size_t end = base + BLOCK_SIZE(level);
+
+ lck_spin_lock(lm->lm_lock);
+ for (size_t block = base; block < end; block++) {
+ if (!bitmap_get(lm, block)) {
+ bitmap_reserve_root(lm, block);
+ bitmap_reserve_subtree(lm, level, block);
+ lck_spin_unlock(lm->lm_lock);
+ return block - base;
+ }
+ }
+ lck_spin_unlock(lm->lm_lock);
+
+ return BLOCK_INVALID;
+}
+
+void *
+logmem_alloc(logmem_t *lm, size_t *amount)
+{
+ assert(amount);
+
+ os_atomic_inc(&lm->lm_cnt_allocations, relaxed);
+
+ if (*amount == 0 || *amount > BLOCK_SIZE(lm->lm_max_order)) {
+ os_atomic_inc(&lm->lm_cnt_failed_size, relaxed);
+ return NULL;
+ }
+
+ size_t level = block_size_level(lm, *amount);
+ size_t block = block_reserve(lm, level);
+
+ if (block == BLOCK_INVALID) {
+ os_atomic_inc(&lm->lm_cnt_failed_full, relaxed);
+ return NULL;
+ }
+
+ *amount = BLOCK_SIZE(lm->lm_cap_order - level);
+ os_atomic_sub(&lm->lm_cnt_free, (uint32_t)*amount, relaxed);
+
+ return &lm->lm_mem[block * *amount];
+}
+
+void
+logmem_free(logmem_t *lm, void *addr, size_t amount)
+{
+ assert(addr);
+ assert(amount > 0 && ((amount & (amount - 1)) == 0));
+
+ size_t block = BLOCK_INVALID;
+ size_t level = block_locate(lm, addr, amount, &block);
+ assert(level != BLOCK_INVALID);
+ assert(block != BLOCK_INVALID);
+
+ lck_spin_lock(lm->lm_lock);
+ bitmap_release_root(lm, block);
+ bitmap_release_subtree(lm, level, block);
+ lck_spin_unlock(lm->lm_lock);
+
+ os_atomic_add(&lm->lm_cnt_free, (uint32_t)amount, relaxed);
+}
+
+size_t
+logmem_max_size(const logmem_t *lm)
+{
+ return BLOCK_SIZE(lm->lm_max_order);
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef log_mem_h
+#define log_mem_h
+
+#include <stddef.h>
+#include <stdint.h>
+
+/*
+ * A simple allocator on top of a plain byte array, primarily intended to
+ * support OS kernel logging while avoiding a dependency on the VM subsystem.
+ */
+typedef struct logmem_s {
+ lck_spin_t *lm_lock;
+ uint8_t *lm_mem;
+ uint8_t *lm_mem_map;
+ size_t lm_cap_order;
+ size_t lm_min_order;
+ size_t lm_max_order;
+ uint32_t lm_cnt_allocations;
+ uint32_t lm_cnt_failed_size;
+ uint32_t lm_cnt_failed_full;
+ uint32_t lm_cnt_free;
+} logmem_t;
+
+/*
+ * Static initializer for global instances of logmem. The size order defines
+ * the total amount of logmem memory; the min and max orders set the minimum
+ * and maximum sizes, respectively, of a single allocation from the given
+ * logmem. Local or dynamically allocated instances of logmem should not be
+ * initialized by this macro.
+ */
+#define LOGMEM_STATIC_INIT(name, size_order, min_order, max_order) \
+ SIMPLE_LOCK_DECLARE(name##_lck, 0); \
+ logmem_t name = { \
+ .lm_lock = (lck_spin_t *)&name##_lck, \
+ .lm_mem = (uint8_t[(1 << (size_order))]){ 0 }, \
+ .lm_mem_map = (uint8_t[MAX(1, (1 << ((size_order) - (min_order) + 1)) / 8)]){ 0 }, \
+ .lm_cap_order = (size_order), \
+ .lm_max_order = (max_order), \
+ .lm_min_order = (min_order), \
+ .lm_cnt_free = (1 << (size_order)) \
+ };
+
+/*
+ * Allocates memory from the given logmem. Returns a pointer to the beginning
+ * of the allocated block and updates the size argument in place; the resulting
+ * block size is equal to or larger than the size originally requested.
+ */
+void *logmem_alloc(logmem_t *, size_t *);
+
+/*
+ * Frees memory previously allocated by logmem_alloc(). The caller must pass
+ * the exact pointer and size values returned by logmem_alloc().
+ */
+void logmem_free(logmem_t *, void *, size_t);
+
+/*
+ * Returns the maximum memory size allocatable by the logmem.
+ */
+size_t logmem_max_size(const logmem_t *);
+
+#endif /* log_mem_h */
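For orientation, here is a minimal usage sketch of the interface above, assuming a kernel client; the instance name and orders below are hypothetical and not part of this change. With these values, a 100-byte request comes back rounded up to a 128-byte block, and the updated size must be handed back to logmem_free().

/* Hypothetical sketch: 4 KiB of log memory, blocks of 32..512 bytes. */
LOGMEM_STATIC_INIT(example_logmem, 12 /* size */, 5 /* min */, 9 /* max */);

static void
example_log_write(void)
{
	size_t amount = 100;    /* rounded up by logmem_alloc(), here to 128 */
	void *buf = logmem_alloc(&example_logmem, &amount);
	if (buf != NULL) {
		/* ... fill at most `amount` bytes ... */
		logmem_free(&example_logmem, buf, amount);
	}
}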
* Sign a blob of data with the GA key
*
*/
+__attribute__((noinline))
ptrauth_generic_signature_t
-ptrauth_utils_sign_blob_generic(void * ptr, size_t len_bytes, uint64_t data, int flags)
+ptrauth_utils_sign_blob_generic(const void * ptr, size_t len_bytes, uint64_t data, int flags)
{
ptrauth_generic_signature_t sig = 0;
data ^= (uint64_t)ptr;
}
- /* First round adds salt */
+ /* First round adds ptrauth_utils_sign_blob_generic discrimination. */
+ sig = ptrauth_sign_generic_data(sig, ptrauth_string_discriminator("ptrauth_utils_sign_blob_generic-prologue") | 0x01);
+
+ /* Second round adds salt */
sig = ptrauth_sign_generic_data(sig, data);
/* Calculate an additive signature of the buffer */
for (uint64_t i = 0; i < rounds; i++) {
- sig = ptrauth_sign_generic_data(*(uintptr_t *)ptr, sig);
+ sig = ptrauth_sign_generic_data(*(const uintptr_t *)ptr, sig);
ptr += sizeof(uintptr_t);
}
/* ptrauth_sign_generic_data operates on pointer-sized values only,
* so we need to handle trailing bytes for the non-pointer-aligned case */
if (ntrailing) {
- memcpy(&trailing, ptr, ntrailing);
+ for (int i = 0; i < ntrailing; i++) {
+ ((uint8_t *)&trailing)[i] = ((const uint8_t *)ptr)[i];
+ }
sig = ptrauth_sign_generic_data(trailing, sig);
}
+
+ /* Final round to add an additional cookie */
+ sig = ptrauth_sign_generic_data(sig, ptrauth_string_discriminator("ptrauth_utils_sign_blob_generic-epilogue") | 0x01);
+
return sig;
}
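As a hedged illustration of how the signing routine above and the authentication routine below pair up (the parameters here are placeholders, not values from this change), the signature returned by signing is later verified with the same pointer, length, salt, and flags:

/* Illustrative sketch only; all arguments are supplied by the caller. */
static void
example_sign_then_auth(const void *blob, size_t len, uint64_t salt, int flags)
{
	ptrauth_generic_signature_t sig =
	    ptrauth_utils_sign_blob_generic(blob, len, salt, flags);
	/* ... later, before trusting the contents of `blob` ... */
	ptrauth_utils_auth_blob_generic(blob, len, salt, flags, sig);
}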
*
* Authenticate signature produced by ptrauth_utils_sign_blob_generic
*/
+__attribute__((noinline))
void
-ptrauth_utils_auth_blob_generic(void * ptr, size_t len_bytes, uint64_t data, int flags, ptrauth_generic_signature_t signature)
+ptrauth_utils_auth_blob_generic(const void * ptr, size_t len_bytes, uint64_t data, int flags, ptrauth_generic_signature_t signature)
{
ptrauth_generic_signature_t calculated_signature = 0;
$(COMPONENT).filelist: $(OBJS)
$(_v)for kld_file in ${KLD_FILES}; do \
- $(SEG_HACK) -n __KLD -o $${kld_file}__ $${kld_file} || exit 1; \
+ $(SEG_HACK) -s __TEXT -n __KLD -o $${kld_file}__ $${kld_file} || exit 1; \
+ mv $${kld_file}__ $${kld_file} || exit 1; \
+ $(SEG_HACK) -i __KLD -n __KLDDATA -o $${kld_file}__ $${kld_file} || exit 1; \
mv $${kld_file}__ $${kld_file} || exit 1; \
done
@$(LOG_LDFILELIST) "$(COMPONENT)"
CODE_SIGN_IDENTITY = -
DYLIB_CURRENT_VERSION = $(RC_ProjectSourceVersion)
DYLIB_LDFLAGS = -umbrella System -all_load
-DYLIB_LDFLAGS[sdk=iphoneos*] = $(inherited) -Wl,-sectalign,__DATA,__data,1000
-DYLIB_LDFLAGS[sdk=watchos*] = $(inherited) -Wl,-sectalign,__DATA,__data,1000
-DYLIB_LDFLAGS[sdk=tvos*] = $(inherited) -Wl,-sectalign,__DATA,__data,1000
-DYLIB_LDFLAGS[sdk=appletvos*] = $(inherited) -Wl,-sectalign,__DATA,__data,1000
-DYLIB_LDFLAGS[sdk=bridgeos*] = $(inherited) -Wl,-sectalign,__DATA,__data,1000
OTHER_LDFLAGS = $(SIMULATOR_LDFLAGS)
SIMULATOR_LDFLAGS =
SIMULATOR_LDFLAGS[sdk=macosx*] = -Wl,-simulator_support
24B8C2621237F53900D36CC3 /* remove-counter.c in Sources */ = {isa = PBXBuildFile; fileRef = 24B8C2611237F53900D36CC3 /* remove-counter.c */; };
24D1158311E671B20063D54D /* SYS.h in Headers */ = {isa = PBXBuildFile; fileRef = 24D1157411E671B20063D54D /* SYS.h */; };
24E4782712088267009A384D /* _libc_funcptr.c in Sources */ = {isa = PBXBuildFile; fileRef = 24E47824120881DF009A384D /* _libc_funcptr.c */; };
+ 2561E8AA25082E6300EAA925 /* task.c in Sources */ = {isa = PBXBuildFile; fileRef = 2561E8A925082E6300EAA925 /* task.c */; };
291D3C281354FDD100D46061 /* mach_port.c in Sources */ = {isa = PBXBuildFile; fileRef = 291D3C261354FDD100D46061 /* mach_port.c */; };
291D3C291354FDD100D46061 /* mach_vm.c in Sources */ = {isa = PBXBuildFile; fileRef = 291D3C271354FDD100D46061 /* mach_vm.c */; };
29A59AE2183B0DE000E8B896 /* renameat.c in Sources */ = {isa = PBXBuildFile; fileRef = 29A59AE1183B0DE000E8B896 /* renameat.c */; };
24D1159811E672270063D54D /* syscall.map */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = syscall.map; sourceTree = "<group>"; };
24D1159911E6723E0063D54D /* create-syscalls.pl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.perl; path = "create-syscalls.pl"; sourceTree = "<group>"; };
24E47824120881DF009A384D /* _libc_funcptr.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = _libc_funcptr.c; sourceTree = "<group>"; };
+ 2561E8A925082E6300EAA925 /* task.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = task.c; sourceTree = "<group>"; };
291D3C261354FDD100D46061 /* mach_port.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mach_port.c; sourceTree = "<group>"; };
291D3C271354FDD100D46061 /* mach_vm.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mach_vm.c; sourceTree = "<group>"; };
29A59AE1183B0DE000E8B896 /* renameat.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = renameat.c; sourceTree = "<group>"; };
C9D9BCCC114B00600000D8B9 /* err_libkern.sub */,
C9D9BCCD114B00600000D8B9 /* err_mach_ipc.sub */,
C9D9BCCE114B00600000D8B9 /* err_server.sub */,
+ 2561E8A925082E6300EAA925 /* task.c */,
C9D9BCCF114B00600000D8B9 /* err_us.sub */,
C9D9BCD0114B00600000D8B9 /* error_codes.c */,
C9D9BCD1114B00600000D8B9 /* errorlib.h */,
24A7C5C111FF8DA6007669EB /* getsockname.c in Sources */,
925559921CBC23C300E527CE /* mach_boottime.c in Sources */,
24A7C5C211FF8DA6007669EB /* lchown.c in Sources */,
+ 2561E8AA25082E6300EAA925 /* task.c in Sources */,
24A7C5C311FF8DA6007669EB /* listen.c in Sources */,
24A7C5C411FF8DA6007669EB /* recvfrom.c in Sources */,
13CBF78224575F9F00B26F7D /* open-base.c in Sources */,
+++ /dev/null
-*.pbxuser
-*.perspectivev3
-build/
host_get_atm_diagnostic_flag(host_t host __unused,
uint32_t *diagnostic_flag)
{
- volatile uint32_t *diagnostic_flag_address = (volatile uint32_t *)(uintptr_t)(_COMM_PAGE_ATM_DIAGNOSTIC_CONFIG);
- *diagnostic_flag = *diagnostic_flag_address;
+ *diagnostic_flag = COMM_PAGE_READ(uint32_t, ATM_DIAGNOSTIC_CONFIG);
return KERN_SUCCESS;
}
uint32_t *multiuser_flags)
{
#if (TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR)
- volatile uint32_t *multiuser_flag_address = (volatile uint32_t *)(uintptr_t)(_COMM_PAGE_MULTIUSER_CONFIG);
- *multiuser_flags = *multiuser_flag_address;
+ *multiuser_flags = COMM_PAGE_READ(uint32_t, MULTIUSER_CONFIG);
return KERN_SUCCESS;
#else
(void)multiuser_flags;
#include <sys/cdefs.h>
+#ifndef KERNEL
+#include <Availability.h>
+#endif
+
/*
* Kernel-related ports; how a task/thread controls itself
*/
__BEGIN_DECLS
extern mach_port_t mach_host_self(void);
extern mach_port_t mach_thread_self(void);
+__API_AVAILABLE(macos(11.3), ios(14.5), tvos(14.5), watchos(7.3))
+extern boolean_t mach_task_is_self(task_name_t task);
extern kern_return_t host_page_size(host_t, vm_size_t *);
extern mach_port_t mach_task_self_;
*/
const char *mach_task_special_port_description(int offset);
+/*
+ * Returns a string describing the thread special port offset provided, or NULL if
+ * the provided offset is not a thread special port offset.
+ */
+const char *mach_thread_special_port_description(int offset);
+
/*
* Returns the port for the given identifier of a host special port. For
* instance, passing "HOST_PRIV_PORT" would return 1.
*/
int mach_task_special_port_for_id(const char *id);
+/*
+ * Returns the port for the given identifier of a thread special port.
+ *
+ * Returns -1 on error.
+ */
+int mach_thread_special_port_for_id(const char *id);
+
__END_DECLS
#endif /* !defined(_MACH_PORT_DESCRIPTIONS_) */
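A hedged sketch of how the new thread-special-port helpers pair up, assuming the identifier strings match the constant names (as the host-port example earlier in this header suggests):

#include <mach/port_descriptions.h>
#include <mach/thread_special_ports.h>
#include <stdio.h>

static void
print_thread_read_port(void)
{
	int port = mach_thread_special_port_for_id("THREAD_READ_PORT");
	if (port != -1) {
		printf("%d -> %s\n", port, mach_thread_special_port_description(port));
	}
}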
if (vm_kernel_page_shift == 0) {
#if defined(__x86_64__) || defined(__i386__)
- if ((*((uint16_t *)_COMM_PAGE_VERSION) >= COMM_PAGE_KERNEL_PAGE_SHIFT_MIN_VERSION)) {
- vm_kernel_page_shift = *(uint8_t*) _COMM_PAGE_KERNEL_PAGE_SHIFT;
+ if (COMM_PAGE_READ(uint16_t, VERSION) >= COMM_PAGE_KERNEL_PAGE_SHIFT_MIN_VERSION) {
+ vm_kernel_page_shift = COMM_PAGE_READ(uint8_t, KERNEL_PAGE_SHIFT);
} else {
vm_kernel_page_shift = I386_PGSHIFT;
}
#else
- vm_kernel_page_shift = *(uint8_t*) _COMM_PAGE_KERNEL_PAGE_SHIFT;
+ vm_kernel_page_shift = COMM_PAGE_READ(uint8_t, KERNEL_PAGE_SHIFT);
#endif
vm_kernel_page_size = 1 << vm_kernel_page_shift;
vm_kernel_page_mask = vm_kernel_page_size - 1;
if (vm_page_shift == 0) {
#if defined(__arm64__)
- vm_page_shift = *(uint8_t*) _COMM_PAGE_USER_PAGE_SHIFT_64;
+ vm_page_shift = COMM_PAGE_READ(uint8_t, USER_PAGE_SHIFT_64);
#elif defined(__arm__)
- vm_page_shift = *(uint8_t*) _COMM_PAGE_USER_PAGE_SHIFT_32;
+ vm_page_shift = COMM_PAGE_READ(uint8_t, USER_PAGE_SHIFT_32);
#else
- if ((*((uint16_t *)_COMM_PAGE_VERSION) >= COMM_PAGE_KERNEL_PAGE_SHIFT_MIN_VERSION)) {
- vm_page_shift = *(uint8_t*) _COMM_PAGE_USER_PAGE_SHIFT_64;
+ if (COMM_PAGE_READ(uint16_t, VERSION) >= COMM_PAGE_KERNEL_PAGE_SHIFT_MIN_VERSION) {
+ vm_page_shift = COMM_PAGE_READ(uint8_t, USER_PAGE_SHIFT_64);
} else {
vm_page_shift = vm_kernel_page_shift;
}
}
static inline mach_port_t
-_tsd_get_special_reply_port()
+_tsd_get_special_reply_port(void)
{
return (mach_port_t)(uintptr_t)_os_tsd_get_direct(__TSD_MACH_SPECIAL_REPLY);
}
return rv;
}
+kern_return_t
+mach_vm_remap_new(
+ mach_port_name_t target,
+ mach_vm_address_t *address,
+ mach_vm_size_t size,
+ mach_vm_offset_t mask,
+ int flags,
+ mach_port_name_t src_task,
+ mach_vm_address_t src_address,
+ boolean_t copy,
+ vm_prot_t *cur_protection,
+ vm_prot_t *max_protection,
+ vm_inherit_t inheritance)
+{
+ kern_return_t rv;
+
+ /* {max,cur}_protection is inout */
+ rv = _kernelrpc_mach_vm_remap_new(target, address, size, mask, flags,
+ src_task, src_address, copy, cur_protection, max_protection,
+ inheritance);
+
+ if (__syscall_logger && rv == KERN_SUCCESS) {
+ int eventTypeFlags = stack_logging_type_vm_allocate | stack_logging_type_mapped_file_or_shared_mem;
+ int userTagFlags = flags & VM_FLAGS_ALIAS_MASK;
+ __syscall_logger(eventTypeFlags | userTagFlags, (uintptr_t)target, (uintptr_t)size, 0, (uintptr_t)*address, 0);
+ }
+
+ return rv;
+}
+
kern_return_t
mach_vm_read(
mach_port_name_t target,
return rv;
}
+kern_return_t
+vm_remap_new(
+ mach_port_name_t target,
+ vm_address_t *address,
+ vm_size_t size,
+ vm_offset_t mask,
+ int flags,
+ mach_port_name_t src_task,
+ vm_address_t src_address,
+ boolean_t copy,
+ vm_prot_t *cur_protection,
+ vm_prot_t *max_protection,
+ vm_inherit_t inheritance)
+{
+ kern_return_t rv;
+
+ /* {max,cur}_protection is inout */
+ rv = _kernelrpc_vm_remap_new(target, address, size, mask, flags,
+ src_task, src_address, copy, cur_protection, max_protection,
+ inheritance);
+
+ if (__syscall_logger) {
+ int eventTypeFlags = stack_logging_type_vm_allocate | stack_logging_type_mapped_file_or_shared_mem;
+ int userTagFlags = flags & VM_FLAGS_ALIAS_MASK;
+ __syscall_logger(eventTypeFlags | userTagFlags, (uintptr_t)target, (uintptr_t)size, 0, (uintptr_t)*address, 0);
+ }
+
+ return rv;
+}
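For context, a hedged user-space sketch of calling the new remap entry point against the caller's own task; the helper name and values are illustrative, the protection arguments are in/out per the comments above, and the prototype is assumed to be exported via <mach/mach_vm.h>.

#include <mach/mach.h>
#include <mach/mach_vm.h>

/* Illustrative only: create a copy-on-write alias of `src` in the same task. */
static kern_return_t
alias_buffer(mach_vm_address_t src, mach_vm_size_t size, mach_vm_address_t *out)
{
	vm_prot_t cur = VM_PROT_READ | VM_PROT_WRITE;   /* requested current protection (in/out) */
	vm_prot_t max = VM_PROT_READ | VM_PROT_WRITE;   /* requested maximum protection (in/out) */

	*out = 0;
	return mach_vm_remap_new(mach_task_self(), out, size, 0,
	    VM_FLAGS_ANYWHERE, mach_task_self(), src, TRUE /* copy */,
	    &cur, &max, VM_INHERIT_DEFAULT);
}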
+
kern_return_t
vm_read(
mach_port_name_t target,
#include <errno.h>
#include <mach/host_special_ports.h>
#include <mach/task_special_ports.h>
+#include <mach/thread_special_ports.h>
#include <mach/port_descriptions.h>
#include <stdlib.h>
#include <strings.h>
[HOST_SYSPOLICYD_PORT] = "syspolicyd",
[HOST_FILECOORDINATIOND_PORT] = "filecoordinationd",
[HOST_FAIRPLAYD_PORT] = "fairplayd",
+ [HOST_IOCOMPRESSIONSTATS_PORT] = "I/O compression stats",
};
- _Static_assert(HOST_FAIRPLAYD_PORT == HOST_MAX_SPECIAL_PORT,
+ _Static_assert(HOST_IOCOMPRESSIONSTATS_PORT == HOST_MAX_SPECIAL_PORT,
"all host special ports must have descriptions");
return hsp_descs[port_index];
[TASK_HOST_PORT] = "host",
[TASK_NAME_PORT] = "name",
[TASK_BOOTSTRAP_PORT] = "bootstrap",
+ [TASK_INSPECT_PORT] = "inspect",
+ [TASK_READ_PORT] = "read",
[TASK_SEATBELT_PORT] = "seatbelt",
[TASK_ACCESS_PORT] = "access",
[TASK_DEBUG_CONTROL_PORT] = "debug control",
return tsp_descs[port_index];
}
+const char *
+mach_thread_special_port_description(int port)
+{
+ int port_index = (int)port;
+
+ if (port_index < 0 || port_index > THREAD_MAX_SPECIAL_PORT) {
+ return NULL;
+ }
+
+ static const char *tsp_descs[] = {
+ [THREAD_KERNEL_PORT] = "kernel",
+ [THREAD_INSPECT_PORT] = "inspect",
+ [THREAD_READ_PORT] = "read",
+ };
+ _Static_assert(THREAD_READ_PORT == THREAD_MAX_SPECIAL_PORT,
+ "all thread special ports must have descriptions");
+
+ return tsp_descs[port_index];
+}
+
static int
port_for_id_internal(const char *id, const char **ids, int nids)
{
SP_ENTRY(TASK_HOST_PORT),
SP_ENTRY(TASK_NAME_PORT),
SP_ENTRY(TASK_BOOTSTRAP_PORT),
+ SP_ENTRY(TASK_INSPECT_PORT),
+ SP_ENTRY(TASK_READ_PORT),
SP_ENTRY(TASK_SEATBELT_PORT),
SP_ENTRY(TASK_ACCESS_PORT),
SP_ENTRY(TASK_DEBUG_CONTROL_PORT),
SP_ENTRY(TASK_RESOURCE_NOTIFY_PORT),
+ };
+
+ return port_for_id_internal(id, tsp_ids,
+ sizeof(tsp_ids) / sizeof(tsp_ids[0]));
+}
+
+int
+mach_thread_special_port_for_id(const char *id)
+{
+ static const char *tsp_ids[] = {
+ SP_ENTRY(THREAD_KERNEL_PORT),
+ SP_ENTRY(THREAD_INSPECT_PORT),
+ SP_ENTRY(THREAD_READ_PORT),
#undef SP_ENTRY
};
--- /dev/null
+/*
+ * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#undef _task_user_
+#include <mach/task_internal.h>
+
+extern mach_port_t mach_task_self_;
+
+boolean_t
+mach_task_is_self(task_name_t task)
+{
+ boolean_t is_self;
+ kern_return_t kr;
+
+ if (task == mach_task_self_) {
+ return TRUE;
+ }
+
+ kr = _kernelrpc_mach_task_is_self(task, &is_self);
+
+ return kr == KERN_SUCCESS && is_self;
+}
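A short hedged usage sketch; `name_port` is assumed to be a task name port obtained elsewhere (e.g. via task_name_for_pid()), which is not shown here.

/* Hypothetical call site. */
if (mach_task_is_self(name_port)) {
	/* The port refers to the calling task; take the local fast path. */
}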
volatile uint64_t *gtod_Ticks_scale_p;
volatile uint64_t *gtod_Ticks_per_sec_p;
- new_commpage_timeofday_data_t *commpage_timeofday_datap;
-
- commpage_timeofday_datap = (new_commpage_timeofday_data_t *)_COMM_PAGE_NEWTIMEOFDAY_DATA;
+ COMM_PAGE_SLOT_TYPE(new_commpage_timeofday_data_t) commpage_timeofday_datap =
+ COMM_PAGE_SLOT(new_commpage_timeofday_data_t, NEWTIMEOFDAY_DATA);
gtod_TimeStamp_tick_p = &commpage_timeofday_datap->TimeStamp_tick;
gtod_TimeStamp_sec_p = &commpage_timeofday_datap->TimeStamp_sec;
#include <stdbool.h>
#include <strings.h>
#include <unistd.h>
+#include <mach/vm_page_size.h>
#include "_libkernel_init.h"
extern int mach_init(void);
_dlsym = fns->dlsym;
}
mach_init();
+#if TARGET_OS_OSX
+ for (size_t i = 0; envp[i]; i++) {
+
+#if defined(__i386__) || defined(__x86_64__)
+ const char *VM_KERNEL_PAGE_SHIFT_ENV = "VM_KERNEL_PAGE_SIZE_4K=1";
+ if (vm_kernel_page_shift != 12 && strcmp(VM_KERNEL_PAGE_SHIFT_ENV, envp[i]) == 0) {
+ vm_kernel_page_shift = 12;
+ vm_kernel_page_size = 1 << vm_kernel_page_shift;
+ vm_kernel_page_mask = vm_kernel_page_size - 1;
+ }
+#endif /* defined(__i386__) || defined(__x86_64__) */
+ }
+#endif /* TARGET_OS_OSX */
}
void
int policy, error;
struct _iopol_param_t iop_param;
- if ((iotype != IOPOL_TYPE_DISK && iotype != IOPOL_TYPE_VFS_ATIME_UPDATES && iotype != IOPOL_TYPE_VFS_MATERIALIZE_DATALESS_FILES) ||
- (scope != IOPOL_SCOPE_PROCESS && scope != IOPOL_SCOPE_THREAD)) {
- errno = EINVAL;
- policy = -1;
- goto exit;
- }
+ /* Do not sanity-check iotype and scope; leave that to the kernel. */
iop_param.iop_scope = scope;
iop_param.iop_iotype = iotype;
bool
kdebug_is_enabled(uint32_t debugid)
{
- uint32_t state = *((volatile uint32_t *)(uintptr_t)(_COMM_PAGE_KDEBUG_ENABLE));
+ uint32_t state = COMM_PAGE_READ(uint32_t, KDEBUG_ENABLE);
if (state == 0) {
return FALSE;
bool
kdebug_using_continuous_time(void)
{
- uint32_t state = *((volatile uint32_t *)(uintptr_t)(_COMM_PAGE_KDEBUG_ENABLE));
+ uint32_t state = COMM_PAGE_READ(uint32_t, KDEBUG_ENABLE);
return state & KDEBUG_ENABLE_CONT_TIME;
}
uint64_t
mach_approximate_time(void)
{
- uint8_t supported = *((uint8_t *)_COMM_PAGE_APPROX_TIME_SUPPORTED);
- if (supported) {
- return *((uint64_t *)_COMM_PAGE_APPROX_TIME);
+ if (COMM_PAGE_READ(uint8_t, APPROX_TIME_SUPPORTED)) {
+ return COMM_PAGE_READ(uint64_t, APPROX_TIME);
}
return mach_absolute_time();
}
uint64_t
mach_boottime_usec(void)
{
- return *(uint64_t*)_COMM_PAGE_BOOTTIME_USEC;
+ return COMM_PAGE_READ(uint64_t, BOOTTIME_USEC);
}
uint64_t now = 0;
struct bt_params params = {};
- volatile struct bt_params *commpage_bt_params_p = (struct bt_params *)_COMM_PAGE_REMOTETIME_PARAMS;
+ COMM_PAGE_SLOT_TYPE(struct bt_params) commpage_bt_params_p =
+ COMM_PAGE_SLOT(struct bt_params, REMOTETIME_PARAMS);
volatile uint64_t *base_local_ts_p = &commpage_bt_params_p->base_local_ts;
volatile uint64_t *base_remote_ts_p = &commpage_bt_params_p->base_remote_ts;
volatile double *rate_p = &commpage_bt_params_p->rate;
MIG=`xcrun -sdk "$SDKROOT" -find mig`
MIGCC=`xcrun -sdk "$SDKROOT" -find cc`
export MIGCC
-MIG_DEFINES="-DLIBSYSCALL_INTERFACE"
+[ -n "$DRIVERKITROOT" ] && MIG_DRIVERKIT_DEFINES="-DDRIVERKIT"
+MIG_DEFINES="-DLIBSYSCALL_INTERFACE $MIG_DRIVERKIT_DEFINES"
+MIG_PRIVATE_DEFINES="-DPRIVATE -D_OPEN_SOURCE_ -D__OPEN_SOURCE__"
MIG_HEADER_OBJ="$OBJROOT/mig_hdr/include/mach"
MIG_HEADER_DST="$BUILT_PRODUCTS_DIR/mig_hdr/include/mach"
MIG_PRIVATE_HEADER_DST="$BUILT_PRODUCTS_DIR/mig_hdr/local/include/mach"
MACH_HEADER_DST="$BUILT_PRODUCTS_DIR/mig_hdr/include/mach"
MACH_PRIVATE_HEADER_DST="$BUILT_PRODUCTS_DIR/mig_hdr/local/include/mach"
MIG_INTERNAL_HEADER_DST="$BUILT_PRODUCTS_DIR/internal_hdr/include/mach"
-MIG_INCFLAGS="-I${SDKROOT}/${SDK_INSTALL_HEADERS_ROOT}/usr/include -I${SDKROOT}/${SDK_INSTALL_HEADERS_ROOT}/usr/local/include"
-MIG_PRIVATE_DEFS_INCFLAGS="-I${SDKROOT}/${SDK_INSTALL_HEADERS_ROOT}/System/Library/Frameworks/System.framework/PrivateHeaders"
+MIG_INCFLAGS="-I${SRCROOT}/../osfmk"
SRC="$SRCROOT/mach"
FILTER_MIG="$SRCROOT/xcodescripts/filter_mig.awk"
MIGS_INTERNAL="mach_port.defs
mach_vm.defs
+ task.defs
thread_act.defs
vm_map.defs"
for mig in $MIGS_PRIVATE $MIGS_DUAL_PUBLIC_PRIVATE; do
MIG_NAME=`basename $mig .defs`
- $MIG -novouchers -arch $MACHINE_ARCH -cc $MIGCC -header "$MIG_PRIVATE_HEADER_DST/$MIG_NAME.h" $MIG_DEFINES $MIG_INCFLAGS $MIG_PRIVATE_DEFS_INCFLAGS $SRC/$mig
+ $MIG -novouchers -arch $MACHINE_ARCH -cc $MIGCC -header "$MIG_PRIVATE_HEADER_DST/$MIG_NAME.h" $MIG_DEFINES $MIG_PRIVATE_DEFINES $MIG_INCFLAGS $SRC/$mig
if [ ! -e "$MIG_HEADER_DST/$MIG_NAME.h" ]; then
echo "#error $MIG_NAME.h unsupported." > "$MIG_HEADER_DST/$MIG_NAME.h"
fi
MIG_NAME=`basename $mig .defs`
$MIG -novouchers -arch $MACHINE_ARCH -cc $MIGCC -header "$MIG_INTERNAL_HEADER_DST/${MIG_NAME}_internal.h" $MIG_INCFLAGS $SRC/$mig
done
-
+
\ No newline at end of file
ifeq ($(NMEDIT),)
export NMEDIT := $(shell $(XCRUN) -sdk $(SDKROOT) -find nmedit)
endif
+ifeq ($(SCAN_BUILD),)
+ export SCAN_BUILD := $(shell $(XCRUN) -sdk $(SDKROOT) -find scan-build)
+endif
#
# Platform options
AWK = /usr/bin/awk
SED = /usr/bin/sed
PLUTIL = /usr/bin/plutil
+GREP = /usr/bin/grep
#
# Command to generate host binaries. Intentionally not
COMPONENT = $(if $(word 2,$(subst /, ,$(RELATIVE_SOURCE_PATH))),$(word 2,$(subst /, ,$(RELATIVE_SOURCE_PATH))),$(firstword $(subst /, ,$(RELATIVE_SOURCE_PATH))))
COMPONENT_IMPORT_LIST = $(filter-out $(COMPONENT),$(COMPONENT_LIST))
-MACHINE_FLAGS_ARM64_T7000 = -DARM64_BOARD_CONFIG_T7000
-MACHINE_FLAGS_ARM64_T7001 = -DARM64_BOARD_CONFIG_T7001
-MACHINE_FLAGS_ARM64_S8000 = -DARM64_BOARD_CONFIG_S8000
-MACHINE_FLAGS_ARM64_S8001 = -DARM64_BOARD_CONFIG_S8001
+MACHINE_FLAGS_ARM64_T7000 = -DARM64_BOARD_CONFIG_T7000 -mcpu=apple-h7
+MACHINE_FLAGS_ARM64_T7001 = -DARM64_BOARD_CONFIG_T7001 -mcpu=apple-h7
+MACHINE_FLAGS_ARM64_S8000 = -DARM64_BOARD_CONFIG_S8000 -mcpu=apple-h8
+MACHINE_FLAGS_ARM64_S8001 = -DARM64_BOARD_CONFIG_S8001 -mcpu=apple-h8
MACHINE_FLAGS_ARM_T8002 = -DARM_BOARD_CONFIG_T8002
MACHINE_FLAGS_ARM_T8004 = -DARM_BOARD_CONFIG_T8004
MACHINE_FLAGS_ARM64_T8010 = -DARM64_BOARD_CONFIG_T8010 -mcpu=hurricane
MACHINE_FLAGS_ARM64_T8011 = -DARM64_BOARD_CONFIG_T8011 -mcpu=hurricane
MACHINE_FLAGS_ARM64_BCM2837 = -DARM64_BOARD_CONFIG_BCM2837
MACHINE_FLAGS_ARM64_T8020 = -DARM64_BOARD_CONFIG_T8020 -mcpu=vortex
-MACHINE_FLAGS_ARM64_T8101 = -DARM64_BOARD_CONFIG_T8101 -D__ARM_ARCH_8_5__=1
-MACHINE_FLAGS_ARM64_T8103 = -DARM64_BOARD_CONFIG_T8103 -D__ARM_ARCH_8_5__=1
+MACHINE_FLAGS_ARM64_T8101 = -DARM64_BOARD_CONFIG_T8101 -mcpu=apple-a14
+MACHINE_FLAGS_ARM64_T8103 = -DARM64_BOARD_CONFIG_T8103 -mcpu=apple-a14
#
-Wl,-sectalign,__HIB,__cstring,0x1000 \
-Wl,-rename_section,__DATA,__const,__DATA_CONST,__const \
-Wl,-segprot,__DATA_CONST,r--,r-- \
+ -Wl,-rename_section,__KLD,__const,__KLDDATA,__const \
+ -Wl,-rename_section,__KLD,__cstring,__KLDDATA,__cstring \
+ -Wl,-segprot,__KLDDATA,rw-,rw- \
+ -Wl,-segprot,__KLD,r-x,r-x \
-Wl,-no_zero_fill_sections \
$(LDFLAGS_NOSTRIP_FLAG)
-Wl,-static \
-Wl,-image_base,0x80001000 \
-Wl,-sectalign,__DATA,__const,0x1000 \
- -Wl,-u,___udivmoddi4
+ -Wl,-u,___udivmoddi4 \
+ -Wl,-rename_section,__KLD,__const,__KLDDATA,__const \
+ -Wl,-rename_section,__KLD,__cstring,__KLDDATA,__cstring \
+ -Wl,-segprot,__KLDDATA,rw-,rw- \
+ -Wl,-segprot,__KLD,r-x,r-x
LDFLAGS_KERNEL_RELEASEARM = \
$(LDFLAGS_KERNEL_GENARM) \
-Wl,-rename_section,__DATA,__auth_got,__DATA_CONST,__auth_got \
-Wl,-rename_section,__DATA,__const,__DATA_CONST,__const \
-Wl,-segprot,__DATA_CONST,r--,r-- \
+ -Wl,-rename_section,__KLD,__const,__KLDDATA,__const \
+ -Wl,-rename_section,__KLD,__cstring,__KLDDATA,__cstring \
+ -Wl,-segprot,__KLDDATA,rw-,rw- \
+ -Wl,-segprot,__KLD,r-x,r-x \
-Wl,-rename_section,__TEXT,__text,__TEXT_EXEC,__text \
-Wl,-rename_section,__TEXT,__stubs,__TEXT_EXEC,__stubs \
-Wl,-sectcreate,"__PLK_TEXT_EXEC",__text,/dev/null \
LDFLAGS_KERNEL_SEGARM64 = \
-Wl,-rename_section,__PPLDATA,__const,__PPLDATA_CONST,__const \
- -Wl,-segment_order,__TEXT:__DATA_CONST:__LINKEDIT:__TEXT_EXEC:__PPLTEXT:__PPLTRAMP:__PPLDATA_CONST:__LASTDATA_CONST:__LAST:__PPLDATA:__KLD:__DATA:__HIBDATA:__BOOTDATA \
- -Wl,-segprot,__PPLTEXT,r-x,r-x -Wl,-segprot,__PPLTRAMP,r-x,r-x -Wl,-segprot,__PPLDATA_CONST,r--,r-- -Wl,-segprot,__LASTDATA_CONST,r--,r-- -Wl,-segprot,__LAST,r-x,r-x
+ -Wl,-segment_order,__TEXT:__DATA_CONST:__LINKEDIT:__TEXT_EXEC:__KLD:__PPLTEXT:__PPLTRAMP:__PPLDATA_CONST:__LASTDATA_CONST:__LAST:__PPLDATA:__KLDDATA:__DATA:__HIBDATA:__BOOTDATA \
+ -Wl,-segprot,__PPLTEXT,r-x,r-x -Wl,-segprot,__PPLTRAMP,r-x,r-x -Wl,-segprot,__PPLDATA_CONST,r--,r-- -Wl,-segprot,__LASTDATA_CONST,r--,r-- -Wl,-segprot,__LAST,r-x,r-x \
LDFLAGS_KERNEL_RELEASEARM64 = \
$(LDFLAGS_KERNEL_GENARM64) \
#include <IOKit/IOCFUnserialize.h>
#endif
+#if CONFIG_USER_NOTIFICATION
/*
* DEFINES AND STRUCTURES
*/
}
return UND_REPLY_NULL;
}
+#endif
/*
* User interface for setting the host UserNotification Daemon port.
host_priv_t host_priv,
UNDServerRef server)
{
+#if CONFIG_USER_NOTIFICATION
return host_set_user_notification_port(host_priv, server);
+#else
+#pragma unused(host_priv, server)
+ return KERN_NOT_SUPPORTED;
+#endif
}
/*
host_priv_t host_priv,
UNDServerRef *serverp)
{
+#if CONFIG_USER_NOTIFICATION
return host_get_user_notification_port(host_priv, serverp);
+#else
+#pragma unused(host_priv, serverp)
+ return KERN_NOT_SUPPORTED;
+#endif
}
uint64_t stackshot_interrupt_masked_timeout = 0xf9999;
#endif
+/*
+ * A 6-second timeout will give the watchdog code a chance to run
+ * before a panic is triggered by the xcall routine.
+ */
+#define XCALL_ACK_TIMEOUT_NS ((uint64_t) 6000000000)
+uint64_t xcall_ack_timeout_abstime;
+
+
boot_args const_boot_args __attribute__((section("__DATA, __const")));
boot_args *BootArgs __attribute__((section("__DATA, __const")));
SECURITY_READ_ONLY_LATE(uint64_t) gDramBase;
SECURITY_READ_ONLY_LATE(uint64_t) gDramSize;
+SECURITY_READ_ONLY_LATE(bool) serial_console_enabled = false;
+
/*
* Forward definition
*/
}
PE_parse_boot_argn("interrupt_masked_debug_timeout", &interrupt_masked_timeout, sizeof(interrupt_masked_timeout));
-#endif
+
+#endif /* INTERRUPT_MASKED_DEBUG */
+
+ nanoseconds_to_absolutetime(XCALL_ACK_TIMEOUT_NS, &xcall_ack_timeout_abstime);
+
#if HAS_BP_RET
PE_parse_boot_argn("bpret", &bp_ret, sizeof(bp_ret));
}
if (serialmode & SERIALMODE_OUTPUT) { /* Start serial if requested */
+ serial_console_enabled = true;
(void)switch_to_serial_console(); /* Switch into serial mode */
disableConsoleOutput = FALSE; /* Allow printfs to happen */
}
static unsigned long segSizeLINK;
static vm_offset_t segKLDB;
static unsigned long segSizeKLD;
+static vm_offset_t segKLDDATAB;
+static unsigned long segSizeKLDDATA;
static vm_offset_t segLASTB;
static vm_offset_t segLASTDATACONSTB;
static unsigned long segSizeLASTDATACONST;
arm_vm_page_granular_RNX((vm_offset_t)&fiqstack_high_guard, PAGE_MAX_SIZE, TRUE);
arm_vm_page_granular_ROX(segKLDB, segSizeKLD, force_coarse_physmap);
+ arm_vm_page_granular_RNX(segKLDDATAB, segSizeKLDDATA, force_coarse_physmap);
arm_vm_page_granular_RWNX(segLINKB, segSizeLINK, force_coarse_physmap);
arm_vm_page_granular_RWNX(segLASTB, segSizeLAST, FALSE); // __LAST may be empty, but we cannot assume this
if (segLASTDATACONSTB) {
segDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA", &segSizeDATA);
segLINKB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LINKEDIT", &segSizeLINK);
segKLDB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLD", &segSizeKLD);
+ segKLDDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLDDATA", &segSizeKLDDATA);
segLASTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LAST", &segSizeLAST);
segLASTDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LASTDATA_CONST", &segSizeLASTDATACONST);
segPRELINKTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_TEXT", &segSizePRELINKTEXT);
#include <mach/thread_status.h>
#include <mach/vm_param.h>
-#include <kern/counters.h>
#include <kern/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <kern/mach_param.h>
--- /dev/null
+/* * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <kern/assert.h>
+#include <kern/cpu_data.h>
+#include <kern/counter.h>
+#include <kern/zalloc.h>
+#include <machine/atomic.h>
+#include <machine/machine_routines.h>
+#include <machine/cpu_number.h>
+
+OS_OVERLOADABLE
+void
+counter_add(scalable_counter_t *counter, uint64_t amount)
+{
+ os_atomic_add(zpercpu_get(*counter), amount, relaxed);
+}
+
+OS_OVERLOADABLE
+void
+counter_inc(scalable_counter_t *counter)
+{
+ os_atomic_inc(zpercpu_get(*counter), relaxed);
+}
+
+OS_OVERLOADABLE
+void
+counter_dec(scalable_counter_t *counter)
+{
+ os_atomic_dec(zpercpu_get(*counter), relaxed);
+}
+
+/*
+ * NB: On arm, the preemption disabled implementation is the same as
+ * the normal implementation. Otherwise we would need to enforce that
+ * callers never mix the interfaces for the same counter.
+ */
+OS_OVERLOADABLE
+void
+counter_add_preemption_disabled(scalable_counter_t *counter, uint64_t amount)
+{
+ counter_add(counter, amount);
+}
+
+OS_OVERLOADABLE
+void
+counter_inc_preemption_disabled(scalable_counter_t *counter)
+{
+ counter_inc(counter);
+}
+
+OS_OVERLOADABLE
+void
+counter_dec_preemption_disabled(scalable_counter_t *counter)
+{
+ counter_dec(counter);
+}
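For orientation, a minimal sketch of a call site for these overloads, assuming `stat_counter` has already been allocated and zero-initialized by the per-CPU counter machinery elsewhere (not shown in this change):

/* Hypothetical: `stat_counter` is set up by the counter subsystem elsewhere. */
static scalable_counter_t stat_counter;

static void
account_event(uint64_t nbytes)
{
	counter_inc(&stat_counter);          /* one more event */
	counter_add(&stat_counter, nbytes);  /* bytes processed */
}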
"region probably needs to be updated.");
#else /* KERNEL_PRIVATE */
+/*
+ * <sys/commpage.h> defines a couple of convenience macros
+ * to help read data from the commpage.
+ */
#define _COMM_PAGE_AREA_LENGTH (4096)
#define _COMM_PAGE_BASE_ADDRESS _COMM_PAGE64_BASE_ADDRESS
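For illustration, these macros are used elsewhere in this change roughly as follows:

uint64_t boottime_usec = COMM_PAGE_READ(uint64_t, BOOTTIME_USEC);
uint8_t  page_shift    = COMM_PAGE_READ(uint8_t, KERNEL_PAGE_SHIFT);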
cpu_data_t PERCPU_DATA(cpu_data);
cpu_data_entry_t CpuDataEntries[MAX_CPUS];
-static lck_grp_t cpu_lck_grp;
-static lck_rw_t cpu_state_lock;
+static LCK_GRP_DECLARE(cpu_lck_grp, "cpu_lck_grp");
+static LCK_RW_DECLARE(cpu_state_lock, &cpu_lck_grp);
unsigned int real_ncpus = 1;
boolean_t idle_enable = FALSE;
uint64_t wake_abstime = 0x0ULL;
+extern uint64_t xcall_ack_timeout_abstime;
+
#if defined(HAS_IPI)
extern unsigned int gFastIPI;
#endif /* defined(HAS_IPI) */
}
if ((signal == SIGPxcall) || (signal == SIGPxcallImm)) {
+ uint64_t start_mabs_time, max_mabs_time, current_mabs_time;
+ current_mabs_time = start_mabs_time = mach_absolute_time();
+ max_mabs_time = xcall_ack_timeout_abstime + current_mabs_time;
+ assert(max_mabs_time > current_mabs_time);
+
do {
current_signals = target_proc->cpu_signal;
if ((current_signals & SIGPdisabled) == SIGPdisabled) {
if (!swap_success && (current_proc->cpu_signal & signal)) {
cpu_handle_xcall(current_proc);
}
- } while (!swap_success);
+ } while (!swap_success && ((current_mabs_time = mach_absolute_time()) < max_mabs_time));
+
+ /*
+ * If we time out while waiting for the target CPU to respond, it's possible that no
+ * other CPU is available to handle the watchdog interrupt that would eventually trigger
+ * a panic. To prevent this from happening, we just panic here to flag this condition.
+ */
+ if (__improbable(current_mabs_time >= max_mabs_time)) {
+ uint64_t end_time_ns, xcall_ack_timeout_ns;
+ absolutetime_to_nanoseconds(current_mabs_time - start_mabs_time, &end_time_ns);
+ absolutetime_to_nanoseconds(xcall_ack_timeout_abstime, &xcall_ack_timeout_ns);
+ panic("CPU%u has failed to respond to cross-call after %llu nanoseconds (timeout = %llu ns)",
+ target_proc->cpu_number, end_time_ns, xcall_ack_timeout_ns);
+ }
if (signal == SIGPxcallImm) {
target_proc->cpu_imm_xcall_p0 = p0;
return false;
}
-void
-ml_cpu_init_state(void)
-{
- lck_grp_init(&cpu_lck_grp, "cpu_lck_grp", LCK_GRP_ATTR_NULL);
- lck_rw_init(&cpu_state_lock, &cpu_lck_grp, LCK_ATTR_NULL);
-}
-
#ifdef USE_APPLEARMSMP
void
#define CPUWINDOWS_BASE (VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK)
#define CPUWINDOWS_TOP (CPUWINDOWS_BASE + (MAX_CPUS * CPUWINDOWS_MAX * ARM_PGBYTES))
-static_assert((CPUWINDOWS_BASE >= VM_MIN_KERNEL_ADDRESS) && (CPUWINDOWS_TOP <= VM_MAX_KERNEL_ADDRESS),
+static_assert((CPUWINDOWS_BASE >= VM_MIN_KERNEL_ADDRESS) && ((CPUWINDOWS_TOP - 1) <= VM_MAX_KERNEL_ADDRESS),
"CPU copy windows too large for CPUWINDOWS_BASE_MASK value");
typedef struct cpu_data_entry {
/*
* arm_usimple_lock is a lck_spin_t without a group or attributes
*/
-void inline
+MARK_AS_HIBERNATE_TEXT void inline
arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
{
lck->type = LCK_SPIN_TYPE;
high_MutexSpin = low_MutexSpin;
}
+/*
+ * This is called when all of the ml_processor_info_t structures have been
+ * initialized and all the processors have been started through processor_start().
+ *
+ * Required by the scheduler subsystem.
+ */
+void
+ml_cpu_init_completed(void)
+{
+}
+
/*
* This is called from the machine-independent routine cpu_up()
* to perform machine-dependent info updates.
* @field coresight_regs IO-mapped virtual address of CoreSight debug register block.
* @field coresight_pa Physical address of CoreSight register block.
* @field coresight_len Length of CoreSight register block.
- * @field self_ipi_irq AIC IRQ vector for self IPI (cpuX->cpuX). 0 if unsupported.
- * @field other_ipi_irq AIC IRQ vector for other IPI (cpuX->cpuY). 0 if unsupported.
- * @field pmi_irq AIC IRQ vector for performance management IRQ. 0 if unsupported.
* @field die_cluster_id Cluster ID within the local die (EDT: die-cluster-id)
* @field cluster_core_id Core ID within the local cluster (EDT: cluster-core-id)
*/
vm_offset_t coresight_regs;
uint64_t coresight_pa;
uint64_t coresight_len;
- int self_ipi_irq;
- int other_ipi_irq;
- int pmi_irq;
unsigned int die_cluster_id;
unsigned int cluster_core_id;
} ml_topology_cpu_t;
void ml_init_lock_timeout(void);
+#if __arm64__
+uint64_t virtual_timeout_inflate_ns(unsigned int vti, uint64_t timeout);
+uint64_t virtual_timeout_inflate_abs(unsigned int vti, uint64_t timeout);
+#endif
+
boolean_t ml_delay_should_spin(uint64_t interval);
void ml_delay_on_yield(void);
#define MACHINE_MAX_OFFSET_DEVICE 0x08
#endif
+extern void ml_cpu_init_completed(void);
extern void ml_cpu_up(void);
extern void ml_cpu_down(void);
extern void ml_arm_sleep(void);
#endif /* __arm64__ */
#if HAS_APPLE_PAC
+
+
+/**
+ * Returns the default ROP key.
+ */
uint64_t
ml_default_rop_pid(void)
{
return 0;
}
+/**
+ * Returns the default JOP key.
+ */
uint64_t
ml_default_jop_pid(void)
{
data.thread_group_id = thread_group_get_id(tg);
data.thread_group_data = thread_group_get_machine_data(tg);
data.thread_group_size = thread_group_machine_data_size();
+ data.thread_group_flags = thread_group_get_flags(tg);
sched_perfcontrol_thread_group_init(&data);
}
data.thread_group_id = thread_group_get_id(tg);
data.thread_group_data = thread_group_get_machine_data(tg);
data.thread_group_size = thread_group_machine_data_size();
+ data.thread_group_flags = thread_group_get_flags(tg);
sched_perfcontrol_thread_group_deinit(&data);
}
INTERRUPT_MASKED_DEBUG_START(current_thread()->machine.int_handler_addr, current_thread()->machine.int_type);
}
+#if defined(__arm64__)
+ current_thread()->machine.kpcb = NULL;
+#endif /* defined(__arm64__) */
+
/* Any cleanup for our pushed context should go here */
}
static bool alloc_asid(pmap_t pmap);
static void free_asid(pmap_t pmap);
static void flush_mmu_tlb_region_asid_async(vm_offset_t va, size_t length, pmap_t pmap);
-static void flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap);
static void flush_mmu_tlb_full_asid_async(pmap_t pmap);
static pt_entry_t wimg_to_pte(unsigned int wimg);
bool (*alloc_id)(pmap_t pmap);
void (*free_id)(pmap_t pmap);
void (*flush_tlb_region_async)(vm_offset_t va, size_t length, pmap_t pmap);
- void (*flush_tlb_tte_async)(vm_offset_t va, pmap_t pmap);
void (*flush_tlb_async)(pmap_t pmap);
pt_entry_t (*wimg_to_pte)(unsigned int wimg);
};
.alloc_id = alloc_asid,
.free_id = free_asid,
.flush_tlb_region_async = flush_mmu_tlb_region_asid_async,
- .flush_tlb_tte_async = flush_mmu_tlb_tte_asid_async,
.flush_tlb_async = flush_mmu_tlb_full_asid_async,
.wimg_to_pte = wimg_to_pte,
};
return ARM_PTE_NX;
}
+static inline uintptr_t
+pt_attr_leaf_x(__unused const pt_attr_t * const pt_attr)
+{
+ return ARM_PTE_PNX;
+}
+
__unused static inline uintptr_t
pt_attr_ln_offmask(__unused const pt_attr_t * const pt_attr, unsigned int level)
{
SECURITY_READ_ONLY_LATE(pmap_t) kernel_pmap = &kernel_pmap_store;
struct vm_object pmap_object_store VM_PAGE_PACKED_ALIGNED; /* store pt pages */
-vm_object_t pmap_object = &pmap_object_store;
+SECURITY_READ_ONLY_LATE(vm_object_t) pmap_object = &pmap_object_store;
static SECURITY_READ_ONLY_LATE(zone_t) pmap_zone; /* zone of pmap structures */
* Trace levels are controlled by a bitmask in which each
* level can be enabled/disabled by the (1<<level) position
* in the boot arg
+ * Level 0: PPL extension functionality
* Level 1: pmap lifecycle (create/destroy/switch)
* Level 2: mapping lifecycle (enter/remove/protect/nest/unnest)
* Level 3: internal state management (attributes/fast-fault)
pmap_t, vm_map_address_t, unsigned int options, unsigned int level);
static int pmap_remove_range(
- pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *);
+ pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *);
static int pmap_remove_range_options(
- pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *, bool *, int);
+ pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, vm_map_address_t *, bool *, int);
static tt_entry_t *pmap_tt1_allocate(
pmap_t, vm_size_t, unsigned int);
#define PMAP_TT_ALLOCATE_NOWAIT 0x1
static void pmap_tte_deallocate(
- pmap_t, tt_entry_t *, unsigned int);
+ pmap_t, vm_offset_t, vm_offset_t, bool, tt_entry_t *, unsigned int);
const unsigned int arm_hardware_page_size = ARM_PGBYTES;
const unsigned int arm_pt_desc_size = sizeof(pt_desc_t);
void,
pmap_page_protect_options, (ppnum_t ppnum,
vm_prot_t prot,
- unsigned int options), PMAP_PAGE_PROTECT_OPTIONS_INDEX);
+ unsigned int options,
+ void *arg), PMAP_PAGE_PROTECT_OPTIONS_INDEX);
PMAP_SUPPORT_PROTOTYPES(
- void,
+ vm_map_address_t,
pmap_protect_options, (pmap_t pmap,
vm_map_address_t start,
vm_map_address_t end,
pmap_reference, (pmap_t pmap), PMAP_REFERENCE_INDEX);
PMAP_SUPPORT_PROTOTYPES(
- int,
+ vm_map_address_t,
pmap_remove_options, (pmap_t pmap,
vm_map_address_t start,
vm_map_address_t end,
#if __ARM_RANGE_TLBI__
PMAP_SUPPORT_PROTOTYPES(
- void,
+ vm_map_address_t,
phys_attribute_clear_range, (pmap_t pmap,
vm_map_address_t start,
vm_map_address_t end,
bool,
pmap_lookup_in_loaded_trust_caches, (const uint8_t cdhash[CS_CDHASH_LEN]), PMAP_LOOKUP_IN_LOADED_TRUST_CACHES_INDEX);
+PMAP_SUPPORT_PROTOTYPES(
+ void,
+ pmap_set_compilation_service_cdhash, (const uint8_t cdhash[CS_CDHASH_LEN]),
+ PMAP_SET_COMPILATION_SERVICE_CDHASH_INDEX);
+
+PMAP_SUPPORT_PROTOTYPES(
+ bool,
+ pmap_match_compilation_service_cdhash, (const uint8_t cdhash[CS_CDHASH_LEN]),
+ PMAP_MATCH_COMPILATION_SERVICE_CDHASH_INDEX);
#if XNU_MONITOR
static void pmap_mark_page_as_ppl_page(pmap_paddr_t pa);
static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa);
#endif
+#if DEVELOPMENT || DEBUG
+PMAP_SUPPORT_PROTOTYPES(
+ kern_return_t,
+ pmap_test_text_corruption, (pmap_paddr_t),
+ PMAP_TEST_TEXT_CORRUPTION_INDEX);
+#endif /* DEVELOPMENT || DEBUG */
+
#if (__ARM_VMSA__ > 7)
/*
* The low global vector page is mapped at a fixed alias.
[PMAP_IS_TRUST_CACHE_LOADED_INDEX] = pmap_is_trust_cache_loaded_internal,
[PMAP_LOOKUP_IN_STATIC_TRUST_CACHE_INDEX] = pmap_lookup_in_static_trust_cache_internal,
[PMAP_LOOKUP_IN_LOADED_TRUST_CACHES_INDEX] = pmap_lookup_in_loaded_trust_caches_internal,
+ [PMAP_SET_COMPILATION_SERVICE_CDHASH_INDEX] = pmap_set_compilation_service_cdhash_internal,
+ [PMAP_MATCH_COMPILATION_SERVICE_CDHASH_INDEX] = pmap_match_compilation_service_cdhash_internal,
[PMAP_TRIM_INDEX] = pmap_trim_internal,
[PMAP_LEDGER_ALLOC_INIT_INDEX] = pmap_ledger_alloc_init_internal,
[PMAP_LEDGER_ALLOC_INDEX] = pmap_ledger_alloc_internal,
[PMAP_LEDGER_FREE_INDEX] = pmap_ledger_free_internal,
-#if HAS_APPLE_PAC && XNU_MONITOR
+#if HAS_APPLE_PAC
[PMAP_SIGN_USER_PTR] = pmap_sign_user_ptr_internal,
[PMAP_AUTH_USER_PTR] = pmap_auth_user_ptr_internal,
-#endif /* HAS_APPLE_PAC && XNU_MONITOR */
+#endif /* HAS_APPLE_PAC */
#if __ARM_RANGE_TLBI__
[PHYS_ATTRIBUTE_CLEAR_RANGE_INDEX] = phys_attribute_clear_range_internal,
#endif /* __ARM_RANGE_TLBI__ */
#if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
[PMAP_DISABLE_USER_JOP_INDEX] = pmap_disable_user_jop_internal,
#endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
+
+#if DEVELOPMENT || DEBUG
+ [PMAP_TEST_TEXT_CORRUPTION_INDEX] = pmap_test_text_corruption_internal,
+#endif /* DEVELOPMENT || DEBUG */
};
#endif
return pmap_cpu_data;
}
+#if __arm64__
+/*
+ * Disable interrupts and return previous state.
+ *
+ * The PPL has its own interrupt state facility, separate from
+ * ml_set_interrupts_enable(), since that function is not part of the
+ * PPL and may do things like manipulate untrusted data and take ASTs.
+ *
+ * @return The previous interrupt state, to be restored with
+ * pmap_interrupts_restore().
+ */
+static uint64_t __attribute__((warn_unused_result)) __used
+pmap_interrupts_disable(void)
+{
+ uint64_t state = __builtin_arm_rsr64("DAIF");
+
+ if ((state & DAIF_STANDARD_DISABLE) != DAIF_STANDARD_DISABLE) {
+ __builtin_arm_wsr64("DAIFSet", DAIFSC_STANDARD_DISABLE);
+ }
+
+ return state;
+}
+
+/*
+ * Restore previous interrupt state.
+ *
+ * @param state The previous interrupt state to restore.
+ */
+static void __used
+pmap_interrupts_restore(uint64_t state)
+{
+ // no unknown bits?
+ assert((state & ~DAIF_ALL) == 0);
+
+ if (state != DAIF_STANDARD_DISABLE) {
+ __builtin_arm_wsr64("DAIF", state);
+ }
+}
+
+/*
+ * Query interrupt state.
+ *
+ * ml_get_interrupts_enabled() is safe enough at the time of writing
+ * this comment, but it is not considered part of the PPL, so it could
+ * change without notice, and it presently checks only DAIF_IRQ; hence
+ * we have our own version.
+ *
+ * @return true if interrupts are enabled (not fully disabled).
+ */
+
+static bool __attribute__((warn_unused_result)) __used
+pmap_interrupts_enabled(void)
+{
+ return (__builtin_arm_rsr64("DAIF") & DAIF_STANDARD_DISABLE) != DAIF_STANDARD_DISABLE;
+}
+#endif /* __arm64__ */
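A minimal sketch of the save/disable/restore pattern these helpers support (the function name is hypothetical; real call sites are PPL-internal):

static void
pmap_example_critical_section(void)
{
	uint64_t istate = pmap_interrupts_disable();
	/* ... PPL work that must run with interrupts masked ... */
	pmap_interrupts_restore(istate);
}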
+
#if XNU_MONITOR
/*
* pmap_set_range_xprr_perm takes a range (specified using start and end) that
pmap_simple_unlock(&pt_pages_lock);
return (pmap_paddr_t)0;
} else {
- int remove_count = 0;
bool need_strong_sync = false;
vm_map_address_t va;
pmap_t pmap;
pt_entry_t *bpte, *epte;
pt_entry_t *pte_p;
tt_entry_t *tte_p;
- uint32_t rmv_spte = 0;
pmap_simple_unlock(&pt_pages_lock);
pmap = ptdp->pmap;
* which could cause the counter to drift
* more and more.
*/
- remove_count += pmap_remove_range_options(
- pmap, va, bpte, epte,
- &rmv_spte, &need_strong_sync, PMAP_OPTIONS_REMOVE);
+ pmap_remove_range_options(
+ pmap, va, bpte, epte, NULL,
+ &need_strong_sync, PMAP_OPTIONS_REMOVE);
if (ptd_get_info(ptdp, pte_p)->refcnt != 0) {
panic("%s: ptdp %p, count %d", __FUNCTION__, ptdp, ptd_get_info(ptdp, pte_p)->refcnt);
}
- pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
-
- if (remove_count > 0) {
- pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, (size_t)pt_attr_leaf_table_size(pt_attr), pmap);
- } else {
- pmap_get_pt_ops(pmap)->flush_tlb_tte_async(va, pmap);
- }
+ pmap_tte_deallocate(pmap, va, va + (size_t)pt_attr_leaf_table_size(pt_attr), need_strong_sync,
+ tte_p, pt_attr_twig_level(pt_attr));
}
}
// Undo the lock we grabbed when we found ptdp above
pmap_unlock(pmap);
- pmap_sync_tlb(need_strong_sync);
}
pmap_simple_lock(&pmap_pages_lock);
}
pmap_paddr_t pa,
unsigned size)
{
- pmap_simple_lock(&pmap_pages_lock);
-
- if (pmap_pages_request_count != 0) {
+ if (__improbable(pmap_pages_request_count != 0)) {
page_free_entry_t *page_entry;
- pmap_pages_request_count--;
- page_entry = (page_free_entry_t *)phystokv(pa);
- page_entry->next = pmap_pages_reclaim_list;
- pmap_pages_reclaim_list = page_entry;
- pmap_simple_unlock(&pmap_pages_lock);
+ pmap_simple_lock(&pmap_pages_lock);
- return;
- }
+ if (pmap_pages_request_count != 0) {
+ pmap_pages_request_count--;
+ page_entry = (page_free_entry_t *)phystokv(pa);
+ page_entry->next = pmap_pages_reclaim_list;
+ pmap_pages_reclaim_list = page_entry;
+ pmap_simple_unlock(&pmap_pages_lock);
+ return;
+ }
- pmap_simple_unlock(&pmap_pages_lock);
+ pmap_simple_unlock(&pmap_pages_lock);
+ }
#if XNU_MONITOR
(void)size;
return ret;
}
+/*
+ * Routines to track and allocate physical pages during early boot.
+ * On most systems this memory runs from first_avail through avail_end
+ * with no gaps.
+ *
+ * However, if the system supports ECC and bad_ram_pages_count > 0, we
+ * need to be careful to skip those pages.
+ */
+static unsigned int avail_page_count = 0;
+static bool need_ram_ranges_init = true;
+
+#if defined(__arm64__)
+pmap_paddr_t *bad_ram_pages = NULL;
+unsigned int bad_ram_pages_count = 0;
+
+/*
+ * We use this sub-range of bad_ram_pages for pmap_next_page()
+ */
+static pmap_paddr_t *skip_pages;
+static unsigned int skip_pages_count = 0;
+
+#define MAX_BAD_RAM_PAGE_COUNT 64
+static pmap_paddr_t bad_ram_pages_arr[MAX_BAD_RAM_PAGE_COUNT];
+
+/*
+ * XXX - temporary code to get the bad pages array from boot-args.
+ * Expects a comma-separated list of offsets from the start
+ * of physical memory to be considered bad.
+ *
+ * HERE JOE -- will eventually be replaced by data provided by iboot
+ */
+static void
+parse_bad_ram_pages_boot_arg(void)
+{
+ char buf[256] = {0};
+ char *s = buf;
+ char *end;
+ int count = 0;
+ pmap_paddr_t num;
+ extern uint64_t strtouq(const char *, char **, int);
+
+ if (!PE_parse_boot_arg_str("bad_ram_pages", buf, sizeof(buf))) {
+ goto done;
+ }
+
+ while (*s && count < MAX_BAD_RAM_PAGE_COUNT) {
+ num = (pmap_paddr_t)strtouq(s, &end, 0);
+ if (num == 0) {
+ break;
+ }
+ num &= ~PAGE_MASK;
+
+ bad_ram_pages_arr[count++] = gDramBase + num;
+
+ if (*end != ',') {
+ break;
+ }
+
+ s = end + 1;
+ }
+
+done:
+ bad_ram_pages = bad_ram_pages_arr;
+ bad_ram_pages_count = count;
+}
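For clarity, the boot-arg parsed above is a comma-separated list of offsets from the start of DRAM, e.g. (hypothetical values) bad_ram_pages=0x4000,0x1f8000; each offset is truncated to a page boundary before being recorded.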
+
+/*
+ * Comparison routine for qsort of array of physical addresses.
+ */
+static int
+pmap_paddr_cmp(void *a, void *b)
+{
+ pmap_paddr_t *x = a;
+ pmap_paddr_t *y = b;
+ if (*x < *y) {
+ return -1;
+ }
+ return *x > *y;
+}
+#endif /* defined(__arm64__) */
+
+/*
+ * Look up ppn in the sorted bad_ram_pages array.
+ */
+bool
+pmap_is_bad_ram(__unused ppnum_t ppn)
+{
+#if defined(__arm64__)
+ pmap_paddr_t pa = ptoa(ppn);
+ int low = 0;
+ int high = bad_ram_pages_count - 1;
+ int mid;
+
+ while (low <= high) {
+ mid = (low + high) / 2;
+ if (bad_ram_pages[mid] < pa) {
+ low = mid + 1;
+ } else if (bad_ram_pages[mid] > pa) {
+ high = mid - 1;
+ } else {
+ return true;
+ }
+ }
+#endif /* defined(__arm64__) */
+ return false;
+}
+
+/*
+ * Initialize the count of available pages. If we have bad_ram_pages, sort that list as well.
+ * No lock is needed here, as this code runs while kernel boot-up is still single-threaded.
+ */
+static void
+initialize_ram_ranges(void)
+{
+ pmap_paddr_t first = first_avail;
+ pmap_paddr_t end = avail_end;
+
+ assert(first <= end);
+ assert(first == (first & ~PAGE_MASK));
+ assert(end == (end & ~PAGE_MASK));
+ avail_page_count = atop(end - first);
+
+#if defined(__arm64__)
+ /*
+ * XXX Temporary code for testing, until there is iboot support
+ *
+ * Parse a list of known bad pages from a boot-args.
+ */
+ parse_bad_ram_pages_boot_arg();
+
+ /*
+ * Sort and filter the bad pages list and adjust avail_page_count.
+ */
+ if (bad_ram_pages_count != 0) {
+ qsort(bad_ram_pages, bad_ram_pages_count, sizeof(*bad_ram_pages), (cmpfunc_t)pmap_paddr_cmp);
+ skip_pages = bad_ram_pages;
+ skip_pages_count = bad_ram_pages_count;
+
+ /* ignore any pages before first */
+ while (skip_pages_count > 0 && skip_pages[0] < first) {
+ --skip_pages_count;
+ ++skip_pages;
+ }
+
+ /* ignore any pages at or after end */
+ while (skip_pages_count > 0 && skip_pages[skip_pages_count - 1] >= end) {
+ --skip_pages_count;
+ }
+
+ avail_page_count -= skip_pages_count;
+ }
+#endif /* defined(__arm64__) */
+ need_ram_ranges_init = false;
+}
+
unsigned int
pmap_free_pages(
void)
{
+ if (need_ram_ranges_init) {
+ initialize_ram_ranges();
+ }
+ return avail_page_count;
+}
+
+unsigned int
+pmap_free_pages_span(
+ void)
+{
+ if (need_ram_ranges_init) {
+ initialize_ram_ranges();
+ }
return (unsigned int)atop(avail_end - first_avail);
}
pmap_next_page(
ppnum_t *pnum)
{
+ if (need_ram_ranges_init) {
+ initialize_ram_ranges();
+ }
+
+#if defined(__arm64__)
+ /*
+ * Skip over any known bad pages.
+ */
+ while (skip_pages_count > 0 && first_avail == skip_pages[0]) {
+ first_avail += PAGE_SIZE;
+ ++skip_pages;
+ --skip_pages_count;
+ }
+#endif /* defined(__arm64__) */
+
if (first_avail != avail_end) {
*pnum = (ppnum_t)atop(first_avail);
first_avail += PAGE_SIZE;
+ assert(avail_page_count > 0);
+ --avail_page_count;
return TRUE;
}
+ assert(avail_page_count == 0);
return FALSE;
}
+void
+pmap_retire_page(
+ __unused ppnum_t pnum)
+{
+ /* XXX Justin TBD - mark the page as unusable in pmap data structures */
+}
+
/*
* Initialize the pmap module.
/* Remove the TTE. */
pmap_lock(pmap);
- pmap_tte_deallocate(pmap, ttep, level);
+ pmap_tte_deallocate(pmap, 0, 0, false, ttep, level);
pmap_unlock(pmap);
}
}
for (i = 0; i < pmap->tte_index_max; i++) {
ttep = &pmap->tte[i];
if ((*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
- pmap_tte_deallocate(pmap, ttep, PMAP_TT_L1_LEVEL);
+ pmap_tte_deallocate(pmap, 0, 0, false, ttep, PMAP_TT_L1_LEVEL);
}
}
pmap_unlock(pmap);
* must have a refcnt of zero before the TTE can be removed.
*
* @param pmap The pmap containing the page table whose TTE is being removed.
+ * @param va_start Beginning of the VA range mapped by the table being removed, for TLB maintenance
+ * @param va_end Non-inclusive end of the VA range mapped by the table being removed, for TLB maintenance
+ * @param need_strong_sync Indicates whether strong DSB should be used to synchronize TLB maintenance
* @param ttep Pointer to the TTE that should be cleared out.
* @param level The level of the page table that contains the TTE to be removed.
*/
static void
pmap_tte_remove(
pmap_t pmap,
+ vm_offset_t va_start,
+ vm_offset_t va_end,
+ bool need_strong_sync,
tt_entry_t *ttep,
unsigned int level)
{
*ttep = (tt_entry_t) 0;
FLUSH_PTE_STRONG(ttep);
#endif /* (__ARM_VMSA__ == 7) */
+ // If given a VA range, we're being asked to flush the TLB before the table in ttep is freed.
+ if (va_end > va_start) {
+#if (__ARM_VMSA__ == 7)
+ // Ensure intermediate translations are flushed for each 1MB block
+ flush_mmu_tlb_entry_async((va_start & ~ARM_TT_L1_PT_OFFMASK) | (pmap->hw_asid & 0xff));
+ flush_mmu_tlb_entry_async(((va_start & ~ARM_TT_L1_PT_OFFMASK) + ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
+ flush_mmu_tlb_entry_async(((va_start & ~ARM_TT_L1_PT_OFFMASK) + 2 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
+ flush_mmu_tlb_entry_async(((va_start & ~ARM_TT_L1_PT_OFFMASK) + 3 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
+#endif
+ PMAP_UPDATE_TLBS(pmap, va_start, va_end, need_strong_sync);
+ }
}
/**
* must have a refcnt of PT_DESC_REFCOUNT in their page table descriptor.
*
* @param pmap The pmap that owns the page table to be deallocated.
+ * @param va_start Beginning of the VA range mapped by the table being removed, for TLB maintenance
+ * @param va_end Non-inclusive end of the VA range mapped by the table being removed, for TLB maintenance
+ * @param need_strong_sync Indicates whether strong DSB should be used to synchronize TLB maintenance
* @param ttep Pointer to the `level` TTE to remove.
* @param level The level of the table that contains an entry pointing to the
* table to be removed. The deallocated page table will be a
static void
pmap_tte_deallocate(
pmap_t pmap,
+ vm_offset_t va_start,
+ vm_offset_t va_end,
+ bool need_strong_sync,
tt_entry_t *ttep,
unsigned int level)
{
}
#endif /* MACH_ASSERT */
- pmap_tte_remove(pmap, ttep, level);
+ pmap_tte_remove(pmap, va_start, va_end, need_strong_sync, ttep, level);
if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
uint64_t pmap_page_size = pt_attr_page_size(pmap_get_pt_attr(pmap));
* entirely within one pte-page. This is NOT checked.
* Assumes that the pte-page exists.
*
- * Returns the number of PTE changed, and sets *rmv_cnt
- * to the number of SPTE changed.
+ * Returns the number of PTEs changed.
*/
static int
pmap_remove_range(
pmap_t pmap,
vm_map_address_t va,
pt_entry_t *bpte,
- pt_entry_t *epte,
- uint32_t *rmv_cnt)
+ pt_entry_t *epte)
{
bool need_strong_sync = false;
- int num_changed = pmap_remove_range_options(pmap, va, bpte, epte, rmv_cnt,
+ int num_changed = pmap_remove_range_options(pmap, va, bpte, epte, NULL,
&need_strong_sync, PMAP_OPTIONS_REMOVE);
if (num_changed > 0) {
PMAP_UPDATE_TLBS(pmap, va,
vm_map_address_t va,
pt_entry_t *bpte,
pt_entry_t *epte,
- uint32_t *rmv_cnt,
+ vm_map_address_t *eva,
bool *need_strong_sync __unused,
int options)
{
pt_entry_t *cpte;
+ size_t npages = 0;
int num_removed, num_unwired;
int num_pte_changed;
int pai = 0;
int num_external, num_internal, num_reusable;
int num_alt_internal;
uint64_t num_compressed, num_alt_compressed;
+ int16_t refcnt = 0;
pmap_assert_locked_w(pmap);
const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
- uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
+ uint64_t pmap_page_size = PAGE_RATIO * pt_attr_page_size(pt_attr);
if (__improbable((uintptr_t)epte > (((uintptr_t)bpte + pmap_page_size) & ~(pmap_page_size - 1)))) {
panic("%s: PTE range [%p, %p) in pmap %p crosses page table boundary", __func__, bpte, epte, pmap);
num_alt_compressed = 0;
for (cpte = bpte; cpte < epte;
- cpte += 1, va += pmap_page_size) {
+ cpte += PAGE_RATIO, va += pmap_page_size) {
pt_entry_t spte;
boolean_t managed = FALSE;
+ /*
+ * Check for pending preemption on every iteration: the PV list may be arbitrarily long,
+ * so we need to be as aggressive as possible in checking for preemption when we can.
+ */
+ if (__improbable((eva != NULL) && npages++ && pmap_pending_preemption())) {
+ *eva = va;
+ break;
+ }
spte = *((volatile pt_entry_t*)cpte);
#if CONFIG_PGTRACE
* our "compressed" markers,
* so let's update it here.
*/
- if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_info(cpte)->refcnt)) <= 0) {
- panic("pmap_remove_range_options: over-release of ptdp %p for pte %p", ptep_get_ptd(cpte), cpte);
- }
+ --refcnt;
spte = *((volatile pt_entry_t*)cpte);
}
/*
(pmap != kernel_pmap)) {
assertf(!ARM_PTE_IS_COMPRESSED(spte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)spte);
assertf((spte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)spte);
- if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_info(cpte)->refcnt)) <= 0) {
- panic("pmap_remove_range_options: over-release of ptdp %p for pte %p", ptep_get_ptd(cpte), cpte);
- }
- if (rmv_cnt) {
- (*rmv_cnt)++;
- }
+ --refcnt;
}
if (pte_is_wired(spte)) {
* Update the counts
*/
OSAddAtomic(-num_removed, (SInt32 *) &pmap->stats.resident_count);
- pmap_ledger_debit(pmap, task_ledgers.phys_mem, num_removed * pmap_page_size * PAGE_RATIO);
+ pmap_ledger_debit(pmap, task_ledgers.phys_mem, num_removed * pmap_page_size);
if (pmap != kernel_pmap) {
+ if ((refcnt != 0) && (OSAddAtomic16(refcnt, (SInt16 *) &(ptep_get_info(bpte)->refcnt)) <= 0)) {
+ panic("pmap_remove_range_options: over-release of ptdp %p for pte [%p, %p)", ptep_get_ptd(bpte), bpte, epte);
+ }
/* update pmap stats... */
OSAddAtomic(-num_unwired, (SInt32 *) &pmap->stats.wired_count);
if (num_external) {
orig_compressed);
}
/* ... and ledgers */
- pmap_ledger_debit(pmap, task_ledgers.wired_mem, (num_unwired) * pmap_page_size * PAGE_RATIO);
- pmap_ledger_debit(pmap, task_ledgers.internal, (num_internal) * pt_attr_page_size(pt_attr) * PAGE_RATIO);
- pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, (num_alt_internal) * pt_attr_page_size(pt_attr) * PAGE_RATIO);
- pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, (num_alt_compressed) * pt_attr_page_size(pt_attr) * PAGE_RATIO);
- pmap_ledger_debit(pmap, task_ledgers.internal_compressed, (num_compressed) * pt_attr_page_size(pt_attr) * PAGE_RATIO);
+ pmap_ledger_debit(pmap, task_ledgers.wired_mem, (num_unwired) * pmap_page_size);
+ pmap_ledger_debit(pmap, task_ledgers.internal, (num_internal) * pmap_page_size);
+ pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, (num_alt_internal) * pmap_page_size);
+ pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, (num_alt_compressed) * pmap_page_size);
+ pmap_ledger_debit(pmap, task_ledgers.internal_compressed, (num_compressed) * pmap_page_size);
/* make needed adjustments to phys_footprint */
pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
((num_internal -
num_alt_internal) +
(num_compressed -
- num_alt_compressed)) * pmap_page_size * PAGE_RATIO);
+ num_alt_compressed)) * pmap_page_size);
}
/* flush the ptable entries we have written */
pmap_remove_options(pmap, start, end, PMAP_OPTIONS_REMOVE);
}
-MARK_AS_PMAP_TEXT static int
+MARK_AS_PMAP_TEXT static vm_map_address_t
pmap_remove_options_internal(
pmap_t pmap,
vm_map_address_t start,
vm_map_address_t end,
int options)
{
- int remove_count = 0;
+ vm_map_address_t eva = end;
pt_entry_t *bpte, *epte;
pt_entry_t *pte_p;
tt_entry_t *tte_p;
- uint32_t rmv_spte = 0;
+ int remove_count = 0;
bool need_strong_sync = false;
- bool flush_tte = false;
if (__improbable(end < start)) {
panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
bpte = &pte_p[pte_index(pmap, pt_attr, start)];
epte = bpte + ((end - start) >> pt_attr_leaf_shift(pt_attr));
- remove_count += pmap_remove_range_options(pmap, start, bpte, epte,
- &rmv_spte, &need_strong_sync, options);
+ remove_count = pmap_remove_range_options(pmap, start, bpte, epte, &eva,
+ &need_strong_sync, options);
- if (rmv_spte && (ptep_get_info(pte_p)->refcnt == 0) &&
- (pmap != kernel_pmap) && (pmap->nested == FALSE)) {
- pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
- flush_tte = true;
+ if ((pmap != kernel_pmap) && (pmap->nested == FALSE) && (ptep_get_info(pte_p)->refcnt == 0)) {
+ pmap_tte_deallocate(pmap, start, eva, need_strong_sync, tte_p, pt_attr_twig_level(pt_attr));
+ remove_count = 0; // pmap_tte_deallocate has flushed the TLB for us
}
}
pmap_unlock(pmap);
if (remove_count > 0) {
- PMAP_UPDATE_TLBS(pmap, start, end, need_strong_sync);
- } else if (flush_tte) {
- pmap_get_pt_ops(pmap)->flush_tlb_tte_async(start, pmap);
- sync_tlb_flush();
+ PMAP_UPDATE_TLBS(pmap, start, eva, need_strong_sync);
}
- return remove_count;
+ return eva;
}
void
vm_map_address_t end,
int options)
{
- int remove_count = 0;
vm_map_address_t va;
if (pmap == PMAP_NULL) {
pmap, (uint64_t)start, (uint64_t)end);
}
#endif
+ assert(get_preemption_level() == 0);
/*
* Invalidate the translation buffer first
}
#if XNU_MONITOR
- remove_count += pmap_remove_options_ppl(pmap, va, l, options);
+ va = pmap_remove_options_ppl(pmap, va, l, options);
pmap_ledger_check_balance(pmap);
#else
- remove_count += pmap_remove_options_internal(pmap, va, l, options);
+ va = pmap_remove_options_internal(pmap, va, l, options);
#endif
-
- va = l;
}
PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
remove = FALSE;
break;
default:
+ /* The PPL security model requires that we flush TLBs before we exit if the page may be recycled. */
+ options = options & ~PMAP_OPTIONS_NOFLUSH;
remove = TRUE;
break;
}
if (*pte_p != ARM_PTE_TYPE_FAULT &&
!ARM_PTE_IS_COMPRESSED(*pte_p, pte_p) &&
*pte_p != tmplate) {
- WRITE_PTE_STRONG(pte_p, tmplate);
+ if (options & PMAP_OPTIONS_NOFLUSH) {
+ WRITE_PTE_FAST(pte_p, tmplate);
+ } else {
+ WRITE_PTE_STRONG(pte_p, tmplate);
+ }
update = TRUE;
}
}
/* Invalidate TLBs for all CPUs using it */
- if (update) {
+ if (update && !(options & PMAP_OPTIONS_NOFLUSH)) {
if (remove || !flush_range ||
((flush_range->ptfr_pmap != pmap) || va >= flush_range->ptfr_end || va < flush_range->ptfr_start)) {
pmap_get_pt_ops(pmap)->flush_tlb_region_async(va,
}
}
- UNLOCK_PVH(pai);
-
if (flush_range && tlb_flush_needed) {
if (!remove) {
flush_range->ptfr_flush_needed = true;
tlb_flush_needed = FALSE;
}
}
- if (tlb_flush_needed) {
+
+ /*
+ * If we removed PV entries, ensure prior TLB flushes are complete before we drop the PVH
+ * lock to allow the backing pages to be repurposed. This is a security precaution, aimed
+ * primarily at XNU_MONITOR configurations, to reduce the likelihood of an attacker causing
+ * a page to be repurposed while it is still live in the TLBs.
+ */
+ if (remove && tlb_flush_needed) {
+ sync_tlb_flush();
+ }
+
+ UNLOCK_PVH(pai);
+
+ if (!remove && tlb_flush_needed) {
sync_tlb_flush();
}
pmap_page_protect_options_internal(
ppnum_t ppnum,
vm_prot_t prot,
- unsigned int options)
+ unsigned int options,
+ void *arg)
{
+ if (arg != NULL) {
+ /*
+ * If the argument is non-NULL, the VM layer is conveying its intention that the TLBs should
+ * ultimately be flushed. The nature of ARM TLB maintenance is such that we can flush the
+ * TLBs much more precisely if we do so inline with the pagetable updates, and the PPL security
+ * model requires that we not exit the PPL without performing required TLB flushes anyway.
+ * In that case, force the flush to take place.
+ */
+ options &= ~PMAP_OPTIONS_NOFLUSH;
+ }
pmap_page_protect_options_with_flush_range(ppnum, prot, options, NULL);
}
ppnum_t ppnum,
vm_prot_t prot,
unsigned int options,
- __unused void *arg)
+ void *arg)
{
pmap_paddr_t phys = ptoa(ppnum);
PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, ppnum, prot);
#if XNU_MONITOR
- pmap_page_protect_options_ppl(ppnum, prot, options);
+ pmap_page_protect_options_ppl(ppnum, prot, options, arg);
#else
- pmap_page_protect_options_internal(ppnum, prot, options);
+ pmap_page_protect_options_internal(ppnum, prot, options, arg);
#endif
PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
pmap_protect_options(pmap, b, e, prot, 0, NULL);
}
-MARK_AS_PMAP_TEXT static void
+MARK_AS_PMAP_TEXT static vm_map_address_t
pmap_protect_options_internal(
pmap_t pmap,
vm_map_address_t start,
break;
case VM_PROT_READ | VM_PROT_WRITE:
case VM_PROT_ALL:
- return; /* nothing to do */
+ return end; /* nothing to do */
default:
should_have_removed = TRUE;
}
set_NX = TRUE;
}
+ const uint64_t pmap_page_size = PAGE_RATIO * pt_attr_page_size(pt_attr);
+ vm_map_address_t va = start;
+ unsigned int npages = 0;
+
VALIDATE_PMAP(pmap);
pmap_lock(pmap);
for (pte_p = bpte_p;
pte_p < epte_p;
- pte_p += PAGE_RATIO) {
+ pte_p += PAGE_RATIO, va += pmap_page_size) {
+ ++npages;
+ if (__improbable(!(npages % PMAP_DEFAULT_PREEMPTION_CHECK_PAGE_INTERVAL) &&
+ pmap_pending_preemption())) {
+ break;
+ }
pt_entry_t spte;
#if DEVELOPMENT || DEBUG
boolean_t force_write = FALSE;
UNLOCK_PVH(pai);
}
}
- FLUSH_PTE_RANGE_STRONG(bpte_p, epte_p);
- PMAP_UPDATE_TLBS(pmap, start, end, need_strong_sync);
+ FLUSH_PTE_RANGE_STRONG(bpte_p, pte_p);
+ PMAP_UPDATE_TLBS(pmap, start, va, need_strong_sync);
+ } else {
+ va = end;
}
pmap_unlock(pmap);
+ return va;
}
void
pmap, (uint64_t)b, (uint64_t)e);
}
+ assert(get_preemption_level() == 0);
+
#if DEVELOPMENT || DEBUG
if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
}
#if XNU_MONITOR
- pmap_protect_options_ppl(pmap, beg, l, prot, options, args);
+ beg = pmap_protect_options_ppl(pmap, beg, l, prot, options, args);
#else
- pmap_protect_options_internal(pmap, beg, l, prot, options, args);
+ beg = pmap_protect_options_internal(pmap, beg, l, prot, options, args);
#endif
-
- beg = l;
}
PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END);
VALIDATE_PMAP(pmap);
+#if XNU_MONITOR
+ if (__improbable((options & PMAP_OPTIONS_NOWAIT) == 0)) {
+ panic("pmap_enter_options() called without PMAP_OPTIONS_NOWAIT set");
+ }
+#endif
+
__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
if ((v) & pt_attr_leaf_offmask(pt_attr)) {
spte = *pte_p;
- if (ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
+ if (ARM_PTE_IS_COMPRESSED(spte, pte_p) && !refcnt_updated) {
/*
* "pmap" should be locked at this point, so this should
* not race with another pmap_enter() or pmap_remove_range().
}
if ((spte != ARM_PTE_TYPE_FAULT) && (pte_to_pa(spte) != pa)) {
- pmap_remove_range(pmap, v, pte_p, pte_p + PAGE_RATIO, 0);
+ pmap_remove_range(pmap, v, pte_p, pte_p + PAGE_RATIO);
}
pte = pa_to_pte(pa) | ARM_PTE_TYPE;
vm_map_address_t nest_vaddr;
pt_entry_t *nest_pte_p;
- nest_vaddr = v - pmap->nested_region_addr + pmap->nested_region_addr;
+ nest_vaddr = v;
if ((nest_vaddr >= pmap->nested_region_addr)
&& (nest_vaddr < (pmap->nested_region_addr + pmap->nested_region_size))
#endif
if (prot & VM_PROT_WRITE) {
if (pa_valid(pa) && (!pa_test_bits(pa, PP_ATTR_MODIFIED))) {
+ assert(!pmap->nested); /* no write access in a nested pmap */
if (fault_type & VM_PROT_WRITE) {
if (set_XO) {
pte |= pt_attr_leaf_rwna(pt_attr);
} else {
pte |= pt_attr_leaf_ro(pt_attr);
}
- pa_set_bits(pa, PP_ATTR_REFERENCED);
+ /*
+ * Mark the page as MODFAULT so that a subsequent write
+ * may be handled through arm_fast_fault().
+ */
+ pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODFAULT);
pte_set_was_writeable(pte, true);
}
} else {
vm_prot_t allow_mode = VM_PROT_ALL;
#if XNU_MONITOR
- if (bits & PP_ATTR_PPL_OWNED_BITS) {
+ if (__improbable(bits & PP_ATTR_PPL_OWNED_BITS)) {
panic("%s: illegal request, "
"pn=%u, bits=%#x, options=%#x, arg=%p, flush_range=%p",
__FUNCTION__,
pn, bits, options, arg, flush_range);
}
#endif
+ if ((arg != NULL) || (flush_range != NULL)) {
+ options = options & ~PMAP_OPTIONS_NOFLUSH;
+ }
- if ((bits & PP_ATTR_MODIFIED) &&
- (options & PMAP_OPTIONS_NOFLUSH) &&
- (arg == NULL) &&
- (flush_range == NULL)) {
+ if (__improbable((bits & PP_ATTR_MODIFIED) &&
+ (options & PMAP_OPTIONS_NOFLUSH))) {
panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p,%p): "
"should not clear 'modified' without flushing TLBs\n",
pn, bits, options, arg, flush_range);
if (options & PMAP_OPTIONS_CLEAR_WRITE) {
assert(bits == PP_ATTR_MODIFIED);
- pmap_page_protect_options_with_flush_range(pn, (VM_PROT_ALL & ~VM_PROT_WRITE), 0, flush_range);
+ pmap_page_protect_options_with_flush_range(pn, (VM_PROT_ALL & ~VM_PROT_WRITE), options, flush_range);
/*
* We short circuit this case; it should not need to
* invoke arm_force_fast_fault, so just clear the modified bit.
}
#if __ARM_RANGE_TLBI__
-MARK_AS_PMAP_TEXT static void
+MARK_AS_PMAP_TEXT static vm_map_address_t
phys_attribute_clear_twig_internal(
pmap_t pmap,
vm_map_address_t start,
const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
assert(end >= start);
assert((end - start) <= pt_attr_twig_size(pt_attr));
+ const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
+ vm_map_address_t va = start;
pt_entry_t *pte_p, *start_pte_p, *end_pte_p, *curr_pte_p;
tt_entry_t *tte_p;
tte_p = pmap_tte(pmap, start);
+ unsigned int npages = 0;
if (tte_p == (tt_entry_t *) NULL) {
- return;
+ return end;
}
if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
start_pte_p = &pte_p[pte_index(pmap, pt_attr, start)];
end_pte_p = start_pte_p + ((end - start) >> pt_attr_leaf_shift(pt_attr));
assert(end_pte_p >= start_pte_p);
- for (curr_pte_p = start_pte_p; curr_pte_p < end_pte_p; curr_pte_p++) {
+ for (curr_pte_p = start_pte_p; curr_pte_p < end_pte_p; curr_pte_p++, va += pmap_page_size) {
+ if (__improbable(npages++ && pmap_pending_preemption())) {
+ return va;
+ }
pmap_paddr_t pa = pte_to_pa(*((volatile pt_entry_t*)curr_pte_p));
if (pa_valid(pa)) {
ppnum_t pn = (ppnum_t) atop(pa);
}
}
}
+ return end;
}
-MARK_AS_PMAP_TEXT static void
+MARK_AS_PMAP_TEXT static vm_map_address_t
phys_attribute_clear_range_internal(
pmap_t pmap,
vm_map_address_t start,
curr_end = end;
}
- phys_attribute_clear_twig_internal(pmap, va, curr_end, bits, options, &flush_range);
- va = curr_end;
+ va = phys_attribute_clear_twig_internal(pmap, va, curr_end, bits, options, &flush_range);
+ if ((va < curr_end) || pmap_pending_preemption()) {
+ break;
+ }
}
pmap_unlock_ro(pmap);
if (flush_range.ptfr_flush_needed) {
+ flush_range.ptfr_end = va;
pmap_get_pt_ops(pmap)->flush_tlb_region_async(
flush_range.ptfr_start,
flush_range.ptfr_end - flush_range.ptfr_start,
flush_range.ptfr_pmap);
sync_tlb_flush();
}
+ return va;
}
static void
unsigned int bits,
unsigned int options)
{
+ assert(get_preemption_level() == 0);
+
PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR_RANGE) | DBG_FUNC_START, bits);
+ while (start < end) {
#if XNU_MONITOR
- phys_attribute_clear_range_ppl(pmap, start, end, bits, options);
+ start = phys_attribute_clear_range_ppl(pmap, start, end, bits, options);
#else
- phys_attribute_clear_range_internal(pmap, start, end, bits, options);
+ start = phys_attribute_clear_range_internal(pmap, start, end, bits, options);
#endif
+ }
PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR_RANGE) | DBG_FUNC_END);
}
#endif /* MACH_ASSERT && XNU_MONITOR */
if (result && update_pte) {
- if (*pte_p != ARM_PTE_TYPE_FAULT &&
- !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
+ if (options & PMAP_OPTIONS_NOFLUSH) {
+ WRITE_PTE_FAST(pte_p, tmplate);
+ } else {
WRITE_PTE_STRONG(pte_p, tmplate);
if (!flush_range ||
((flush_range->ptfr_pmap != pmap) || va >= flush_range->ptfr_end || va < flush_range->ptfr_start)) {
pt_attr_page_size(pt_attr) * PAGE_RATIO, pmap);
}
tlb_flush_needed = TRUE;
- } else {
- WRITE_PTE(pte_p, tmplate);
- __builtin_arm_isb(ISB_SY);
}
}
vm_prot_t allow_mode,
int options)
{
- if (__improbable((options & PMAP_OPTIONS_FF_LOCKED) != 0)) {
+ if (__improbable((options & (PMAP_OPTIONS_FF_LOCKED | PMAP_OPTIONS_NOFLUSH)) != 0)) {
panic("arm_force_fast_fault(0x%x, 0x%x, 0x%x): invalid options", ppnum, allow_mode, options);
}
return arm_force_fast_fault_with_flush_range(ppnum, allow_mode, options, NULL);
if (pmap == kernel_pmap) {
tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
} else {
+ assert(!pmap->nested); /* no write access in a nested pmap */
tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pmap_get_pt_attr(pmap)));
}
}
pmap_paddr_t pa;
VALIDATE_PMAP(pmap);
- pmap_lock(pmap);
+ pmap_lock_ro(pmap);
/*
* If the entry doesn't exist, is completely invalid, or is already
if ((spte == ARM_PTE_TYPE_FAULT) ||
ARM_PTE_IS_COMPRESSED(spte, ptep)) {
- pmap_unlock(pmap);
+ pmap_unlock_ro(pmap);
return result;
}
if (!pa_valid(pa)) {
- pmap_unlock(pmap);
+ pmap_unlock_ro(pmap);
#if XNU_MONITOR
if (pmap_cache_attributes((ppnum_t)atop(pa)) & PP_ATTR_MONITOR) {
return KERN_PROTECTION_FAILURE;
break;
}
} else {
- pmap_unlock(pmap);
+ pmap_unlock_ro(pmap);
return result;
}
- if ((IS_REFFAULT_PAGE(pai)) ||
- ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai))) {
+ if ((result != KERN_SUCCESS) &&
+ ((IS_REFFAULT_PAGE(pai)) || ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai)))) {
/*
* An attempted access will always clear ref/mod fault state, as
* appropriate for the fault type. arm_clear_fast_fault will
}
}
+ /*
+ * If the PTE already has sufficient permissions, we can report the fault as handled.
+ * This may happen, for example, if multiple threads trigger roughly simultaneous faults
+ * on mappings of the same page.
+ */
+ if ((result == KERN_FAILURE) && (spte & ARM_PTE_AF)) {
+ uintptr_t ap_ro, ap_rw, ap_x;
+ if (pmap == kernel_pmap) {
+ ap_ro = ARM_PTE_AP(AP_RONA);
+ ap_rw = ARM_PTE_AP(AP_RWNA);
+ ap_x = ARM_PTE_NX;
+ } else {
+ ap_ro = pt_attr_leaf_ro(pmap_get_pt_attr(pmap));
+ ap_rw = pt_attr_leaf_rw(pmap_get_pt_attr(pmap));
+ ap_x = pt_attr_leaf_x(pmap_get_pt_attr(pmap));
+ }
+ /*
+ * NOTE: this doesn't currently handle user-XO mappings. Depending upon the
+ * hardware they may be xPRR-protected, in which case they'll be handled
+ * by the is_pte_xprr_protected() case above. Additionally, the exception
+ * handling path currently does not call arm_fast_fault() without at least
+ * VM_PROT_READ in fault_type.
+ */
+ if (((spte & ARM_PTE_APMASK) == ap_rw) ||
+ (!(fault_type & VM_PROT_WRITE) && ((spte & ARM_PTE_APMASK) == ap_ro))) {
+ if (!(fault_type & VM_PROT_EXECUTE) || ((spte & ARM_PTE_XMASK) == ap_x)) {
+ result = KERN_SUCCESS;
+ }
+ }
+ }
+
UNLOCK_PVH(pai);
- pmap_unlock(pmap);
+ pmap_unlock_ro(pmap);
return result;
}
adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
adjusted_start = ((start + adjust_offmask) & ~adjust_offmask);
adjusted_end = end & ~adjust_offmask;
- bool modified = false;
/* Iterate over the range, trying to remove TTEs. */
for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += pt_attr_twig_size(pt_attr)) {
(pmap != kernel_pmap)) {
if (pmap->nested == TRUE) {
/* Deallocate for the nested map. */
- pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
+ pmap_tte_deallocate(pmap, cur, cur + PAGE_SIZE, false, tte_p, pt_attr_twig_level(pt_attr));
} else {
/* Just remove for the parent map. */
- pmap_tte_remove(pmap, tte_p, pt_attr_twig_level(pt_attr));
+ pmap_tte_remove(pmap, cur, cur + PAGE_SIZE, false, tte_p, pt_attr_twig_level(pt_attr));
}
-
- pmap_get_pt_ops(pmap)->flush_tlb_tte_async(cur, pmap);
- modified = true;
}
}
pmap_unlock(pmap);
}
- if (modified) {
- sync_tlb_flush();
- }
-
#if (__ARM_VMSA__ > 7)
/* Remove empty L2 TTs. */
adjusted_start = ((start + pt_attr_ln_offmask(pt_attr, PMAP_TT_L1_LEVEL)) & ~pt_attr_ln_offmask(pt_attr, PMAP_TT_L1_LEVEL));
}
if (remove_tt1e) {
- pmap_tte_deallocate(pmap, tt1e_p, PMAP_TT_L1_LEVEL);
- PMAP_UPDATE_TLBS(pmap, cur, cur + PAGE_SIZE, false);
+ pmap_tte_deallocate(pmap, cur, cur + PAGE_SIZE, false, tt1e_p, PMAP_TT_L1_LEVEL);
}
pmap_unlock(pmap);
pmap_sign_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
{
void *res = NULL;
- boolean_t current_intr_state = ml_set_interrupts_enabled(FALSE);
+ uint64_t current_intr_state = pmap_interrupts_disable();
uint64_t saved_jop_state = ml_enable_user_jop_key(jop_key);
switch (key) {
}
ml_disable_user_jop_key(jop_key, saved_jop_state);
- ml_set_interrupts_enabled(current_intr_state);
+ pmap_interrupts_restore(current_intr_state);
return res;
}
}
void *res = NULL;
- boolean_t current_intr_state = ml_set_interrupts_enabled(FALSE);
+ uint64_t current_intr_state = pmap_interrupts_disable();
uint64_t saved_jop_state = ml_enable_user_jop_key(jop_key);
res = ml_auth_ptr_unchecked(value, key, discriminator);
ml_disable_user_jop_key(jop_key, saved_jop_state);
- ml_set_interrupts_enabled(current_intr_state);
+ pmap_interrupts_restore(current_intr_state);
return res;
}
#endif
}
-MARK_AS_PMAP_TEXT static void
-flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap)
-{
-#if (__ARM_VMSA__ == 7)
- flush_mmu_tlb_entry_async((va & ~ARM_TT_L1_PT_OFFMASK) | (pmap->hw_asid & 0xff));
- flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
- flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 2 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
- flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 3 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
-#else
- flush_mmu_tlb_entry_async(tlbi_addr(va & ~pt_attr_twig_offmask(pmap_get_pt_attr(pmap))) | tlbi_asid(pmap->hw_asid));
-#endif
-}
-
MARK_AS_PMAP_TEXT static void
flush_mmu_tlb_full_asid_async(pmap_t pmap)
{
UNLOCK_PVH(pai);
- pmap_page_protect_options_internal((ppnum_t)atop(pa), VM_PROT_READ, 0);
+ pmap_page_protect_options_internal((ppnum_t)atop(pa), VM_PROT_READ, 0, NULL);
}
/*
#define PMAP_PGTRACE_LOCK(p) \
do { \
- *(p) = ml_set_interrupts_enabled(false); \
+ *(p) = pmap_interrupts_disable(); \
if (simple_lock_try(&(pmap_pgtrace.lock), LCK_GRP_NULL)) break; \
- ml_set_interrupts_enabled(*(p)); \
+ pmap_interrupts_restore(*(p)); \
} while (true)
#define PMAP_PGTRACE_UNLOCK(p) \
do { \
simple_unlock(&(pmap_pgtrace.lock)); \
- ml_set_interrupts_enabled(*(p)); \
+ pmap_interrupts_restore(*(p)); \
} while (0)
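Since the pgtrace lock macros now save interrupt state via pmap_interrupts_disable(), callers keep that state in a uint64_t rather than a bool. A minimal, hypothetical usage sketch (example_pgtrace_locked_op is illustrative, not part of this patch):

static void
example_pgtrace_locked_op(void)
{
	uint64_t ints;	/* saved interrupt state returned by pmap_interrupts_disable() */

	PMAP_PGTRACE_LOCK(&ints);
	/* ... inspect or update pmap_pgtrace state here ... */
	PMAP_PGTRACE_UNLOCK(&ints);
}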
#define PGTRACE_WRITE_PTE(pte_p, pte_entry) \
static bool
pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end)
{
- bool ints;
+ uint64_t ints;
queue_head_t *q = &(pmap_pgtrace.pages);
pmap_paddr_t pa_page;
pt_entry_t *ptep, *cptep;
static void
pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t va)
{
- bool ints, found = false;
+ uint64_t ints, found = false;
pmap_pgtrace_page_t *p;
pt_entry_t *ptep;
static void
pmap_pgtrace_remove_all_clone(pmap_paddr_t pa)
{
- bool ints;
+ uint64_t ints;
pmap_pgtrace_page_t *p;
pt_entry_t *ptep;
int ret = 0;
pt_entry_t *ptep;
queue_head_t *q = &(pmap_pgtrace.pages);
- bool ints;
+ uint64_t ints;
vm_map_offset_t cur_page, end_page;
if (start > end) {
pmap_pgtrace_delete_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
{
int ret = 0;
- bool ints;
+ uint64_t ints;
queue_head_t *q = &(pmap_pgtrace.pages);
pmap_pgtrace_page_t *p;
vm_map_offset_t cur_page, end_page;
pt_entry_t *ptep;
pgtrace_run_result_t res;
pmap_pgtrace_page_t *p;
- bool ints, found = false;
+ uint64_t ints, found = false;
pmap_paddr_t pa;
// Quick check if we are interested
#endif
}
+MARK_AS_PMAP_TEXT static void
+pmap_set_compilation_service_cdhash_internal(const uint8_t cdhash[CS_CDHASH_LEN])
+{
+ pmap_simple_lock(&pmap_compilation_service_cdhash_lock);
+ memcpy(pmap_compilation_service_cdhash, cdhash, CS_CDHASH_LEN);
+ pmap_simple_unlock(&pmap_compilation_service_cdhash_lock);
+
+ pmap_cs_log("Added Compilation Service CDHash through the PPL: 0x%02X 0x%02X 0x%02X 0x%02X", cdhash[0], cdhash[1], cdhash[2], cdhash[4]);
+}
+
+MARK_AS_PMAP_TEXT static bool
+pmap_match_compilation_service_cdhash_internal(const uint8_t cdhash[CS_CDHASH_LEN])
+{
+ bool match = false;
+
+ pmap_simple_lock(&pmap_compilation_service_cdhash_lock);
+ if (bcmp(pmap_compilation_service_cdhash, cdhash, CS_CDHASH_LEN) == 0) {
+ match = true;
+ }
+ pmap_simple_unlock(&pmap_compilation_service_cdhash_lock);
+
+ if (match) {
+ pmap_cs_log("Matched Compilation Service CDHash through the PPL");
+ }
+
+ return match;
+}
+
+void
+pmap_set_compilation_service_cdhash(const uint8_t cdhash[CS_CDHASH_LEN])
+{
+#if XNU_MONITOR
+ pmap_set_compilation_service_cdhash_ppl(cdhash);
+#else
+ pmap_set_compilation_service_cdhash_internal(cdhash);
+#endif
+}
+
+bool
+pmap_match_compilation_service_cdhash(const uint8_t cdhash[CS_CDHASH_LEN])
+{
+#if XNU_MONITOR
+ return pmap_match_compilation_service_cdhash_ppl(cdhash);
+#else
+ return pmap_match_compilation_service_cdhash_internal(cdhash);
+#endif
+}
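A short, hypothetical usage sketch of the new compilation service CDHash entry points (the caller shown here is illustrative; on XNU_MONITOR configurations both calls trap into the PPL):

static void
example_register_compilation_service(const uint8_t cdhash[CS_CDHASH_LEN])
{
	/* Record the CDHash of the trusted compilation service once at setup. */
	pmap_set_compilation_service_cdhash(cdhash);
}

static bool
example_is_compilation_service(const uint8_t candidate_cdhash[CS_CDHASH_LEN])
{
	/* Later, ask the pmap layer whether a code signature's CDHash matches it. */
	return pmap_match_compilation_service_cdhash(candidate_cdhash);
}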
MARK_AS_PMAP_TEXT static void
pmap_footprint_suspend_internal(
* disable interrupts and preemption to avoid any unexpected memory
* accesses.
*/
- boolean_t old_int_state = ml_set_interrupts_enabled(false);
+ uint64_t old_int_state = pmap_interrupts_disable();
pmap_t old_pmap = current_pmap();
mp_disable_preemption();
pmap_switch(pmap);
pmap_switch(old_pmap);
mp_enable_preemption();
- ml_set_interrupts_enabled(old_int_state);
+ pmap_interrupts_restore(old_int_state);
bool retval = (took_fault == should_fault);
return retval;
}
return KERN_SUCCESS;
}
#endif /* CONFIG_XNUPOST */
+
+/*
+ * The following function should never make it to RELEASE code, since
+ * it provides a way to get the PPL to modify text pages.
+ */
+#if DEVELOPMENT || DEBUG
+
+#define ARM_UNDEFINED_INSN 0xe7f000f0
+#define ARM_UNDEFINED_INSN_THUMB 0xde00
+
+/**
+ * Forcibly overwrite executable text with an illegal instruction.
+ *
+ * @note Only used for xnu unit testing.
+ *
+ * @param pa The physical address to corrupt.
+ *
+ * @return KERN_SUCCESS on success.
+ */
+kern_return_t
+pmap_test_text_corruption(pmap_paddr_t pa)
+{
+#if XNU_MONITOR
+ return pmap_test_text_corruption_ppl(pa);
+#else /* XNU_MONITOR */
+ return pmap_test_text_corruption_internal(pa);
+#endif /* XNU_MONITOR */
+}
+
+MARK_AS_PMAP_TEXT kern_return_t
+pmap_test_text_corruption_internal(pmap_paddr_t pa)
+{
+ vm_offset_t va = phystokv(pa);
+ unsigned int pai = pa_index(pa);
+
+ assert(pa_valid(pa));
+
+ LOCK_PVH(pai);
+
+ pv_entry_t **pv_h = pai_to_pvh(pai);
+ assert(!pvh_test_type(pv_h, PVH_TYPE_NULL));
+#if defined(PVH_FLAG_EXEC)
+ const bool need_ap_twiddle = pvh_get_flags(pv_h) & PVH_FLAG_EXEC;
+
+ if (need_ap_twiddle) {
+ pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
+ }
+#endif /* defined(PVH_FLAG_EXEC) */
+
+ /*
+ * The low bit in an instruction address indicates a THUMB instruction
+ */
+ if (va & 1) {
+ va &= ~(vm_offset_t)1;
+ *(uint16_t *)va = ARM_UNDEFINED_INSN_THUMB;
+ } else {
+ *(uint32_t *)va = ARM_UNDEFINED_INSN;
+ }
+
+#if defined(PVH_FLAG_EXEC)
+ if (need_ap_twiddle) {
+ pmap_set_ptov_ap(pai, AP_RONA, FALSE);
+ }
+#endif /* defined(PVH_FLAG_EXEC) */
+
+ InvalidatePoU_IcacheRegion(va, sizeof(uint32_t));
+
+ UNLOCK_PVH(pai);
+
+ return KERN_SUCCESS;
+}
+
+#endif /* DEVELOPMENT || DEBUG */
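A hypothetical test-harness sketch for the new interface (example_text_corruption_test and victim_pa are illustrative; the harness is assumed to supply the physical address of a throwaway instruction and to recover from the resulting undefined-instruction exception):

#if DEVELOPMENT || DEBUG
static kern_return_t
example_text_corruption_test(pmap_paddr_t victim_pa)
{
	/* Ask the pmap layer (the PPL, on XNU_MONITOR) to overwrite the
	 * instruction at victim_pa with an undefined encoding. */
	kern_return_t kr = pmap_test_text_corruption(victim_pa);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* The harness would now execute the corrupted instruction and verify
	 * that the undefined-instruction exception is taken and handled. */
	return KERN_SUCCESS;
}
#endif /* DEVELOPMENT || DEBUG */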
#include <mach/kern_return.h>
#include <mach/machine/vm_types.h>
#include <arm/pmap_public.h>
+#include <kern/ast.h>
#include <mach/arm/thread_status.h>
#if defined(__arm64__)
#include <arm64/tlb.h>
#define PMAP_GC_WAIT 2
#if DEVELOPMENT || DEBUG
-#define pmap_cs_log_h(msg, args...) { if(pmap_cs_log_hacks) printf("PMAP_CS: " msg "\n", args); }
+#define pmap_cs_log_h(msg, args...) { if(pmap_cs_log_hacks) printf("PMAP_CS: " msg "\n", ##args); }
#define pmap_cs_log pmap_cs_log_h
#else
#if HAS_APPLE_PAC
extern void * pmap_sign_user_ptr(void *value, ptrauth_key key, uint64_t data, uint64_t jop_key);
extern void * pmap_auth_user_ptr(void *value, ptrauth_key key, uint64_t data, uint64_t jop_key);
-#endif /* HAS_APPLE_PAC && XNU_MONITOR */
+#endif /* HAS_APPLE_PAC */
/*
* Interfaces implemented as macros.
#define PMAP_LEDGER_ALLOC_INDEX 58
#define PMAP_LEDGER_FREE_INDEX 59
-#if HAS_APPLE_PAC && XNU_MONITOR
+#if HAS_APPLE_PAC
#define PMAP_SIGN_USER_PTR 60
#define PMAP_AUTH_USER_PTR 61
-#endif /* HAS_APPLE_PAC && XNU_MONITOR */
+#endif /* HAS_APPLE_PAC */
#define PHYS_ATTRIBUTE_CLEAR_RANGE_INDEX 66
#define PMAP_SET_VM_MAP_CS_ENFORCED_INDEX 72
+#define PMAP_SET_COMPILATION_SERVICE_CDHASH_INDEX 73
+#define PMAP_MATCH_COMPILATION_SERVICE_CDHASH_INDEX 74
+
-#define PMAP_COUNT 74
+#if DEVELOPMENT || DEBUG
+#define PMAP_TEST_TEXT_CORRUPTION_INDEX 76
+#endif /* DEVELOPMENT || DEBUG */
+
+#define PMAP_COUNT 77
#define PMAP_INVALID_CPU_NUM (~0U)
/* Get the pmap per-CPU data for the current CPU. */
extern pmap_cpu_data_t * pmap_get_cpu_data(void);
+/*
+ * For most batched page operations, we pick a sane default page count
+ * interval at which to check for pending preemption and exit the PPL if found.
+ */
+#define PMAP_DEFAULT_PREEMPTION_CHECK_PAGE_INTERVAL 64
+
+inline bool
+pmap_pending_preemption(void)
+{
+ return !!(*((volatile ast_t*)ast_pending()) & AST_URGENT);
+}
+
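To illustrate how these two pieces fit together, here is a sketch of the batched, preemption-aware pattern the surrounding changes adopt: the internal routine checks pmap_pending_preemption() every PMAP_DEFAULT_PREEMPTION_CHECK_PAGE_INTERVAL pages and returns the address where it stopped, and the outer wrapper re-enters until the range is finished. The names example_batched_op_internal/example_batched_op are hypothetical; the real callers in this patch (e.g. pmap_protect_options) follow the same shape.

static vm_map_address_t
example_batched_op_internal(pmap_t pmap, vm_map_address_t start, vm_map_address_t end)
{
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
	vm_map_address_t va = start;
	unsigned int npages = 0;

	pmap_lock(pmap);
	while (va < end) {
		/* Bail out periodically so an urgent AST is never held off by a long batch. */
		if (__improbable(!(++npages % PMAP_DEFAULT_PREEMPTION_CHECK_PAGE_INTERVAL) &&
		    pmap_pending_preemption())) {
			break;
		}
		/* ... operate on the page mapped at va ... */
		va += pmap_page_size;
	}
	pmap_unlock(pmap);
	return va;	/* where we stopped; equals end once the range is complete */
}

static void
example_batched_op(pmap_t pmap, vm_map_address_t start, vm_map_address_t end)
{
	assert(get_preemption_level() == 0);
	while (start < end) {
		/* Re-enter (through the PPL on XNU_MONITOR) until the helper reports completion. */
		start = example_batched_op_internal(pmap, start, end);
	}
}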
#if XNU_MONITOR
extern boolean_t pmap_ppl_locked_down;
#define pmap_unlock_bit(l, i) hw_unlock_bit(l, i)
#endif
+#if DEVELOPMENT || DEBUG
+extern kern_return_t pmap_test_text_corruption(pmap_paddr_t);
+#endif /* DEVELOPMENT || DEBUG */
+
#endif /* #ifndef ASSEMBLER */
#if __ARM_KERNEL_PROTECT__
#define ARM_PTE_PNX 0x00000000 /* no privilege execute. not impl */
#define ARM_PTE_PNX_MASK (0<<ARM_PTE_NXSHIFT)
+#define ARM_PTE_XMASK (ARM_PTE_PNX_MASK | ARM_PTE_NX_MASK)
+
#define ARM_PTE_TEX0SHIFT 6
#define ARM_PTE_TEX0 (1<<ARM_PTE_TEX0SHIFT)
#define ARM_PTE_TEX0_MASK (1<<ARM_PTE_TEX0SHIFT)
cpu_data_t * cdp;
clock_timebase_init();
- ml_init_lock_timeout();
+
+ if (cpu_number() == master_cpu) {
+ ml_init_lock_timeout();
+ }
cdp = getCpuDatap();
+#define TASK_ADDITIONS_UEXC uint64_t uexc[4];
#define MACHINE_TASK \
void* task_debug; \
TASK_ADDITIONS_PAC \
-
+\
+ TASK_ADDITIONS_UEXC
tlb.h \
$(ARM_HEADER_FILES)
+# Headers installed into System.framework/PrivateHeaders
+INSTALL_MD_LCL_LIST = \
+ $(ARM_PRIVATE_HEADERS)
+
# TODO: Is there a reason that machine_machdep.h is not in this list? If not, these lists can be consolidated.
# Headers used to compile xnu
EXPORT_MD_LIST = \
# These headers will be available with #include <arm64/header_file.h>
EXPORT_MD_DIR = arm64
+INSTALL_MD_DIR = arm64
+
else # $(PLATFORM),MacOSX
extern unsigned long segSizeLAST;
extern unsigned long segSizeLASTDATACONST;
extern unsigned long segSizeTEXTEXEC;
+extern unsigned long segSizeKLD;
typedef struct lock_reg {
uint32_t reg_offset; // Register offset
SECURITY_READ_ONLY_LATE(bool) csr_unsafe_kernel_text = false;
#endif
-#if defined(KERNEL_INTEGRITY_KTRR)
-#define CTRR_LOCK_MSR ARM64_REG_KTRR_LOCK_EL1
-#elif defined(KERNEL_INTEGRITY_CTRR)
-#define CTRR_LOCK_MSR ARM64_REG_CTRR_LOCK_EL1
-#endif
-
/*
* lock_group_t - describes all the parameters xnu needs to know to
* lock down the AMCC/IOA (Lock Group) Read Only Region(s) on cold start.
* +------------------+-----------+-----------------------------------+
* | Largest Address | LAST | <- AMCC RO Region End (rorgn_end) |
* +------------------+-----------+-----------------------------------+
- * | | TEXT_EXEC | <- KTRR RO Region End (ctrr_end) |
+ * | | KLD | <- KTRR RO Region End (ctrr_end) |
+ * | | TEXT_EXEC | |
* +------------------+-----------+-----------------------------------+
* | | ... | |
* +------------------+-----------+-----------------------------------+
assert(segSizeLAST == PAGE_SIZE);
/* assert that segLAST is contiguous and just after/above/numerically higher than KTRR end */
- assert((ctrr_end + 1) == kvtophys(segTEXTEXECB) + segSizeTEXTEXEC);
+ assert((ctrr_end + 1) == kvtophys(segTEXTEXECB) + segSizeTEXTEXEC + segSizeKLD);
/* ensure that iboot and xnu agree on the amcc rorgn range */
assert((rorgn_begin == ctrr_begin) && (rorgn_end == (ctrr_end + segSizeLASTDATACONST + segSizeLAST)));
* | Largest Address | LAST | <- CTRR/AMCC RO Region End |
* | | | (ctrr_end/rorgn_end) |
* +------------------+-----------+------------------------------+
+ * | | PPLDATA_CONST | |
+ * | | PPLTEXT | |
+ * | | KLD | |
* | | TEXT_EXEC | |
* +------------------+-----------+------------------------------+
* | | ... | |
#endif
}
-#if DEVELOPMENT || DEBUG
-static void
-assert_all_lock_groups_unlocked(lock_group_t const *lock_groups)
-{
- uint64_t reg_addr;
- uint64_t ctrr_lock = 0;
- bool locked = false;
- bool write_disabled = false;;
-
- assert(lock_groups);
-
- for (unsigned int lg = 0; lg < MAX_LOCK_GROUPS; lg++) {
- for (unsigned int aperture = 0; aperture < lock_groups[lg].aperture_count; aperture++) {
-#if HAS_IOA
- // Does the lock group define a master lock register?
- if (lock_groups[lg].master_lock_reg.reg_mask != 0) {
- reg_addr = lock_group_va[lg][aperture] + lock_groups[lg].master_lock_reg.reg_offset;
- locked |= ((*(volatile uint32_t *)reg_addr & lock_groups[lg].master_lock_reg.reg_mask) == lock_groups[lg].master_lock_reg.reg_value);
- }
-#endif
- for (unsigned int plane = 0; plane < lock_groups[lg].plane_count; plane++) {
- // Does the lock group define a write disable register?
- if (lock_groups[lg].ctrr_a.write_disable_reg.reg_mask != 0) {
- reg_addr = lock_group_va[lg][aperture] + (plane * lock_groups[lg].plane_stride) + lock_groups[lg].ctrr_a.write_disable_reg.reg_offset;
- write_disabled |= ((*(volatile uint32_t *)reg_addr & lock_groups[lg].ctrr_a.write_disable_reg.reg_mask) == lock_groups[lg].ctrr_a.write_disable_reg.reg_value);
- }
-
- // Does the lock group define a lock register?
- if (lock_groups[lg].ctrr_a.lock_reg.reg_mask != 0) {
- reg_addr = lock_group_va[lg][aperture] + (plane * lock_groups[lg].plane_stride) + lock_groups[lg].ctrr_a.lock_reg.reg_offset;
- locked |= ((*(volatile uint32_t *)reg_addr & lock_groups[lg].ctrr_a.lock_reg.reg_mask) == lock_groups[lg].ctrr_a.lock_reg.reg_value);
- }
- }
- }
- }
-
- ctrr_lock = __builtin_arm_rsr64(CTRR_LOCK_MSR);
-
- assert(!ctrr_lock);
- assert(!write_disabled && !locked);
-}
-#endif
-
static void
lock_all_lock_groups(lock_group_t const *lock_group, vm_offset_t begin, vm_offset_t end)
{
}
}
-static void
-lock_mmu(uint64_t begin, uint64_t end)
-{
-#if defined(KERNEL_INTEGRITY_KTRR)
-
- __builtin_arm_wsr64(ARM64_REG_KTRR_LOWER_EL1, begin);
- __builtin_arm_wsr64(ARM64_REG_KTRR_UPPER_EL1, end);
- __builtin_arm_wsr64(ARM64_REG_KTRR_LOCK_EL1, 1ULL);
-
- /* flush TLB */
-
- __builtin_arm_isb(ISB_SY);
- flush_mmu_tlb();
-
-#elif defined (KERNEL_INTEGRITY_CTRR)
- /* this will lock the entire bootstrap cluster. non bootstrap clusters
- * will be locked by respective cluster master in start.s */
-
- __builtin_arm_wsr64(ARM64_REG_CTRR_A_LWR_EL1, begin);
- __builtin_arm_wsr64(ARM64_REG_CTRR_A_UPR_EL1, end);
-
-#if !defined(APPLEVORTEX)
- /* H12+ changed sequence, must invalidate TLB immediately after setting CTRR bounds */
- __builtin_arm_isb(ISB_SY); /* ensure all prior MSRs are complete */
- flush_mmu_tlb();
-#endif /* !defined(APPLEVORTEX) */
-
- __builtin_arm_wsr64(ARM64_REG_CTRR_CTL_EL1, CTRR_CTL_EL1_A_PXN | CTRR_CTL_EL1_A_MMUON_WRPROTECT);
- __builtin_arm_wsr64(ARM64_REG_CTRR_LOCK_EL1, 1ULL);
-
- uint64_t current_el = __builtin_arm_rsr64("CurrentEL");
- if (current_el == PSR64_MODE_EL2) {
- // CTRR v2 has explicit registers for cluster config. they can only be written in EL2
-
- __builtin_arm_wsr64(ACC_CTRR_A_LWR_EL2, begin);
- __builtin_arm_wsr64(ACC_CTRR_A_UPR_EL2, end);
- __builtin_arm_wsr64(ACC_CTRR_CTL_EL2, CTRR_CTL_EL1_A_PXN | CTRR_CTL_EL1_A_MMUON_WRPROTECT);
- __builtin_arm_wsr64(ACC_CTRR_LOCK_EL2, 1ULL);
- }
-
- __builtin_arm_isb(ISB_SY); /* ensure all prior MSRs are complete */
-#if defined(APPLEVORTEX)
- flush_mmu_tlb();
-#endif /* defined(APPLEVORTEX) */
-
-#else /* defined(KERNEL_INTEGRITY_KTRR) */
-#error KERNEL_INTEGRITY config error
-#endif /* defined(KERNEL_INTEGRITY_KTRR) */
-}
-
#if DEVELOPMENT || DEBUG
static void
assert_amcc_cache_disabled(lock_group_t const *lock_group)
lock_group_t const * const lock_group = find_lock_group_data();
#if DEVELOPMENT || DEBUG
- assert_all_lock_groups_unlocked(lock_group);
-
printf("RO Region Begin: %p End: %p\n", (void *)rorgn_begin, (void *)rorgn_end);
printf("CTRR (MMU) Begin: %p End: %p, setting lockdown\n", (void *)ctrr_begin, (void *)ctrr_end);
// Lock the AMCC/IOA PIO lock registers.
lock_all_lock_groups(lock_group, phystokv(rorgn_begin), phystokv(rorgn_end));
- /*
- * KTRR/CTRR registers are inclusive of the smallest page size granule supported by processor MMU
- * rather than the actual page size in use. Load the last byte of the end page, and let the HW
- * truncate per the smallest page granule supported. Must use same treament in start.s for warm
- * start of APs.
- */
- lock_mmu(ctrr_begin, ctrr_end);
-
// Unmap and free PIO VA space needed to lock down the lock groups.
for (unsigned int lg = 0; lg < MAX_LOCK_GROUPS; lg++) {
for (unsigned int aperture = 0; aperture < lock_group[lg].aperture_count; aperture++) {
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeLINK;
SECURITY_READ_ONLY_LATE(static vm_offset_t) segKLDB;
-SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKLD;
+SECURITY_READ_ONLY_LATE(unsigned long) segSizeKLD;
+SECURITY_READ_ONLY_LATE(static vm_offset_t) segKLDDATAB;
+SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKLDDATA;
SECURITY_READ_ONLY_LATE(vm_offset_t) segLASTB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizeLAST;
SECURITY_READ_ONLY_LATE(vm_offset_t) segLASTDATACONSTB;
arm_vm_page_granular_RNX((vm_offset_t)&excepstack_high_guard, PAGE_MAX_SIZE, 0);
arm_vm_page_granular_ROX(segKLDB, segSizeKLD, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
+ arm_vm_page_granular_RNX(segKLDDATAB, segSizeKLDDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
arm_vm_page_granular_RWNX(segLINKB, segSizeLINK, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
arm_vm_page_granular_RWNX(segPLKLINKEDITB, segSizePLKLINKEDIT, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // Coalesced kext LINKEDIT segment
arm_vm_page_granular_ROX(segLASTB, segSizeLAST, ARM64_GRANULE_ALLOW_BLOCK); // __LAST may be empty, but we cannot assume this
// Slid region between gPhysBase and beginning of protected text
arm_vm_physmap_slide(temp_ptov_table, gVirtBase, segLOWEST - gVirtBase, AP_RWNA, 0);
- // kext bootstrap segment
+ // kext bootstrap segments
+#if !defined(KERNEL_INTEGRITY_KTRR) && !defined(KERNEL_INTEGRITY_CTRR)
+ /* __KLD,__text is covered by the rorgn */
arm_vm_physmap_slide(temp_ptov_table, segKLDB, segSizeKLD, AP_RONA, 0);
+#endif
+ arm_vm_physmap_slide(temp_ptov_table, segKLDDATAB, segSizeKLDDATA, AP_RONA, 0);
// Early-boot data
arm_vm_physmap_slide(temp_ptov_table, segBOOTDATAB, segSizeBOOTDATA, AP_RONA, 0);
#endif /* __ARM_KERNEL_PROTECT__ */
#if XNU_MONITOR
+#if !defined(KERNEL_INTEGRITY_KTRR) && !defined(KERNEL_INTEGRITY_CTRR)
+ /* __KLD,__text is covered by the rorgn */
for (vm_offset_t va = segKLDB; va < (segKLDB + segSizeKLD); va += ARM_PGBYTES) {
pt_entry_t *pte = arm_kva_to_pte(va);
*pte = ARM_PTE_EMPTY;
}
+#endif
+ for (vm_offset_t va = segKLDDATAB; va < (segKLDDATAB + segSizeKLDDATA); va += ARM_PGBYTES) {
+ pt_entry_t *pte = arm_kva_to_pte(va);
+ *pte = ARM_PTE_EMPTY;
+ }
/* Clear the original stack mappings; these pages should be mapped through ptov_table. */
for (vm_offset_t va = segBOOTDATAB; va < (segBOOTDATAB + segSizeBOOTDATA); va += ARM_PGBYTES) {
pt_entry_t *pte = arm_kva_to_pte(va);
arm_vm_page_granular_RNX(segLASTDATACONSTB, segSizeLASTDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
}
+ /*
+ * __KLD,__text should no longer be executable.
+ */
+ arm_vm_page_granular_RNX(segKLDB, segSizeKLD, ARM64_GRANULE_ALLOW_BLOCK);
+
/*
* Must wait until all other region permissions are set before locking down DATA_CONST
* as the kernel static page tables live in DATA_CONST on KTRR enabled systems
segBOOTDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__BOOTDATA", &segSizeBOOTDATA);
segLINKB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LINKEDIT", &segSizeLINK);
segKLDB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLD", &segSizeKLD);
+ segKLDDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLDDATA", &segSizeKLDDATA);
segPRELINKDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_DATA", &segSizePRELINKDATA);
segPRELINKINFOB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_INFO", &segSizePRELINKINFO);
segPLKLLVMCOVB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LLVM_COV", &segSizePLKLLVMCOV);
// fileset has kext PLK_TEXT_EXEC under kernel collection TEXT_EXEC following kernel's LAST
segKCTEXTEXECB = (vm_offset_t) getsegdatafromheader(kc_mh, "__TEXT_EXEC", &segSizeKCTEXTEXEC);
assert(segPLKTEXTEXECB && !segSizePLKTEXTEXEC); // kernel PLK_TEXT_EXEC must be empty
- assert(segLASTB && segSizeLAST); // kernel LAST must not be empty
+
+ assert(segLASTB); // kernel LAST can be empty, but it must have
+ // a valid address for computations below.
+
assert(segKCTEXTEXECB <= segLASTB); // KC TEXT_EXEC must contain kernel LAST
assert(segKCTEXTEXECB + segSizeKCTEXTEXEC >= segLASTB + segSizeLAST);
segPLKTEXTEXECB = segLASTB + segSizeLAST;
#include <mach/mach_traps.h>
#include <mach/vm_param.h>
-#include <kern/counters.h>
#include <kern/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <kern/mach_param.h>
return getCpuDatap()->intstack_top;
}
#endif /* CONFIG_DTRACE */
-extern const char *mach_syscall_name_table[];
+extern const char *const mach_syscall_name_table[];
/* ARM64_TODO: remove this. Still TODO? */
extern struct proc* current_proc(void);
#include <machine/asm.h>
#include <arm64/proc_reg.h>
+#include <pexpert/arm64/board_config.h>
#include <arm/pmap.h>
#include <sys/errno.h>
#include "assym.s"
.endmacro
/*
- * Detects the presence of an L2 cache and returns 1 if implemented,
- * zero otherwise.
- *
+ * Returns the cache configuration for the specified level
* $0: Output register
+ * $1: Cache level register
+ * $2: Scratch register
*/
-.macro HAS_L2_CACHE
+.macro CACHE_AT_LEVEL
mrs $0, CLIDR_EL1
- ubfx $0, $0, #3, #3 // extract L2 cache Ctype
- cmp $0, #0x1
- cset $0, hi
+ add $2, $1, $1, lsl #1
+ lsr $0, $0, $2
+ and $0, $0, #7 // extract cache type
+.endmacro
+
+/*
+ * Perform set/way maintenance to the desired cache level
+ * $0: 'dc' set/way variant, e.g. csw or cisw
+ * x0: maximum cache level, 0-based, inclusive
+ */
+.macro DCACHE_SET_WAY
+ dmb sy
+ mov x1, #0
+1:
+ CACHE_AT_LEVEL x2, x1, x3
+ cbz x2, 5f // No cache at this level, all higher levels may be skipped
+ cmp x2, #2
+ b.lt 4f // No data cache at this level, skip to next level
+ mov x2, x1
+ GET_CACHE_CONFIG x2, x9, x10, x11
+ lsl x2, x1, #1 // level field for cisw/csw, bits 1:3
+2:
+3:
+ dc $0, x2 // clean dcache line by way/set
+ add x2, x2, x9 // increment set index
+ tst x2, x10 // look for overflow
+ b.eq 3b
+ bic x2, x2, x10 // clear set overflow
+ adds w2, w2, w11 // increment way
+ b.cc 2b // loop
+ dsb sy // ensure completion of prior level maintenance
+4:
+ add x1, x1, #1
+ cmp x1, x0
+ b.ls 1b // next level
+5:
+ ret
.endmacro
/*
.globl EXT(clean_mmu_dcache)
LEXT(CleanPoC_Dcache)
#if defined(APPLE_ARM64_ARCH_FAMILY)
+ dsb sy
+ ret
/* "Fully Coherent." */
#else /* !defined(APPLE_ARM64_ARCH_FAMILY) */
- mov x0, #0
- GET_CACHE_CONFIG x0, x9, x10, x11
-
- dmb sy
- mov x0, #0
-L_cpcd_dcacheway:
-L_cpcd_dcacheline:
- dc csw, x0 // clean dcache line by way/set
- add x0, x0, x9 // increment set index
- tst x0, x10 // look for overflow
- b.eq L_cpcd_dcacheline
- bic x0, x0, x10 // clear set overflow
- adds w0, w0, w11 // increment way
- b.cc L_cpcd_dcacheway // loop
-
- HAS_L2_CACHE x0
- cbz x0, L_cpcd_skipl2dcache
- mov x0, #1
- GET_CACHE_CONFIG x0, x9, x10, x11
-
- dsb sy
- mov x0, #2
-L_cpcd_l2dcacheway:
-L_cpcd_l2dcacheline:
- dc csw, x0 // clean dcache line by way/set
- add x0, x0, x9 // increment set index
- tst x0, x10 // look for overflow
- b.eq L_cpcd_l2dcacheline
- bic x0, x0, x10 // clear set overflow
- adds w0, w0, w11 // increment way
- b.cc L_cpcd_l2dcacheway // loop
-L_cpcd_skipl2dcache:
+ mrs x0, CLIDR_EL1
+ ubfx x0, x0, #24, #3 // extract CLIDR_EL1.LoC
+ DCACHE_SET_WAY csw
#endif /* defined(APPLE_ARM64_ARCH_FAMILY) */
- dsb sy
- ret
/*
* void CleanPoU_Dcache(void)
.globl EXT(CleanPoU_Dcache)
LEXT(CleanPoU_Dcache)
#if defined(APPLE_ARM64_ARCH_FAMILY)
- /* "Fully Coherent." */
-#else /* !defined(APPLE_ARM64_ARCH_FAMILY) */
- mov x0, #0
- GET_CACHE_CONFIG x0, x9, x10, x11
-
- dmb sy
- mov x0, #0
-L_cpud_dcacheway:
-L_cpud_dcacheline:
- dc csw, x0 // clean dcache line by way/set
- add x0, x0, x9 // increment set index
- tst x0, x10 // look for overflow
- b.eq L_cpud_dcacheline
- bic x0, x0, x10 // clear set overflow
- adds w0, w0, w11 // increment way
- b.cc L_cpud_dcacheway // loop
- #endif /* defined(APPLE_ARM64_ARCH_FAMILY) */
dsb sy
ret
+ /* "Fully Coherent." */
+#else /* !defined(APPLE_ARM64_ARCH_FAMILY) */
+ mrs x0, CLIDR_EL1
+ ubfx x0, x0, #21, #3 // extract CLIDR_EL1.LoUIS
+ DCACHE_SET_WAY csw
+#endif /* defined(APPLE_ARM64_ARCH_FAMILY) */
/*
* void CleanPoU_DcacheRegion(vm_offset_t va, unsigned length)
.text
.align 2
LEXT(CleanPoC_DcacheRegion_internal)
- mov x9, #((1<<MMU_CLINE)-1)
+ mov x10, #(MMU_CLINE)
+
+ /* Stash (1 << cache_line_size) in x11 for easy access. */
+ mov x11, #1
+ lsl x11, x11, x10
+
+ sub x9, x11, #1
and x2, x0, x9
bic x0, x0, x9 // Cache-align the address
add x1, x1, x2
sub x1, x1, #1
- lsr x1, x1, #MMU_CLINE // Set cache line counter
+ lsr x1, x1, x10 // Set cache line counter
dsb sy
L_cpcdr_loop:
#if defined(APPLE_ARM64_ARCH_FAMILY)
// It may be tempting to clean the cache (dc cvac),
// but see Cyclone UM 5.3.8.3 -- it's always a NOP on Cyclone.
//
- // Clean & Invalidate, however, will work as long as HID4.DisDCMvaOps isn't set.
+ // Clean & Invalidate, however, will work as long as S3_0_C15_C4_0.DisDCMvaOps isn't set.
dc civac, x0 // Clean & Invalidate dcache line to PoC
#else
dc cvac, x0 // Clean dcache line to PoC
#endif
- add x0, x0, #(1<<MMU_CLINE) // Get next cache aligned addr
+ add x0, x0, x11 // Get next cache aligned addr
subs x1, x1, #1 // Decrement cache line counter
b.pl L_cpcdr_loop // Loop while counter is not negative
dsb sy
PUSH_FRAME
isb sy
ARM64_IS_PCORE x15
- ARM64_READ_EP_SPR x15, x14, ARM64_REG_EHID4, ARM64_REG_HID4
+ ARM64_READ_EP_SPR x15, x14, S3_0_C15_C4_1, S3_0_C15_C4_0
and x14, x14, (~ARM64_REG_HID4_DisDcMVAOps)
- ARM64_WRITE_EP_SPR x15, x14, ARM64_REG_EHID4, ARM64_REG_HID4
+ ARM64_WRITE_EP_SPR x15, x14, S3_0_C15_C4_1, S3_0_C15_C4_0
isb sy
bl EXT(CleanPoC_DcacheRegion_internal)
isb sy
orr x14, x14, ARM64_REG_HID4_DisDcMVAOps
- ARM64_WRITE_EP_SPR x15, x14, ARM64_REG_EHID4, ARM64_REG_HID4
+ ARM64_WRITE_EP_SPR x15, x14, S3_0_C15_C4_1, S3_0_C15_C4_0
isb sy
POP_FRAME
ARM64_STACK_EPILOG
.globl EXT(FlushPoC_Dcache)
LEXT(FlushPoC_Dcache)
#if defined(APPLE_ARM64_ARCH_FAMILY)
+ dsb sy
+ ret
/* "Fully Coherent." */
#else /* !defined(APPLE_ARM64_ARCH_FAMILY) */
- mov x0, #0
- GET_CACHE_CONFIG x0, x9, x10, x11
-
- dmb sy
- mov x0, #0
-L_fpcd_dcacheway:
-L_fpcd_dcacheline:
- dc cisw, x0 // clean invalidate dcache line by way/set
- add x0, x0, x9 // increment set index
- tst x0, x10 // look for overflow
- b.eq L_fpcd_dcacheline
- bic x0, x0, x10 // clear set overflow
- adds w0, w0, w11 // increment way
- b.cc L_fpcd_dcacheway // loop
-
- HAS_L2_CACHE x0
- cbz x0, L_fpcd_skipl2dcache
- dsb sy
- mov x0, #1
- GET_CACHE_CONFIG x0, x9, x10, x11
-
- mov x0, #2
-L_fpcd_l2dcacheway:
-L_fpcd_l2dcacheline:
- dc cisw, x0 // clean invalide dcache line by way/set
- add x0, x0, x9 // increment set index
- tst x0, x10 // look for overflow
- b.eq L_fpcd_l2dcacheline
- bic x0, x0, x10 // clear set overflow
- adds w0, w0, w11 // increment way
- b.cc L_fpcd_l2dcacheway // loop
-L_fpcd_skipl2dcache:
+ mrs x0, CLIDR_EL1
+ ubfx x0, x0, #24, #3 // extract CLIDR_EL1.LoC
+ DCACHE_SET_WAY cisw
#endif /* defined(APPLE_ARM64_ARCH_FAMILY) */
- dsb sy
- ret
+
+/*
+ * void Flush_Dcache(void)
+ *
+ * Clean and invalidate D-cache, all levels
+ */
+ .text
+ .align 2
+ .globl EXT(Flush_Dcache)
+LEXT(Flush_Dcache)
+ mov x0, #6 // Maximum allowable caching level (0-based)
+ DCACHE_SET_WAY cisw
/*
* void FlushPoU_Dcache(void)
.globl EXT(FlushPoU_Dcache)
LEXT(FlushPoU_Dcache)
#if defined(APPLE_ARM64_ARCH_FAMILY)
+ dsb sy
+ ret
/* "Fully Coherent." */
#else /* !defined(APPLE_ARM64_ARCH_FAMILY) */
- mov x0, #0
- GET_CACHE_CONFIG x0, x9, x10, x11
-
- dmb sy
- mov x0, #0
-L_fpud_way:
-L_fpud_line:
- dc cisw, x0 // clean invalidate dcache line by way/set
- add x0, x0, x9 // increment set index
- tst x0, x10 // look for overflow
- b.eq L_fpud_line
- bic x0, x0, x10 // clear set overflow
- adds w0, w0, w11 // increment way
- b.cc L_fpud_way // loop
+ mrs x0, CLIDR_EL1
+ ubfx x0, x0, #21, #3 // extract CLIDR_EL1.LoUIS
+ DCACHE_SET_WAY cisw
#endif /* defined(APPLE_ARM64_ARCH_FAMILY) */
- dsb sy
- ret
/*
* void FlushPoC_DcacheRegion(vm_offset_t va, unsigned length)
extern int copyoutstr_prevalidate(const void *kaddr, user_addr_t uaddr, size_t len);
-extern pmap_t kernel_pmap;
-
extern const vm_map_address_t physmap_base;
extern const vm_map_address_t physmap_end;
* Size of elements in the permanent zone is not saved as a part of the
* zone's info
*/
- if (__improbable(src_zone && !src_zone->permanent &&
+ if (__improbable(src_zone && !src_zone->z_permanent &&
kernel_buf_size < nbytes)) {
panic("copyio_preflight: kernel buffer 0x%lx has size %lu < nbytes %lu",
kernel_addr, kernel_buf_size, nbytes);
#endif
+#if CSWITCH_ROP_KEYS
+ ldr \new_key, [\thread, TH_ROP_PID]
+ REPROGRAM_ROP_KEYS Lskip_rop_keys_\@, \new_key, \cpudatap, \tmp_key
+ mov \wsync, #1
+Lskip_rop_keys_\@:
+#endif /* CSWITCH_ROP_KEYS */
+
+#if CSWITCH_JOP_KEYS
+ ldr \new_key, [\thread, TH_JOP_PID]
+ REPROGRAM_JOP_KEYS Lskip_jop_keys_\@, \new_key, \cpudatap, \tmp_key
+ mov \wsync, #1
+Lskip_jop_keys_\@:
+#endif /* CSWITCH_JOP_KEYS */
cbz \wsync, 1f
isb sy
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
-#include <arm64/pac_asm.h>
#include <pexpert/arm64/board_config.h>
#include "assym.s"
pal_hib_patchup(pal_hib_ctx_t *ctx)
{
+ /* Reinit the ppl hib lock, as it was saved to the hibernation image while held. */
+ ppl_hib_lock_reinit();
+
// DRAM pages are captured from a PPL context, so here we restore all cpu_data structures to a non-PPL context
for (int i = 0; i < MAX_CPUS; i++) {
pmap_cpu_data_array[i].cpu_data.ppl_state = PPL_STATE_KERNEL;
#define PMESR_EVT_ENCODE(EVT, PMC, OFF) \
(((EVT) & PMESR_PMC_MASK) << PMESR_SHIFT(PMC, OFF))
-/* system registers in the CPMU */
-
-#define SREG_PMCR0 "S3_1_c15_c0_0"
-#define SREG_PMCR1 "S3_1_c15_c1_0"
-#define SREG_PMCR2 "S3_1_c15_c2_0"
-#define SREG_PMCR3 "S3_1_c15_c3_0"
-#define SREG_PMCR4 "S3_1_c15_c4_0"
-#define SREG_PMESR0 "S3_1_c15_c5_0"
-#define SREG_PMESR1 "S3_1_c15_c6_0"
-#define SREG_PMSR "S3_1_c15_c13_0"
-#define SREG_OPMAT0 "S3_1_c15_c7_0"
-#define SREG_OPMAT1 "S3_1_c15_c8_0"
-#define SREG_OPMSK0 "S3_1_c15_c9_0"
-#define SREG_OPMSK1 "S3_1_c15_c10_0"
-
-#define SREG_PMC0 "S3_2_c15_c0_0"
-#define SREG_PMC1 "S3_2_c15_c1_0"
-#define SREG_PMC2 "S3_2_c15_c2_0"
-#define SREG_PMC3 "S3_2_c15_c3_0"
-#define SREG_PMC4 "S3_2_c15_c4_0"
-#define SREG_PMC5 "S3_2_c15_c5_0"
-#define SREG_PMC6 "S3_2_c15_c6_0"
-#define SREG_PMC7 "S3_2_c15_c7_0"
-#define SREG_PMC8 "S3_2_c15_c9_0"
-#define SREG_PMC9 "S3_2_c15_c10_0"
-
-#define SREG_PMMMAP "S3_2_c15_c15_0"
-#define SREG_PMTRHLD2 "S3_2_c15_c14_0"
-#define SREG_PMTRHLD4 "S3_2_c15_c13_0"
-#define SREG_PMTRHLD6 "S3_2_c15_c12_0"
-
/*
* The low 8 bits of a configuration word select the event to program on
* PMESR{0,1}. Bits 16-19 are mapped to PMCR1 bits.
dump_regs(void)
{
uint64_t val;
- kprintf("PMCR0 = 0x%" PRIx64 "\n", SREG_READ(SREG_PMCR0));
- kprintf("PMCR1 = 0x%" PRIx64 "\n", SREG_READ(SREG_PMCR1));
- kprintf("PMCR2 = 0x%" PRIx64 "\n", SREG_READ(SREG_PMCR2));
- kprintf("PMCR3 = 0x%" PRIx64 "\n", SREG_READ(SREG_PMCR3));
- kprintf("PMCR4 = 0x%" PRIx64 "\n", SREG_READ(SREG_PMCR4));
- kprintf("PMESR0 = 0x%" PRIx64 "\n", SREG_READ(SREG_PMESR0));
- kprintf("PMESR1 = 0x%" PRIx64 "\n", SREG_READ(SREG_PMESR1));
-
- kprintf("PMC0 = 0x%" PRIx64 "\n", SREG_READ(SREG_PMC0));
- kprintf("PMC1 = 0x%" PRIx64 "\n", SREG_READ(SREG_PMC1));
- kprintf("PMC2 = 0x%" PRIx64 "\n", SREG_READ(SREG_PMC2));
- kprintf("PMC3 = 0x%" PRIx64 "\n", SREG_READ(SREG_PMC3));
- kprintf("PMC4 = 0x%" PRIx64 "\n", SREG_READ(SREG_PMC4));
- kprintf("PMC5 = 0x%" PRIx64 "\n", SREG_READ(SREG_PMC5));
- kprintf("PMC6 = 0x%" PRIx64 "\n", SREG_READ(SREG_PMC6));
- kprintf("PMC7 = 0x%" PRIx64 "\n", SREG_READ(SREG_PMC7));
+ kprintf("PMCR0 = 0x%" PRIx64 "\n", SREG_READ("S3_1_C15_C0_0"));
+ kprintf("PMCR1 = 0x%" PRIx64 "\n", SREG_READ("S3_1_C15_C1_0"));
+ kprintf("PMCR2 = 0x%" PRIx64 "\n", SREG_READ("S3_1_C15_C2_0"));
+ kprintf("PMCR3 = 0x%" PRIx64 "\n", SREG_READ("S3_1_C15_C3_0"));
+ kprintf("PMCR4 = 0x%" PRIx64 "\n", SREG_READ("S3_1_C15_C4_0"));
+ kprintf("PMESR0 = 0x%" PRIx64 "\n", SREG_READ("S3_1_C15_C5_0"));
+ kprintf("PMESR1 = 0x%" PRIx64 "\n", SREG_READ("S3_1_C15_C6_0"));
+
+ kprintf("PMC0 = 0x%" PRIx64 "\n", SREG_READ("PMC0"));
+ kprintf("PMC1 = 0x%" PRIx64 "\n", SREG_READ("PMC1"));
+ kprintf("S3_2_C15_C2_0 = 0x%" PRIx64 "\n", SREG_READ("S3_2_C15_C2_0"));
+ kprintf("S3_2_C15_C3_0 = 0x%" PRIx64 "\n", SREG_READ("S3_2_C15_C3_0"));
+ kprintf("S3_2_C15_C4_0 = 0x%" PRIx64 "\n", SREG_READ("S3_2_C15_C4_0"));
+ kprintf("S3_2_C15_C5_0 = 0x%" PRIx64 "\n", SREG_READ("S3_2_C15_C5_0"));
+ kprintf("S3_2_C15_C6_0 = 0x%" PRIx64 "\n", SREG_READ("S3_2_C15_C6_0"));
+ kprintf("S3_2_C15_C7_0 = 0x%" PRIx64 "\n", SREG_READ("S3_2_C15_C7_0"));
#if (KPC_ARM64_CONFIGURABLE_COUNT > 6)
- kprintf("PMC8 = 0x%" PRIx64 "\n", SREG_READ(SREG_PMC8));
- kprintf("PMC9 = 0x%" PRIx64 "\n", SREG_READ(SREG_PMC9));
+ kprintf("S3_2_C15_C9_0 = 0x%" PRIx64 "\n", SREG_READ("S3_2_C15_C9_0"));
+ kprintf("S3_2_C15_C10_0 = 0x%" PRIx64 "\n", SREG_READ("S3_2_C15_C10_0"));
#endif
}
#endif
uint64_t pmcr0 = 0;
boolean_t counter_running, pmi_enabled, enabled;
- pmcr0 = SREG_READ(SREG_PMCR0) | 0x3 /* leave the fixed counters enabled for monotonic */;
+ pmcr0 = SREG_READ("S3_1_C15_C0_0") | 0x3 /* leave the fixed counters enabled for monotonic */;
counter_running = (pmcr0 & PMCR0_PMC_ENABLE_MASK(counter)) != 0;
pmi_enabled = (pmcr0 & PMCR0_PMI_ENABLE_MASK(counter)) != 0;
if (!enabled) {
pmcr0 |= PMCR0_PMC_ENABLE_MASK(counter);
pmcr0 |= PMCR0_PMI_ENABLE_MASK(counter);
- SREG_WRITE(SREG_PMCR0, pmcr0);
+ SREG_WRITE("S3_1_C15_C0_0", pmcr0);
}
return enabled;
return true;
}
- pmcr0 = SREG_READ(SREG_PMCR0) | 0x3;
+ pmcr0 = SREG_READ("S3_1_C15_C0_0") | 0x3;
enabled = (pmcr0 & PMCR0_PMC_ENABLE_MASK(counter)) != 0;
if (enabled) {
pmcr0 &= PMCR0_PMC_DISABLE_MASK(counter);
- SREG_WRITE(SREG_PMCR0, pmcr0);
+ SREG_WRITE("S3_1_C15_C0_0", pmcr0);
}
return enabled;
bits = PMCR1_EL_ALL_ENABLE_MASK(counter);
}
- uint64_t pmcr1 = SREG_READ(SREG_PMCR1);
+ uint64_t pmcr1 = SREG_READ("S3_1_C15_C1_0");
pmcr1 &= PMCR1_EL_ALL_DISABLE_MASK(counter);
pmcr1 |= bits;
pmcr1 |= 0x30303; /* monotonic compatibility */
- SREG_WRITE(SREG_PMCR1, pmcr1);
+ SREG_WRITE("S3_1_C15_C1_0", pmcr1);
saved_PMCR[cpuid][1] = pmcr1;
}
read_counter(uint32_t counter)
{
switch (counter) {
- // case 0: return SREG_READ(SREG_PMC0);
- // case 1: return SREG_READ(SREG_PMC1);
- case 2: return SREG_READ(SREG_PMC2);
- case 3: return SREG_READ(SREG_PMC3);
- case 4: return SREG_READ(SREG_PMC4);
- case 5: return SREG_READ(SREG_PMC5);
- case 6: return SREG_READ(SREG_PMC6);
- case 7: return SREG_READ(SREG_PMC7);
+ // case 0: return SREG_READ("PMC0");
+ // case 1: return SREG_READ("PMC1");
+ case 2: return SREG_READ("S3_2_C15_C2_0");
+ case 3: return SREG_READ("S3_2_C15_C3_0");
+ case 4: return SREG_READ("S3_2_C15_C4_0");
+ case 5: return SREG_READ("S3_2_C15_C5_0");
+ case 6: return SREG_READ("S3_2_C15_C6_0");
+ case 7: return SREG_READ("S3_2_C15_C7_0");
#if (KPC_ARM64_CONFIGURABLE_COUNT > 6)
- case 8: return SREG_READ(SREG_PMC8);
- case 9: return SREG_READ(SREG_PMC9);
+ case 8: return SREG_READ("S3_2_C15_C9_0");
+ case 9: return SREG_READ("S3_2_C15_C10_0");
#endif
default: return 0;
}
write_counter(uint32_t counter, uint64_t value)
{
switch (counter) {
- // case 0: SREG_WRITE(SREG_PMC0, value); break;
- // case 1: SREG_WRITE(SREG_PMC1, value); break;
- case 2: SREG_WRITE(SREG_PMC2, value); break;
- case 3: SREG_WRITE(SREG_PMC3, value); break;
- case 4: SREG_WRITE(SREG_PMC4, value); break;
- case 5: SREG_WRITE(SREG_PMC5, value); break;
- case 6: SREG_WRITE(SREG_PMC6, value); break;
- case 7: SREG_WRITE(SREG_PMC7, value); break;
+ // case 0: SREG_WRITE("PMC0", value); break;
+ // case 1: SREG_WRITE("PMC1", value); break;
+ case 2: SREG_WRITE("S3_2_C15_C2_0", value); break;
+ case 3: SREG_WRITE("S3_2_C15_C3_0", value); break;
+ case 4: SREG_WRITE("S3_2_C15_C4_0", value); break;
+ case 5: SREG_WRITE("S3_2_C15_C5_0", value); break;
+ case 6: SREG_WRITE("S3_2_C15_C6_0", value); break;
+ case 7: SREG_WRITE("S3_2_C15_C7_0", value); break;
#if (KPC_ARM64_CONFIGURABLE_COUNT > 6)
- case 8: SREG_WRITE(SREG_PMC8, value); break;
- case 9: SREG_WRITE(SREG_PMC9, value); break;
+ case 8: SREG_WRITE("S3_2_C15_C9_0", value); break;
+ case 9: SREG_WRITE("S3_2_C15_C10_0", value); break;
#endif
default: break;
}
int
kpc_get_rawpmu_config(kpc_config_t *configv)
{
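+ /*
+ * The raw encodings stand in for the former SREG_PMCR2..SREG_PMCR4,
+ * SREG_OPMAT0/1 and SREG_OPMSK0/1 aliases (plus SREG_PMMMAP and
+ * SREG_PMTRHLD2/4/6 when RAWPMU_CONFIG_COUNT > 7), in the same order.
+ */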
- configv[0] = SREG_READ(SREG_PMCR2);
- configv[1] = SREG_READ(SREG_PMCR3);
- configv[2] = SREG_READ(SREG_PMCR4);
- configv[3] = SREG_READ(SREG_OPMAT0);
- configv[4] = SREG_READ(SREG_OPMAT1);
- configv[5] = SREG_READ(SREG_OPMSK0);
- configv[6] = SREG_READ(SREG_OPMSK1);
+ configv[0] = SREG_READ("S3_1_C15_C2_0");
+ configv[1] = SREG_READ("S3_1_C15_C3_0");
+ configv[2] = SREG_READ("S3_1_C15_C4_0");
+ configv[3] = SREG_READ("S3_1_C15_C7_0");
+ configv[4] = SREG_READ("S3_1_C15_C8_0");
+ configv[5] = SREG_READ("S3_1_C15_C9_0");
+ configv[6] = SREG_READ("S3_1_C15_C10_0");
#if RAWPMU_CONFIG_COUNT > 7
- configv[7] = SREG_READ(SREG_PMMMAP);
- configv[8] = SREG_READ(SREG_PMTRHLD2);
- configv[9] = SREG_READ(SREG_PMTRHLD4);
- configv[10] = SREG_READ(SREG_PMTRHLD6);
+ configv[7] = SREG_READ("S3_2_C15_C15_0");
+ configv[8] = SREG_READ("S3_2_C15_C14_0");
+ configv[9] = SREG_READ("S3_2_C15_C13_0");
+ configv[10] = SREG_READ("S3_2_C15_C12_0");
#endif
return 0;
}
static int
kpc_set_rawpmu_config(kpc_config_t *configv)
{
- SREG_WRITE(SREG_PMCR2, configv[0]);
- SREG_WRITE(SREG_PMCR3, configv[1]);
- SREG_WRITE(SREG_PMCR4, configv[2]);
- SREG_WRITE(SREG_OPMAT0, configv[3]);
- SREG_WRITE(SREG_OPMAT1, configv[4]);
- SREG_WRITE(SREG_OPMSK0, configv[5]);
- SREG_WRITE(SREG_OPMSK1, configv[6]);
+ SREG_WRITE("S3_1_C15_C2_0", configv[0]);
+ SREG_WRITE("S3_1_C15_C3_0", configv[1]);
+ SREG_WRITE("S3_1_C15_C4_0", configv[2]);
+ SREG_WRITE("S3_1_C15_C7_0", configv[3]);
+ SREG_WRITE("S3_1_C15_C8_0", configv[4]);
+ SREG_WRITE("S3_1_C15_C9_0", configv[5]);
+ SREG_WRITE("S3_1_C15_C10_0", configv[6]);
#if RAWPMU_CONFIG_COUNT > 7
- SREG_WRITE(SREG_PMMMAP, configv[7]);
- SREG_WRITE(SREG_PMTRHLD2, configv[8]);
- SREG_WRITE(SREG_PMTRHLD4, configv[9]);
- SREG_WRITE(SREG_PMTRHLD6, configv[10]);
+ SREG_WRITE("S3_2_C15_C15_0", configv[7]);
+ SREG_WRITE("S3_2_C15_C14_0", configv[8]);
+ SREG_WRITE("S3_2_C15_C13_0", configv[9]);
+ SREG_WRITE("S3_2_C15_C12_0", configv[10]);
#endif
return 0;
}
assert(ml_get_interrupts_enabled() == FALSE);
/* Save event selections. */
- saved_PMESR[cpuid][0] = SREG_READ(SREG_PMESR0);
- saved_PMESR[cpuid][1] = SREG_READ(SREG_PMESR1);
+ saved_PMESR[cpuid][0] = SREG_READ("S3_1_C15_C5_0");
+ saved_PMESR[cpuid][1] = SREG_READ("S3_1_C15_C6_0");
kpc_get_rawpmu_config(saved_RAWPMU[cpuid]);
/* Disable the counters. */
- // SREG_WRITE(SREG_PMCR0, clear);
+ // SREG_WRITE("S3_1_C15_C0_0", clear);
/* Finally, save state for each counter*/
for (int i = 2; i < KPC_ARM64_PMC_COUNT; i++) {
int cpuid = cpu_number();
/* Restore PMESR values. */
- SREG_WRITE(SREG_PMESR0, saved_PMESR[cpuid][0]);
- SREG_WRITE(SREG_PMESR1, saved_PMESR[cpuid][1]);
+ SREG_WRITE("S3_1_C15_C5_0", saved_PMESR[cpuid][0]);
+ SREG_WRITE("S3_1_C15_C6_0", saved_PMESR[cpuid][1]);
kpc_set_rawpmu_config(saved_RAWPMU[cpuid]);
}
/* Restore PMCR0/1 values (with PMCR0 last to enable). */
- SREG_WRITE(SREG_PMCR1, saved_PMCR[cpuid][1] | 0x30303);
+ SREG_WRITE("S3_1_C15_C1_0", saved_PMCR[cpuid][1] | 0x30303);
}
static uint64_t
case 3: /* FALLTHROUGH */
case 4: /* FALLTHROUGH */
case 5:
- pmesr = PMESR_EVT_DECODE(SREG_READ(SREG_PMESR0), counter, 2);
+ pmesr = PMESR_EVT_DECODE(SREG_READ("S3_1_C15_C5_0"), counter, 2);
break;
case 6: /* FALLTHROUGH */
case 7:
case 8: /* FALLTHROUGH */
case 9:
#endif
- pmesr = PMESR_EVT_DECODE(SREG_READ(SREG_PMESR1), counter, 6);
+ pmesr = PMESR_EVT_DECODE(SREG_READ("S3_1_C15_C6_0"), counter, 6);
break;
default:
pmesr = 0;
kpc_config_t config = pmesr;
- uint64_t pmcr1 = SREG_READ(SREG_PMCR1);
+ uint64_t pmcr1 = SREG_READ("S3_1_C15_C1_0");
if (pmcr1 & PMCR1_EL0_A32_ENABLE_MASK(counter)) {
config |= CFGWORD_EL0A32EN_MASK;
case 3: /* FALLTHROUGH */
case 4: /* FALLTHROUGH */
case 5:
- pmesr = SREG_READ(SREG_PMESR0);
+ pmesr = SREG_READ("S3_1_C15_C5_0");
pmesr &= PMESR_EVT_CLEAR(counter, 2);
pmesr |= PMESR_EVT_ENCODE(config, counter, 2);
- SREG_WRITE(SREG_PMESR0, pmesr);
+ SREG_WRITE("S3_1_C15_C5_0", pmesr);
saved_PMESR[cpuid][0] = pmesr;
break;
case 8: /* FALLTHROUGH */
case 9:
#endif
- pmesr = SREG_READ(SREG_PMESR1);
+ pmesr = SREG_READ("S3_1_C15_C6_0");
pmesr &= PMESR_EVT_CLEAR(counter, 6);
pmesr |= PMESR_EVT_ENCODE(config, counter, 6);
- SREG_WRITE(SREG_PMESR1, pmesr);
+ SREG_WRITE("S3_1_C15_C6_0", pmesr);
saved_PMESR[cpuid][1] = pmesr;
break;
default:
*/
#include <machine/asm.h>
+#include <arm64/machine_machdep.h>
#include <arm64/machine_routines_asm.h>
#include <arm64/proc_reg.h>
#include <pexpert/arm64/board_config.h>
.macro COMPARE_BRANCH_FUSION
#if defined(APPLE_ARM64_ARCH_FAMILY)
- mrs $1, ARM64_REG_HID1
+ mrs $1, HID1
.if $0 == CBF_DISABLE
orr $1, $1, ARM64_REG_HID1_disCmpBrFusion
.else
mov $2, ARM64_REG_HID1_disCmpBrFusion
bic $1, $1, $2
.endif
- msr ARM64_REG_HID1, $1
+ msr HID1, $1
.if $0 == CBF_DISABLE
isb sy
.endif
ARM64_IS_PCORE x12 // if we're not a pCORE, also do nothing
cbz x12, 1f
-#endif
-
-#if defined(APPLELIGHTNING) || defined(APPLEFIRESTORM)
-
- mrs x12, ARM64_REG_HID1 // if any debug session ever existed, set forceNexL3ClkOn
+ mrs x12, HID1 // if any debug session ever existed, set forceNexL3ClkOn
orr x12, x12, ARM64_REG_HID1_forceNexL3ClkOn
- msr ARM64_REG_HID1, x12
+ msr HID1, x12
1:
#endif
MRS(local_mpidr, "MPIDR_EL1");
if (MPIDR_CLUSTER_ID(local_mpidr) == MPIDR_CLUSTER_ID(cpu_mpidr)) {
uint64_t x = type | MPIDR_CPU_ID(cpu_mpidr);
- MSR(ARM64_REG_IPI_RR_LOCAL, x);
+ MSR("S3_5_C15_C0_0", x);
} else {
#define IPI_RR_TARGET_CLUSTER_SHIFT 16
uint64_t x = type | (MPIDR_CLUSTER_ID(cpu_mpidr) << IPI_RR_TARGET_CLUSTER_SHIFT) | MPIDR_CPU_ID(cpu_mpidr);
- MSR(ARM64_REG_IPI_RR_GLOBAL, x);
+ MSR("S3_5_C15_C0_1", x);
}
#else
uint64_t x = type | MPIDR_CPU_ID(cpu_mpidr);
- MSR(ARM64_REG_IPI_RR, x);
+ MSR("S3_5_C15_C0_1", x);
#endif
}
#endif
/* update deferred_ipi_timer_ns with the new clamped value */
absolutetime_to_nanoseconds(abstime, &deferred_ipi_timer_ns);
- MSR(ARM64_REG_IPI_CR, abstime);
+ MSR("S3_5_C15_C3_1", abstime);
#else
(void)nanosecs;
panic("Platform does not support ACC Fast IPI");
return Shutdown_context(doshutdown, processor);
}
+
/*
* Routine: ml_init_lock_timeout
* Function:
}
MutexSpin = abstime;
low_MutexSpin = MutexSpin;
+
+
/*
* high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
* real_ncpus is not set at this time
nanoseconds_to_absolutetime(MAX_WFE_HINT_INTERVAL_US * NSEC_PER_USEC, &ml_wfe_hint_max_interval);
}
+/*
+ * This is called when all of the ml_processor_info_t structures have been
+ * initialized and all the processors have been started through processor_start().
+ *
+ * Required by the scheduler subsystem.
+ */
+void
+ml_cpu_init_completed(void)
+{
+}
+
/*
* This is called from the machine-independent routine cpu_up()
* to perform machine-dependent info updates.
#endif
}
-static boolean_t
-ml_parse_interrupt_prop(const DTEntry entry, ml_topology_cpu_t *cpu)
-{
- uint32_t const *prop;
- unsigned int propSize;
-
- if (SecureDTGetProperty(entry, "interrupts", (void const **)&prop, &propSize) != kSuccess) {
- return FALSE;
- }
-
- if (propSize == sizeof(uint32_t) * 1) {
- cpu->pmi_irq = prop[0];
- return TRUE;
- } else if (propSize == sizeof(uint32_t) * 3) {
- cpu->self_ipi_irq = prop[0];
- cpu->pmi_irq = prop[1];
- cpu->other_ipi_irq = prop[2];
- return TRUE;
- } else {
- return FALSE;
- }
-}
-
void
ml_parse_cpu_topology(void)
{
cpu->l3_cache_size = (uint32_t)ml_readprop(child, "l3-cache-size", 0);
cpu->l3_cache_id = (uint32_t)ml_readprop(child, "l3-cache-id", 0);
- ml_parse_interrupt_prop(child, cpu);
ml_read_reg_range(child, "cpu-uttdbg-reg", &cpu->cpu_UTTDBG_pa, &cpu->cpu_UTTDBG_len);
ml_read_reg_range(child, "cpu-impl-reg", &cpu->cpu_IMPL_pa, &cpu->cpu_IMPL_len);
ml_read_reg_range(child, "coresight-reg", &cpu->coresight_pa, &cpu->coresight_len);
this_cpu_datap->cluster_master = is_boot_cpu;
#endif /* HAS_CLUSTER */
+#if !defined(RC_HIDE_XNU_FIRESTORM) && (MAX_CPU_CLUSTERS > 2)
+ {
+ /* Workaround for the existing scheduler
+ * code, which only supports a limited number of psets.
+ *
+ * To get around that limitation, we distribute all cores into
+ * two psets according to their cluster type, instead of
+ * having a dedicated pset per cluster ID.
+ */
+
+ pset_cluster_type_t pset_cluster_type;
+
+ /* For this workaround, we don't expect to see anything other
+ * than E or P clusters. */
+ switch (in_processor_info->cluster_type) {
+ case CLUSTER_TYPE_E:
+ pset_cluster_type = PSET_AMP_E;
+ break;
+ case CLUSTER_TYPE_P:
+ pset_cluster_type = PSET_AMP_P;
+ break;
+ default:
+ panic("unknown/unsupported cluster type %d", in_processor_info->cluster_type);
+ }
+
+ pset = pset_find_first_by_cluster_type(pset_cluster_type);
+
+ if (pset == NULL) {
+ panic("no pset for cluster type %d/%d", in_processor_info->cluster_type, pset_cluster_type);
+ }
+
+ kprintf("%s>chosen pset with cluster id %d cluster type %d for core:\n",
+ __FUNCTION__, pset->pset_cluster_id, pset->pset_cluster_type);
+ }
+#else /* !defined(RC_HIDE_XNU_FIRESTORM) && (MAX_CPU_CLUSTERS > 2) */
pset = pset_find(in_processor_info->cluster_id, processor_pset(master_processor));
+#endif /* !defined(RC_HIDE_XNU_FIRESTORM) && (MAX_CPU_CLUSTERS > 2) */
assert(pset != NULL);
kprintf("%s>cpu_id %p cluster_id %d cpu_number %d is type %d\n", __FUNCTION__, in_processor_info->cpu_id, in_processor_info->cluster_id, this_cpu_datap->cpu_number, in_processor_info->cluster_type);
void
ml_static_mfree(
vm_offset_t vaddr,
- vm_size_t size)
+ vm_size_t size)
{
- vm_offset_t vaddr_cur;
- ppnum_t ppn;
- uint32_t freed_pages = 0;
- uint32_t freed_kernelcache_pages = 0;
+ vm_offset_t vaddr_cur;
+ ppnum_t ppn;
+ uint32_t freed_pages = 0;
+ uint32_t bad_page_cnt = 0;
+ uint32_t freed_kernelcache_pages = 0;
+
+#if defined(__arm64__) && (DEVELOPMENT || DEBUG)
+ /* For testing the handling of hitting a bad RAM page */
+ static int count = 0;
+ static int bad_at_cnt = -1;
+ static bool first = true;
+
+ if (first) {
+ (void)PE_parse_boot_argn("bad_static_mfree", &bad_at_cnt, sizeof(bad_at_cnt));
+ first = false;
+ }
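+ /*
+ * Example (hypothetical value): booting with bad_static_mfree=3 marks the
+ * fourth page freed here as bad (the count is 0-based and spans calls),
+ * exercising the page-retirement path below.
+ */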
+#endif /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */
/* It is acceptable (if bad) to fail to free. */
if (vaddr < VM_MIN_KERNEL_ADDRESS) {
panic("Failed ml_static_mfree on %p", (void *) vaddr_cur);
}
+#if defined(__arm64__)
+ bool is_bad = pmap_is_bad_ram(ppn);
+#if DEVELOPMENT || DEBUG
+ is_bad |= (count++ == bad_at_cnt);
+#endif /* DEVELOPMENT || DEBUG */
+
+ if (is_bad) {
+ ++bad_page_cnt;
+ vm_page_create_retired(ppn);
+ continue;
+ }
+#endif /* defined(__arm64__) */
+
vm_page_create(ppn, (ppn + 1));
freed_pages++;
if (vaddr_cur >= segLOWEST && vaddr_cur < end_kern) {
vm_page_kernelcache_count -= freed_kernelcache_pages;
vm_page_unlock_queues();
#if DEBUG
- kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
+ kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x, +%d bad\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn, bad_page_cnt);
#endif
}
}
static void
-cache_trap_recover()
+cache_trap_recover(void)
{
vm_map_address_t fault_addr;
set_cache_trap_recover(thread_t thread)
{
#if defined(HAS_APPLE_PAC)
- thread->recover = (vm_address_t)ptrauth_auth_and_resign(&cache_trap_recover,
+ void *fun = &cache_trap_recover;
+ thread->recover = (vm_address_t)ptrauth_auth_and_resign(fun,
ptrauth_key_function_pointer, 0,
ptrauth_key_function_pointer, ptrauth_blend_discriminator(&thread->recover, PAC_DISCRIMINATOR_RECOVER));
#else /* defined(HAS_APPLE_PAC) */
#include "assym.s"
+#if defined(HAS_APPLE_PAC)
+
+
+.macro LOAD_CPU_JOP_KEY dst, tmp
+ mrs \tmp, TPIDR_EL1
+ ldr \tmp, [\tmp, ACT_CPUDATAP]
+ ldr \dst, [\tmp, CPU_JOP_KEY]
+.endmacro
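+/*
+ * Usage sketch (hypothetical): LOAD_CPU_JOP_KEY x0, x1
+ * loads the current CPU's JOP key into x0, using x1 as scratch.
+ */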
+
+/*
+ * uint64_t ml_enable_user_jop_key(uint64_t user_jop_key)
+ */
+ .align 2
+ .globl EXT(ml_enable_user_jop_key)
+LEXT(ml_enable_user_jop_key)
+
+/*
+ * void ml_disable_user_jop_key(uint64_t user_jop_key, uint64_t saved_jop_state)
+ */
+ .align 2
+ .globl EXT(ml_disable_user_jop_key)
+LEXT(ml_disable_user_jop_key)
+
+#endif /* defined(HAS_APPLE_PAC) */
#if HAS_BP_RET
add x14, x14, EXT(bp_ret)@pageoff
ldr w14, [x14]
- mrs x13, ARM64_REG_ACC_CFG
+ mrs x13, CPU_CFG
and x13, x13, (~(ARM64_REG_ACC_CFG_bpSlp_mask << ARM64_REG_ACC_CFG_bpSlp_shift))
and x14, x14, #(ARM64_REG_ACC_CFG_bpSlp_mask)
orr x13, x13, x14, lsl #(ARM64_REG_ACC_CFG_bpSlp_shift)
- msr ARM64_REG_ACC_CFG, x13
+ msr CPU_CFG, x13
ret
#endif // HAS_BP_RET
cbz x14, Lnex_pg_done
// Set the SEG-recommended value of 12 additional reset cycles
- HID_INSERT_BITS ARM64_REG_HID13, ARM64_REG_HID13_RstCyc_mask, ARM64_REG_HID13_RstCyc_val, x13
- HID_SET_BITS ARM64_REG_HID14, ARM64_REG_HID14_NexPwgEn, x13
+ HID_INSERT_BITS HID13, ARM64_REG_HID13_RstCyc_mask, ARM64_REG_HID13_RstCyc_val, x13
+ HID_SET_BITS HID14, ARM64_REG_HID14_NexPwgEn, x13
Lnex_pg_done:
ret
#else
#if defined(HAS_VMSA_LOCK)
#if DEBUG || DEVELOPMENT
- mrs x1, ARM64_REG_VMSA_LOCK_EL1
+ mrs x1, VMSA_LOCK_EL1
and x1, x1, #(VMSA_LOCK_TTBR1_EL1)
cbnz x1, L_set_locked_reg_panic
#endif /* DEBUG || DEVELOPMENT */
mov x0, #(VMSA_LOCK_TTBR1_EL1 | VMSA_LOCK_TCR_EL1 | VMSA_LOCK_VBAR_EL1)
#endif
orr x0, x0, x1
- msr ARM64_REG_VMSA_LOCK_EL1, x0
+ msr VMSA_LOCK_EL1, x0
isb sy
ret
#endif /* defined(HAS_VMSA_LOCK) */
#if defined(HAS_VMSA_LOCK)
#if DEBUG || DEVELOPMENT
// assert TCR unlocked
- mrs x1, ARM64_REG_VMSA_LOCK_EL1
+ mrs x1, VMSA_LOCK_EL1
and x1, x1, #(VMSA_LOCK_TCR_EL1)
cbnz x1, L_set_locked_reg_panic
#endif /* DEBUG || DEVELOPMENT */
#if defined(APPLETYPHOON)
// <rdar://problem/15827409>
- HID_SET_BITS ARM64_REG_HID2, ARM64_REG_HID2_disMMUmtlbPrefetch, x9
+ HID_SET_BITS HID2, ARM64_REG_HID2_disMMUmtlbPrefetch, x9
dsb sy
isb sy
#endif
#if HAS_CLUSTER
cbnz x0, 1f // Skip if deep_sleep == true
// Mask FIQ and IRQ to avoid spurious wakeups
- mrs x9, ARM64_REG_CYC_OVRD
+ mrs x9, CPU_OVRD
and x9, x9, #(~(ARM64_REG_CYC_OVRD_irq_mask | ARM64_REG_CYC_OVRD_fiq_mask))
mov x10, #(ARM64_REG_CYC_OVRD_irq_disable | ARM64_REG_CYC_OVRD_fiq_disable)
orr x9, x9, x10
- msr ARM64_REG_CYC_OVRD, x9
+ msr CPU_OVRD, x9
isb
1:
#endif
cbz x0, 1f // Skip if deep_sleep == false
#if __ARM_GLOBAL_SLEEP_BIT__
// Enable deep sleep
- mrs x1, ARM64_REG_ACC_OVRD
+ mrs x1, ACC_OVRD
orr x1, x1, #(ARM64_REG_ACC_OVRD_enDeepSleep)
and x1, x1, #(~(ARM64_REG_ACC_OVRD_disL2Flush4AccSlp_mask))
orr x1, x1, #( ARM64_REG_ACC_OVRD_disL2Flush4AccSlp_deepsleep)
#if HAS_RETENTION_STATE
orr x1, x1, #(ARM64_REG_ACC_OVRD_disPioOnWfiCpu)
#endif
- msr ARM64_REG_ACC_OVRD, x1
+ msr ACC_OVRD, x1
+#if defined(APPLEMONSOON)
+ // Skye has an ACC_OVRD register for EBLK and PBLK. Same bitfield layout for these bits
+ mrs x1, EBLK_OVRD
+ orr x1, x1, #(ARM64_REG_ACC_OVRD_enDeepSleep)
+ and x1, x1, #(~(ARM64_REG_ACC_OVRD_disL2Flush4AccSlp_mask))
+ orr x1, x1, #( ARM64_REG_ACC_OVRD_disL2Flush4AccSlp_deepsleep)
+ and x1, x1, #(~(ARM64_REG_ACC_OVRD_ok2PwrDnSRM_mask))
+ orr x1, x1, #( ARM64_REG_ACC_OVRD_ok2PwrDnSRM_deepsleep)
+ and x1, x1, #(~(ARM64_REG_ACC_OVRD_ok2TrDnLnk_mask))
+ orr x1, x1, #( ARM64_REG_ACC_OVRD_ok2TrDnLnk_deepsleep)
+ and x1, x1, #(~(ARM64_REG_ACC_OVRD_ok2PwrDnCPM_mask))
+ orr x1, x1, #( ARM64_REG_ACC_OVRD_ok2PwrDnCPM_deepsleep)
+ msr EBLK_OVRD, x1
+
+#endif
#else
+#if defined(APPLETYPHOON) || defined(APPLETWISTER)
// Enable deep sleep
mov x1, ARM64_REG_CYC_CFG_deepSleep
- msr ARM64_REG_CYC_CFG, x1
+ msr CPU_CFG, x1
+#endif
#endif
1:
// Set "OK to power down" (<rdar://problem/12390433>)
- mrs x9, ARM64_REG_CYC_OVRD
+ mrs x9, CPU_OVRD
orr x9, x9, #(ARM64_REG_CYC_OVRD_ok2pwrdn_force_down)
#if HAS_RETENTION_STATE
orr x9, x9, #(ARM64_REG_CYC_OVRD_disWfiRetn)
#endif
- msr ARM64_REG_CYC_OVRD, x9
+ msr CPU_OVRD, x9
#if defined(APPLEMONSOON) || defined(APPLEVORTEX)
ARM64_IS_PCORE x9
mrs x9, MIDR_EL1
EXEC_COREALL_REVLO CPU_VERSION_B0, x9, x10
#endif
- mrs x9, ARM64_REG_HID10
+ mrs x9, HID10
orr x9, x9, #(ARM64_REG_HID10_DisHwpGups)
- msr ARM64_REG_HID10, x9
+ msr HID10, x9
isb sy
and x9, x9, #(~(ARM64_REG_HID10_DisHwpGups))
- msr ARM64_REG_HID10, x9
+ msr HID10, x9
isb sy
#endif
EXEC_END
ARM64_STACK_PROLOG
PUSH_FRAME
- mrs x0, ARM64_REG_CYC_OVRD
+ mrs x0, CPU_OVRD
orr x0, x0, #(ARM64_REG_CYC_OVRD_ok2pwrdn_force_up)
- msr ARM64_REG_CYC_OVRD, x0
+ msr CPU_OVRD, x0
POP_FRAME
ARM64_STACK_EPILOG
PUSH_FRAME
// <rdar://problem/15827409>
- HID_SET_BITS ARM64_REG_HID2, ARM64_REG_HID2_disMMUmtlbPrefetch, x0
+ HID_SET_BITS HID2, ARM64_REG_HID2_disMMUmtlbPrefetch, x0
dsb sy
isb sy
PUSH_FRAME
// <rdar://problem/15827409>
- HID_CLEAR_BITS ARM64_REG_HID2, ARM64_REG_HID2_disMMUmtlbPrefetch, x0
+ HID_CLEAR_BITS HID2, ARM64_REG_HID2_disMMUmtlbPrefetch, x0
dsb sy
isb sy
cmp x0, #1
b.ne cpu_defeatures_set_ret
LOAD_UINT64 x1, HID0_DEFEATURES_1
- mrs x0, ARM64_REG_HID0
+ mrs x0, HID0
orr x0, x0, x1
- msr ARM64_REG_HID0, x0
+ msr HID0, x0
LOAD_UINT64 x1, HID1_DEFEATURES_1
- mrs x0, ARM64_REG_HID1
+ mrs x0, HID1
orr x0, x0, x1
- msr ARM64_REG_HID1, x0
+ msr HID1, x0
LOAD_UINT64 x1, HID2_DEFEATURES_1
- mrs x0, ARM64_REG_HID2
+ mrs x0, HID2
orr x0, x0, x1
- msr ARM64_REG_HID2, x0
+ msr HID2, x0
LOAD_UINT64 x1, HID3_DEFEATURES_1
- mrs x0, ARM64_REG_HID3
+ mrs x0, HID3
orr x0, x0, x1
- msr ARM64_REG_HID3, x0
+ msr HID3, x0
LOAD_UINT64 x1, HID4_DEFEATURES_1
- mrs x0, ARM64_REG_HID4
+ mrs x0, S3_0_C15_C4_0
orr x0, x0, x1
- msr ARM64_REG_HID4, x0
+ msr S3_0_C15_C4_0, x0
LOAD_UINT64 x1, HID7_DEFEATURES_1
- mrs x0, ARM64_REG_HID7
+ mrs x0, HID7
orr x0, x0, x1
- msr ARM64_REG_HID7, x0
+ msr HID7, x0
dsb sy
isb sy
b cpu_defeatures_set_ret
cpu_defeatures_set_2:
LOAD_UINT64 x1, HID0_DEFEATURES_2
- mrs x0, ARM64_REG_HID0
+ mrs x0, HID0
orr x0, x0, x1
- msr ARM64_REG_HID0, x0
+ msr HID0, x0
LOAD_UINT64 x1, HID1_DEFEATURES_2
- mrs x0, ARM64_REG_HID1
+ mrs x0, HID1
orr x0, x0, x1
- msr ARM64_REG_HID1, x0
+ msr HID1, x0
LOAD_UINT64 x1, HID2_DEFEATURES_2
- mrs x0, ARM64_REG_HID2
+ mrs x0, HID2
orr x0, x0, x1
- msr ARM64_REG_HID2, x0
+ msr HID2, x0
LOAD_UINT64 x1, HID3_DEFEATURES_2
- mrs x0, ARM64_REG_HID3
+ mrs x0, HID3
orr x0, x0, x1
- msr ARM64_REG_HID3, x0
+ msr HID3, x0
LOAD_UINT64 x1, HID4_DEFEATURES_2
- mrs x0, ARM64_REG_HID4
+ mrs x0, S3_0_C15_C4_0
orr x0, x0, x1
- msr ARM64_REG_HID4, x0
+ msr S3_0_C15_C4_0, x0
LOAD_UINT64 x1, HID7_DEFEATURES_2
- mrs x0, ARM64_REG_HID7
+ mrs x0, HID7
orr x0, x0, x1
- msr ARM64_REG_HID7, x0
+ msr HID7, x0
dsb sy
isb sy
b cpu_defeatures_set_ret
__BEGIN_DECLS
-#define PMCR0 "s3_1_c15_c0_0"
-
/* set by hardware if a PMI was delivered */
#define PMCR0_PMAI (UINT64_C(1) << 11)
#define PMCR0_PMI(REG) ((REG) & PMCR0_PMAI)
#if HAS_UNCORE_CTRS
-#define UPMSR "s3_7_c15_c6_4"
#define UPMSR_PMI(REG) ((REG) & 0x1)
#endif /* HAS_UNCORE_CTRS */
mt_pmi_pending(uint64_t * restrict pmcr0_out,
uint64_t * restrict upmsr_out)
{
- uint64_t pmcr0 = __builtin_arm_rsr64(PMCR0);
+ uint64_t pmcr0 = __builtin_arm_rsr64("PMCR0_EL1");
bool pmi = PMCR0_PMI(pmcr0);
if (pmi) {
/*
* Acknowledge the PMI by clearing the pmai bit.
*/
- __builtin_arm_wsr64(PMCR0, pmcr0 & ~PMCR0_PMAI);
+ __builtin_arm_wsr64("PMCR0_EL1", pmcr0 & ~PMCR0_PMAI);
}
*pmcr0_out = pmcr0;
#if HAS_UNCORE_CTRS
extern bool mt_uncore_enabled;
if (mt_uncore_enabled) {
- uint64_t upmsr = __builtin_arm_rsr64(UPMSR);
+ uint64_t upmsr = __builtin_arm_rsr64("UPMSR_EL1");
if (UPMSR_PMI(upmsr)) {
pmi = true;
}
*
* PMC2+ are currently handled by kpc.
*/
-
-#define PMC0 "s3_2_c15_c0_0"
-#define PMC1 "s3_2_c15_c1_0"
-#define PMC2 "s3_2_c15_c2_0"
-#define PMC3 "s3_2_c15_c3_0"
-#define PMC4 "s3_2_c15_c4_0"
-#define PMC5 "s3_2_c15_c5_0"
-#define PMC6 "s3_2_c15_c6_0"
-#define PMC7 "s3_2_c15_c7_0"
-
#define PMC_0_7(X, A) X(0, A); X(1, A); X(2, A); X(3, A); X(4, A); X(5, A); \
X(6, A); X(7, A)
#if CORE_NCTRS > 8
-#define PMC8 "s3_2_c15_c9_0"
-#define PMC9 "s3_2_c15_c10_0"
#define PMC_8_9(X, A) X(8, A); X(9, A)
#else // CORE_NCTRS > 8
#define PMC_8_9(X, A)
/*
* PMCR1 controls which execution modes count events.
*/
-
-#define PMCR1 "s3_1_c15_c1_0"
-
#define PMCR1_EL0A32_EN(CTR) (UINT64_C(1) << (0 + CTR_POS(CTR)))
#define PMCR1_EL0A64_EN(CTR) (UINT64_C(1) << (8 + CTR_POS(CTR)))
#define PMCR1_EL1A64_EN(CTR) (UINT64_C(1) << (16 + CTR_POS(CTR)))
{
uint64_t pmcr1;
- pmcr1 = __builtin_arm_rsr64(PMCR1);
+ pmcr1 = __builtin_arm_rsr64("PMCR1_EL1");
pmcr1 |= PMCR1_INIT;
- __builtin_arm_wsr64(PMCR1, pmcr1);
+ __builtin_arm_wsr64("PMCR1_EL1", pmcr1);
}
-/*
- * PMCR2 controls watchpoint registers.
- *
- * PMCR3 controls breakpoints and address matching.
- *
- * PMCR4 controls opcode matching.
- */
-
-#define PMCR2 "s3_1_c15_c2_0"
-#define PMCR3 "s3_1_c15_c3_0"
-#define PMCR4 "s3_1_c15_c4_0"
-
-#define PMSR "s3_1_c15_c13_0"
-
#define PMSR_OVF(CTR) (1ULL << (CTR))
-#define PMESR0 "S3_1_c15_c5_0"
-#define PMESR1 "S3_1_c15_c6_0"
-
static int
core_init(__unused mt_device_t dev)
{
mt_core_snap(unsigned int ctr)
{
switch (ctr) {
-#define PMC_RD(CTR, UNUSED) case (CTR): return __builtin_arm_rsr64(PMC ## CTR)
+#define PMC_RD(CTR, UNUSED) case (CTR): return __builtin_arm_rsr64(__MSR_STR(PMC ## CTR))
PMC_ALL(PMC_RD, 0);
#undef PMC_RD
default:
{
switch (ctr) {
case 0:
- __builtin_arm_wsr64(PMC0, count);
+ __builtin_arm_wsr64("PMC0", count);
break;
case 1:
- __builtin_arm_wsr64(PMC1, count);
+ __builtin_arm_wsr64("PMC1", count);
break;
default:
panic("monotonic: invalid core counter %u write %llu", ctr, count);
static void
core_set_enabled(void)
{
- uint64_t pmcr0 = __builtin_arm_rsr64(PMCR0);
+ uint64_t pmcr0 = __builtin_arm_rsr64("PMCR0_EL1");
pmcr0 |= PMCR0_INIT | PMCR0_FIXED_EN;
if (kpc_get_running() & KPC_CLASS_CONFIGURABLE_MASK) {
pmcr0 |= kpc_ctrs;
}
- __builtin_arm_wsr64(PMCR0, pmcr0);
+ __builtin_arm_wsr64("PMCR0_EL1", pmcr0);
#if MACH_ASSERT
/*
* Only check for the values that were ORed in.
*/
- uint64_t pmcr0_check = __builtin_arm_rsr64(PMCR0);
+ uint64_t pmcr0_check = __builtin_arm_rsr64("PMCR0_EL1");
if ((pmcr0_check & (PMCR0_INIT | PMCR0_FIXED_EN)) != (PMCR0_INIT | PMCR0_FIXED_EN)) {
panic("monotonic: hardware ignored enable (read %llx, wrote %llx)",
pmcr0_check, pmcr0);
assert(ml_get_interrupts_enabled() == FALSE);
#if DEBUG
- uint64_t pmcr0 = __builtin_arm_rsr64(PMCR0);
+ uint64_t pmcr0 = __builtin_arm_rsr64("PMCR0_EL1");
if ((pmcr0 & PMCR0_FIXED_EN) == 0) {
panic("monotonic: counters disabled before idling, pmcr0 = 0x%llx\n", pmcr0);
}
- uint64_t pmcr1 = __builtin_arm_rsr64(PMCR1);
+ uint64_t pmcr1 = __builtin_arm_rsr64("PMCR1_EL1");
if ((pmcr1 & PMCR1_INIT) == 0) {
panic("monotonic: counter modes disabled before idling, pmcr1 = 0x%llx\n", pmcr1);
}
#endif /* DEBUG */
/* disable counters before updating */
- __builtin_arm_wsr64(PMCR0, PMCR0_INIT);
+ __builtin_arm_wsr64("PMCR0_EL1", PMCR0_INIT);
mt_update_fixed_counts();
}
#define UPMSR_OVF(R, CTR) ((R) >> ((CTR) + UPMSR_OVF_POS) & 0x1)
#define UPMSR_OVF_MASK (((UINT64_C(1) << UNCORE_NCTRS) - 1) << UPMSR_OVF_POS)
-#define UPMPCM "s3_7_c15_c5_4"
#define UPMPCM_CORE(ID) (UINT64_C(1) << (ID))
/*
* UPMCR0 controls which counters are enabled and how interrupts are generated
* for overflows.
*/
-#define UPMCR0 "s3_7_c15_c0_4"
- __builtin_arm_wsr64(UPMCR0, UPMCR0_INIT | enctrmask);
+ __builtin_arm_wsr64("UPMCR0_EL1", UPMCR0_INIT | enctrmask);
}
#if UNCORE_PER_CLUSTER
* would be indexing into an array of strings.
*/
-#define UPMC0 "s3_7_c15_c7_4"
-#define UPMC1 "s3_7_c15_c8_4"
-#define UPMC2 "s3_7_c15_c9_4"
-#define UPMC3 "s3_7_c15_c10_4"
-#define UPMC4 "s3_7_c15_c11_4"
-#define UPMC5 "s3_7_c15_c12_4"
-#define UPMC6 "s3_7_c15_c13_4"
-#define UPMC7 "s3_7_c15_c14_4"
-#if UNCORE_NCTRS > 8
-#define UPMC8 "s3_7_c15_c0_5"
-#define UPMC9 "s3_7_c15_c1_5"
-#define UPMC10 "s3_7_c15_c2_5"
-#define UPMC11 "s3_7_c15_c3_5"
-#define UPMC12 "s3_7_c15_c4_5"
-#define UPMC13 "s3_7_c15_c5_5"
-#define UPMC14 "s3_7_c15_c6_5"
-#define UPMC15 "s3_7_c15_c7_5"
-#endif /* UNCORE_NCTRS > 8 */
-
#define UPMC_0_7(X, A) X(0, A); X(1, A); X(2, A); X(3, A); X(4, A); X(5, A); \
X(6, A); X(7, A)
#if UNCORE_NCTRS <= 8
{
assert(ctr < UNCORE_NCTRS);
switch (ctr) {
-#define UPMC_RD(CTR, UNUSED) case (CTR): return __builtin_arm_rsr64(UPMC ## CTR)
+#define UPMC_RD(CTR, UNUSED) case (CTR): return __builtin_arm_rsr64(__MSR_STR(UPMC ## CTR))
UPMC_ALL(UPMC_RD, 0);
#undef UPMC_RD
default:
assert(ctr < UNCORE_NCTRS);
switch (ctr) {
#define UPMC_WR(CTR, COUNT) case (CTR): \
- return __builtin_arm_wsr64(UPMC ## CTR, (COUNT))
+ return __builtin_arm_wsr64(__MSR_STR(UPMC ## CTR), (COUNT))
UPMC_ALL(UPMC_WR, count);
#undef UPMC_WR
default:
* UPMESR[01] is the event selection register that determines which event a
* counter will count.
*/
-#define UPMESR0 "s3_7_c15_c1_4"
- CTRL_REG_SET(UPMESR0, uncore_config.uc_events.uce_regs[0]);
+ CTRL_REG_SET("UPMESR0_EL1", uncore_config.uc_events.uce_regs[0]);
#if UNCORE_NCTRS > 8
-#define UPMESR1 "s3_7_c15_c11_5"
- CTRL_REG_SET(UPMESR1, uncore_config.uc_events.uce_regs[1]);
+ CTRL_REG_SET("UPMESR1_EL1", uncore_config.uc_events.uce_regs[1]);
#endif /* UNCORE_NCTRS > 8 */
/*
* has a CPU ID of 4, it might be the first CPU in a cluster. Shift the
* registers right by the ID of the first CPU in the cluster.
*/
-#define UPMECM0 "s3_7_c15_c3_4"
-#define UPMECM1 "s3_7_c15_c4_4"
-
- CTRL_REG_SET(UPMECM0,
+ CTRL_REG_SET("UPMECM0_EL1",
uncore_config.uc_cpu_masks[monid].uccm_regs[0]);
- CTRL_REG_SET(UPMECM1,
+ CTRL_REG_SET("UPMECM1_EL1",
uncore_config.uc_cpu_masks[monid].uccm_regs[1]);
#if UNCORE_NCTRS > 8
-#define UPMECM2 "s3_7_c15_c8_5"
-#define UPMECM3 "s3_7_c15_c9_5"
-
- CTRL_REG_SET(UPMECM2,
+ CTRL_REG_SET("UPMECM2_EL1",
uncore_config.uc_cpu_masks[monid].uccm_regs[2]);
- CTRL_REG_SET(UPMECM3,
+ CTRL_REG_SET("UPMECM3_EL1",
uncore_config.uc_cpu_masks[monid].uccm_regs[3]);
#endif /* UNCORE_NCTRS > 8 */
}
static void
uncmon_clear_int_locked_l(__unused unsigned int monid)
{
- __builtin_arm_wsr64(UPMSR, 0);
+ __builtin_arm_wsr64("UPMSR_EL1", 0);
}
#if UNCORE_PER_CLUSTER
* UPMPCM defines the PMI core mask for the UPMCs -- which cores should
* receive interrupts on overflow.
*/
- CTRL_REG_SET(UPMPCM, uncmon_get_pmi_mask(monid));
+ CTRL_REG_SET("UPMPCM_EL1", uncmon_get_pmi_mask(monid));
uncmon_set_counting_locked_l(monid,
mt_uncore_enabled ? uncore_active_ctrs : 0);
}
#endif /* UNCORE_PER_CLUSTER */
struct uncore_monitor *mon = &uncore_monitors[monid];
- lck_spin_init(&mon->um_lock, mt_lock_grp, NULL);
+ lck_spin_init(&mon->um_lock, &mt_lock_grp, LCK_ATTR_NULL);
int intrs_en = uncmon_lock(mon);
if (monid != curmonid) {
assert(cpu != NULL);
assert(ml_get_interrupts_enabled() == FALSE);
- __builtin_arm_wsr64(PMCR0, PMCR0_INIT);
+ __builtin_arm_wsr64("PMCR0_EL1", PMCR0_INIT);
/*
* Ensure the CPMU has flushed any increments at this point, so PMSR is up
* to date.
#pragma unused(pmcr0)
#endif /* !MONOTONIC_DEBUG */
- uint64_t pmsr = __builtin_arm_rsr64(PMSR);
+ uint64_t pmsr = __builtin_arm_rsr64("PMSR_EL1");
#if MONOTONIC_DEBUG
printf("monotonic: cpu = %d, PMSR = 0x%llx, PMCR0 = 0x%llx\n",
}
#if MACH_ASSERT
- uint64_t pmsr_after_handling = __builtin_arm_rsr64(PMSR);
+ uint64_t pmsr_after_handling = __builtin_arm_rsr64("PMSR_EL1");
if (pmsr_after_handling != 0) {
unsigned int first_ctr_ovf = __builtin_ffsll(pmsr_after_handling) - 1;
uint64_t count = 0;
panic("monotonic: PMI status not cleared on exit from handler, "
"PMSR = 0x%llx HANDLE -> -> 0x%llx, handled 0x%llx, "
"PMCR0 = 0x%llx, PMC%d = 0x%llx%s", pmsr, pmsr_after_handling,
- handled, __builtin_arm_rsr64(PMCR0), first_ctr_ovf, count, extra);
+ handled, __builtin_arm_rsr64("PMCR0_EL1"), first_ctr_ovf, count, extra);
}
#endif /* MACH_ASSERT */
panic("monotonic: PMI from IOCPU %p delivered to %p", source,
curcpu->interrupt_nub);
}
- mt_cpu_pmi(curcpu, __builtin_arm_rsr64(PMCR0));
+ mt_cpu_pmi(curcpu, __builtin_arm_rsr64("PMCR0_EL1"));
}
#endif /* CPMU_AIC_PMI */
{
cpu_data_t *cpu = getCpuDatap();
- __builtin_arm_wsr64(PMCR0, PMCR0_INIT);
+ __builtin_arm_wsr64("PMCR0_EL1", PMCR0_INIT);
for (int i = 0; i < MT_CORE_NFIXED; i++) {
uint64_t count = mt_cpu_update_count(cpu, i);
log_t *logs; // Protect
uint32_t size; // Protect
uint64_t rdidx, wridx; // Protect
- decl_simple_lock_data(, loglock);
uint64_t id;
uint32_t option;
uint32_t bytes;
queue_head_t probes; // Protect
+} pgtrace;
- lck_grp_t *lock_grp;
- lck_grp_attr_t *lock_grp_attr;
- lck_attr_t *lock_attr;
- lck_mtx_t probelock;
-} pgtrace = {};
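+/*
+ * The lock group, probe mutex, and log spinlock are statically declared, so
+ * pgtrace_init() no longer needs to allocate lck_grp/lck_attr state at runtime.
+ */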
+static LCK_GRP_DECLARE(pgtrace_lock_grp, "pgtrace_lock");
+static LCK_MTX_DECLARE(pgtrace_probelock, &pgtrace_lock_grp);
+static SIMPLE_LOCK_DECLARE(pgtrace_loglock, 0);
//--------------------------------------------
// Globals
void
pgtrace_init(void)
{
- simple_lock_init(&pgtrace.loglock, 0);
-
- pgtrace.lock_attr = lck_attr_alloc_init();
- pgtrace.lock_grp_attr = lck_grp_attr_alloc_init();
- pgtrace.lock_grp = lck_grp_alloc_init("pgtrace_lock", pgtrace.lock_grp_attr);
-
- lck_mtx_init(&pgtrace.probelock, pgtrace.lock_grp, pgtrace.lock_attr);
-
queue_init(&pgtrace.probes);
pgtrace.size = RBUF_DEFAULT_SIZE;
probe_t *p, *next;
queue_head_t *q = &pgtrace.probes;
- lck_mtx_lock(&pgtrace.probelock);
+ lck_mtx_lock(&pgtrace_probelock);
p = (probe_t *)queue_first(q);
while (!queue_end(q, (queue_entry_t)p)) {
p = next;
}
- lck_mtx_unlock(&pgtrace.probelock);
-
- return;
+ lck_mtx_unlock(&pgtrace_probelock);
}
int
p->pmap = vm_map_pmap(thread->map);
}
- lck_mtx_lock(&pgtrace.probelock);
+ lck_mtx_lock(&pgtrace_probelock);
queue_enter(q, p, probe_t *, chain);
- lck_mtx_unlock(&pgtrace.probelock);
+ lck_mtx_unlock(&pgtrace_probelock);
return 0;
}
pgtrace.enabled = 1;
- lck_mtx_lock(&pgtrace.probelock);
+ lck_mtx_lock(&pgtrace_probelock);
queue_iterate(q, p, probe_t *, chain) {
pmap_pgtrace_add_page(p->pmap, p->start, p->end);
}
- lck_mtx_unlock(&pgtrace.probelock);
-
- return;
+ lck_mtx_unlock(&pgtrace_probelock);
}
void
kprintf("%s\n", __func__);
- lck_mtx_lock(&pgtrace.probelock);
+ lck_mtx_lock(&pgtrace_probelock);
queue_iterate(q, p, probe_t *, chain) {
pmap_pgtrace_delete_page(p->pmap, p->start, p->end);
}
- lck_mtx_unlock(&pgtrace.probelock);
+ lck_mtx_unlock(&pgtrace_probelock);
pgtrace.enabled = 0;
}
pgtrace_stop();
- simple_lock(&pgtrace.loglock);
+ simple_lock(&pgtrace_loglock);
old_buf = pgtrace.logs;
old_size = pgtrace.size;
pgtrace.logs = new_buf;
pgtrace.size = new_size;
pgtrace.rdidx = pgtrace.wridx = 0;
- simple_unlock(&pgtrace.loglock);
+ simple_unlock(&pgtrace_loglock);
if (old_buf) {
kfree(old_buf, old_size * sizeof(log_t));
void
pgtrace_clear_trace(void)
{
- simple_lock(&pgtrace.loglock);
+ simple_lock(&pgtrace_loglock);
pgtrace.rdidx = pgtrace.wridx = 0;
- simple_unlock(&pgtrace.loglock);
+ simple_unlock(&pgtrace_loglock);
}
boolean_t
pgtrace.bytes += sizeof(log);
- simple_lock(&pgtrace.loglock);
+ simple_lock(&pgtrace_loglock);
pgtrace.logs[RBUF_IDX(pgtrace.wridx, pgtrace.size - 1)] = log;
thread_wakeup(pgtrace.logs);
}
- simple_unlock(&pgtrace.loglock);
-
- return;
+ simple_unlock(&pgtrace_loglock);
}
// pgtrace_read_log() is in user thread
}
ints = ml_set_interrupts_enabled(FALSE);
- simple_lock(&pgtrace.loglock);
+ simple_lock(&pgtrace_loglock);
// Wait if ring is empty
if (pgtrace.rdidx == pgtrace.wridx) {
assert_wait(pgtrace.logs, THREAD_ABORTSAFE);
- simple_unlock(&pgtrace.loglock);
+ simple_unlock(&pgtrace_loglock);
ml_set_interrupts_enabled(ints);
wr = thread_block(NULL);
}
ints = ml_set_interrupts_enabled(FALSE);
- simple_lock(&pgtrace.loglock);
+ simple_lock(&pgtrace_loglock);
}
// Trim the size
pgtrace.rdidx += total;
- simple_unlock(&pgtrace.loglock);
+ simple_unlock(&pgtrace_loglock);
ml_set_interrupts_enabled(ints);
return total * sizeof(log_t);
decoder_t *decoder;
logger_t *logger;
queue_head_t probes;
+} pgtrace;
- lck_grp_t *lock_grp;
- lck_grp_attr_t *lock_grp_attr;
- lck_attr_t *lock_attr;
- lck_mtx_t probelock;
-} pgtrace = {};
+static LCK_GRP_DECLARE(pgtrace_lock_grp, "pgtrace_lock");
+static LCK_MTX_DECLARE(pgtrace_probelock, &pgtrace_lock_grp);
//------------------------------------
// functions for pmap fault handler
return EINVAL;
}
- pgtrace.lock_attr = lck_attr_alloc_init();
- pgtrace.lock_grp_attr = lck_grp_attr_alloc_init();
- pgtrace.lock_grp = lck_grp_alloc_init("pgtrace_lock", pgtrace.lock_grp_attr);
-
- lck_mtx_init(&pgtrace.probelock, pgtrace.lock_grp, pgtrace.lock_attr);
-
queue_init(&pgtrace.probes);
pgtrace.decoder = decoder;
pgtrace.logger = logger;
p->pmap = vm_map_pmap(thread->map);
}
- lck_mtx_lock(&pgtrace.probelock);
+ lck_mtx_lock(&pgtrace_probelock);
queue_enter(q, p, probe_t *, chain);
- lck_mtx_unlock(&pgtrace.probelock);
+ lck_mtx_unlock(&pgtrace_probelock);
return 0;
}
kprintf("%s\n", __func__);
- lck_mtx_lock(&pgtrace.probelock);
+ lck_mtx_lock(&pgtrace_probelock);
p = (probe_t *)queue_first(q);
while (!queue_end(q, (queue_entry_t)p)) {
p = next;
}
- lck_mtx_unlock(&pgtrace.probelock);
-
- return;
+ lck_mtx_unlock(&pgtrace_probelock);
}
void
pgtrace.active = true;
- lck_mtx_lock(&pgtrace.probelock);
+ lck_mtx_lock(&pgtrace_probelock);
queue_iterate(q, p, probe_t *, chain) {
pmap_pgtrace_add_page(p->pmap, p->start, p->end);
}
- lck_mtx_unlock(&pgtrace.probelock);
-
- return;
+ lck_mtx_unlock(&pgtrace_probelock);
}
void
kprintf("%s\n", __func__);
- lck_mtx_lock(&pgtrace.probelock);
+ lck_mtx_lock(&pgtrace_probelock);
queue_iterate(q, p, probe_t *, chain) {
pmap_pgtrace_delete_page(p->pmap, p->start, p->end);
}
- lck_mtx_unlock(&pgtrace.probelock);
+ lck_mtx_unlock(&pgtrace_probelock);
pgtrace.active = false;
}
{MT_FUNC(munge_wws), 3, 3, {MT_W_VAL, MT_W_VAL, MT_S_VAL}},
{MT_FUNC(munge_wwwsw), 5, 5, {MT_W_VAL, MT_W_VAL, MT_W_VAL, MT_S_VAL, MT_W_VAL}},
{MT_FUNC(munge_llllll), 12, 6, {MT_L_VAL, MT_L_VAL, MT_L_VAL, MT_L_VAL, MT_L_VAL, MT_L_VAL}},
+ {MT_FUNC(munge_llll), 8, 4, {MT_L_VAL, MT_L_VAL, MT_L_VAL, MT_L_VAL}},
{MT_FUNC(munge_l), 2, 1, {MT_L_VAL}},
{MT_FUNC(munge_lw), 3, 2, {MT_L_VAL, MT_W_VAL}},
{MT_FUNC(munge_lwww), 5, 4, {MT_L_VAL, MT_W_VAL, MT_W_VAL, MT_W_VAL}},
if (config_jop_enabled) {
/* jop key */
- uint64_t apiakey_hi = __builtin_arm_rsr64(ARM64_REG_APIAKEYHI_EL1);
- uint64_t apiakey_lo = __builtin_arm_rsr64(ARM64_REG_APIAKEYLO_EL1);
+ uint64_t apiakey_hi = __builtin_arm_rsr64("APIAKEYHI_EL1");
+ uint64_t apiakey_lo = __builtin_arm_rsr64("APIAKEYLO_EL1");
T_EXPECT(apiakey_hi != 0 && apiakey_lo != 0, NULL);
}
if (config_rop_enabled) {
/* rop key */
- uint64_t apibkey_hi = __builtin_arm_rsr64(ARM64_REG_APIBKEYHI_EL1);
- uint64_t apibkey_lo = __builtin_arm_rsr64(ARM64_REG_APIBKEYLO_EL1);
+ uint64_t apibkey_hi = __builtin_arm_rsr64("APIBKEYHI_EL1");
+ uint64_t apibkey_lo = __builtin_arm_rsr64("APIBKEYLO_EL1");
T_EXPECT(apibkey_hi != 0 && apibkey_lo != 0, NULL);
thread_block(THREAD_CONTINUE_NULL);
T_LOG("Running SPR lock test on cpu %d\n", p->cpu_id);
- uint64_t orig_value = __builtin_arm_rsr64(STR(ARM64_REG_HID8));
+ uint64_t orig_value = __builtin_arm_rsr64(STR(S3_0_C15_C8_0));
spr_lock_test_addr = (vm_offset_t)VM_KERNEL_STRIP_PTR(arm64_msr_lock_test);
spr_lock_exception_esr = 0;
arm64_msr_lock_test(~orig_value);
T_EXPECT(spr_lock_exception_esr != 0, "MSR write generated synchronous abort");
- uint64_t new_value = __builtin_arm_rsr64(STR(ARM64_REG_HID8));
+ uint64_t new_value = __builtin_arm_rsr64(STR(S3_0_C15_C8_0));
T_EXPECT(orig_value == new_value, "MSR write did not succeed");
spr_lock_test_addr = 0;
.align 2
.globl EXT(arm64_msr_lock_test)
LEXT(arm64_msr_lock_test)
- msr ARM64_REG_HID8, x0
+ msr S3_0_C15_C8_0, x0
ret
#endif
#define ARM_PTE_NX 0x0040000000000000ULL /* value for no execute bit */
#define ARM_PTE_NXMASK 0x0040000000000000ULL /* no execute mask */
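+/* Convenience mask covering both execute-never bits: privileged (PNX) and unprivileged (NX). */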
+#define ARM_PTE_XMASK (ARM_PTE_PNXMASK | ARM_PTE_NXMASK)
+
#define ARM_PTE_WIRED 0x0400000000000000ULL /* value for software wired bit */
#define ARM_PTE_WIRED_MASK 0x0400000000000000ULL /* software wired mask */
1:
.endmacro
+/*
+ * Wedges CPUs matching the specified core whose revision is below the specified
+ * value. This macro is intended for CPUs that have been deprecated in iBoot and
+ * may behave incorrectly if they continue running xnu.
+ */
+.macro DEPRECATE_COREEQ_REVLO core, rev, midr_el1, scratch
+EXEC_COREEQ_REVLO \core, \rev, \midr_el1, \scratch
+/* BEGIN IGNORE CODESTYLE */
+b .
+/* END IGNORE CODESTYLE */
+EXEC_END
+.endmacro
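+
+/*
+ * Usage sketch (arguments mirror the per-core deprecation checks this macro
+ * generalizes; actual call sites may differ):
+ *
+ *     DEPRECATE_COREEQ_REVLO MIDR_CEBU_LIGHTNING, CPU_VERSION_B0, x12, x13
+ */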
+
/*
* Sets bits in an SPR register.
* arg0: Name of the register to be accessed.
+
#ifndef __arm64__
#error Should only be compiling for arm64.
#endif
void sleh_synchronous_sp1(arm_context_t *, uint32_t, vm_offset_t) __abortlike;
void sleh_synchronous(arm_context_t *, uint32_t, vm_offset_t);
+
+
+
void sleh_irq(arm_saved_state_t *);
void sleh_fiq(arm_saved_state_t *);
void sleh_serror(arm_context_t *context, uint32_t esr, vm_offset_t far);
#if defined(NO_ECORE)
uint64_t l2c_err_sts, l2c_err_adr, l2c_err_inf;
- mmu_err_sts = __builtin_arm_rsr64(STR(ARM64_REG_MMU_ERR_STS));
- l2c_err_sts = __builtin_arm_rsr64(STR(ARM64_REG_L2C_ERR_STS));
- l2c_err_adr = __builtin_arm_rsr64(STR(ARM64_REG_L2C_ERR_ADR));
- l2c_err_inf = __builtin_arm_rsr64(STR(ARM64_REG_L2C_ERR_INF));
- lsu_err_sts = __builtin_arm_rsr64(STR(ARM64_REG_LSU_ERR_STS));
- fed_err_sts = __builtin_arm_rsr64(STR(ARM64_REG_FED_ERR_STS));
+ mmu_err_sts = __builtin_arm_rsr64(STR(S3_6_C15_C0_0));
+ l2c_err_sts = __builtin_arm_rsr64(STR(S3_3_C15_C8_0));
+ l2c_err_adr = __builtin_arm_rsr64(STR(S3_3_C15_C9_0));
+ l2c_err_inf = __builtin_arm_rsr64(STR(S3_3_C15_C10_0));
+ lsu_err_sts = __builtin_arm_rsr64(STR(S3_3_C15_C0_0));
+ fed_err_sts = __builtin_arm_rsr64(STR(S3_4_C15_C0_0));
panic_plain("Unhandled " CPU_NAME
" implementation specific error. state=%p esr=%#x far=%p\n"
uint64_t l2c_err_sts, l2c_err_adr, l2c_err_inf, mpidr, migsts;
mpidr = __builtin_arm_rsr64("MPIDR_EL1");
- migsts = __builtin_arm_rsr64(STR(ARM64_REG_MIGSTS_EL1));
- mmu_err_sts = __builtin_arm_rsr64(STR(ARM64_REG_MMU_ERR_STS));
- l2c_err_sts = __builtin_arm_rsr64(STR(ARM64_REG_L2C_ERR_STS));
- l2c_err_adr = __builtin_arm_rsr64(STR(ARM64_REG_L2C_ERR_ADR));
- l2c_err_inf = __builtin_arm_rsr64(STR(ARM64_REG_L2C_ERR_INF));
- lsu_err_sts = __builtin_arm_rsr64(STR(ARM64_REG_LSU_ERR_STS));
- fed_err_sts = __builtin_arm_rsr64(STR(ARM64_REG_FED_ERR_STS));
+ migsts = __builtin_arm_rsr64(STR(MIGSTS_EL1));
+ mmu_err_sts = __builtin_arm_rsr64(STR(S3_6_C15_C0_0));
+ l2c_err_sts = __builtin_arm_rsr64(STR(S3_3_C15_C8_0));
+ l2c_err_adr = __builtin_arm_rsr64(STR(S3_3_C15_C9_0));
+ l2c_err_inf = __builtin_arm_rsr64(STR(S3_3_C15_C10_0));
+ lsu_err_sts = __builtin_arm_rsr64(STR(S3_3_C15_C0_0));
+ fed_err_sts = __builtin_arm_rsr64(STR(S3_4_C15_C0_0));
panic_plain("Unhandled " CPU_NAME
" implementation specific error. state=%p esr=%#x far=%p p-core?%d migsts=%p\n"
#else // !defined(NO_ECORE) && !defined(HAS_MIGSTS)
uint64_t llc_err_sts, llc_err_adr, llc_err_inf, mpidr;
#if defined(HAS_DPC_ERR)
- uint64_t dpc_err_sts = __builtin_arm_rsr64(STR(ARM64_REG_DPC_ERR_STS));
+ uint64_t dpc_err_sts = __builtin_arm_rsr64(STR(S3_5_C15_C0_5));
#endif // defined(HAS_DPC_ERR)
mpidr = __builtin_arm_rsr64("MPIDR_EL1");
if (mpidr & MPIDR_PNE) {
- mmu_err_sts = __builtin_arm_rsr64(STR(ARM64_REG_MMU_ERR_STS));
- lsu_err_sts = __builtin_arm_rsr64(STR(ARM64_REG_LSU_ERR_STS));
- fed_err_sts = __builtin_arm_rsr64(STR(ARM64_REG_FED_ERR_STS));
+ mmu_err_sts = __builtin_arm_rsr64(STR(S3_6_C15_C0_0));
+ lsu_err_sts = __builtin_arm_rsr64(STR(S3_3_C15_C0_0));
+ fed_err_sts = __builtin_arm_rsr64(STR(S3_4_C15_C0_0));
} else {
- mmu_err_sts = __builtin_arm_rsr64(STR(ARM64_REG_E_MMU_ERR_STS));
- lsu_err_sts = __builtin_arm_rsr64(STR(ARM64_REG_E_LSU_ERR_STS));
- fed_err_sts = __builtin_arm_rsr64(STR(ARM64_REG_E_FED_ERR_STS));
+ mmu_err_sts = __builtin_arm_rsr64(STR(S3_6_C15_C2_0));
+ lsu_err_sts = __builtin_arm_rsr64(STR(S3_3_C15_C2_0));
+ fed_err_sts = __builtin_arm_rsr64(STR(S3_4_C15_C0_2));
}
- llc_err_sts = __builtin_arm_rsr64(STR(ARM64_REG_L2C_ERR_STS));
- llc_err_adr = __builtin_arm_rsr64(STR(ARM64_REG_L2C_ERR_ADR));
- llc_err_inf = __builtin_arm_rsr64(STR(ARM64_REG_L2C_ERR_INF));
+ llc_err_sts = __builtin_arm_rsr64(STR(S3_3_C15_C8_0));
+ llc_err_adr = __builtin_arm_rsr64(STR(S3_3_C15_C9_0));
+ llc_err_inf = __builtin_arm_rsr64(STR(S3_3_C15_C10_0));
panic_plain("Unhandled " CPU_NAME
" implementation specific error. state=%p esr=%#x far=%p p-core?%d"
static inline void
task_vtimer_check(thread_t thread)
{
- if (__improbable(thread->task->vtimers)) {
+ if (__improbable((thread->task != NULL) && thread->task->vtimers)) {
thread->ast |= AST_BSD;
thread->machine.CpuDatap->cpu_pending_ast |= AST_BSD;
}
*/
DebuggerCall(exception, state);
+ current_thread()->machine.kpcb = NULL;
(void) ml_set_interrupts_enabled(interrupt_state);
return;
} else {
thread->iotier_override = THROTTLE_LEVEL_NONE; /* Reset IO tier override before handling abort from userspace */
if (is_vm_fault(fault_code)) {
- kern_return_t result = KERN_FAILURE;
vm_map_t map = thread->map;
vm_offset_t vm_fault_addr = fault_addr;
+ kern_return_t result = KERN_FAILURE;
assert(map != kernel_map);
}
}
#endif
-
/* check to see if it is just a pmap ref/modify fault */
- if ((result != KERN_SUCCESS) && !is_translation_fault(fault_code)) {
+ if (!is_translation_fault(fault_code)) {
result = arm_fast_fault(map->pmap,
vm_fault_addr,
fault_type, (fault_code == FSC_ACCESS_FLAG_FAULT_L3), TRUE);
}
- if (result != KERN_SUCCESS) {
- {
- /* We have to fault the page in */
- result = vm_fault(map, vm_fault_addr, fault_type,
- /* change_wiring */ FALSE, VM_KERN_MEMORY_NONE, THREAD_ABORTSAFE,
- /* caller_pmap */ NULL, /* caller_pmap_addr */ 0);
- }
+ if (result == KERN_SUCCESS) {
+ return;
+ }
+
+ {
+ /* We have to fault the page in */
+ result = vm_fault(map, vm_fault_addr, fault_type,
+ /* change_wiring */ FALSE, VM_KERN_MEMORY_NONE, THREAD_ABORTSAFE,
+ /* caller_pmap */ NULL, /* caller_pmap_addr */ 0);
}
if (result == KERN_SUCCESS || result == KERN_ABORTED) {
return;
interruptible = THREAD_UNINT;
} else {
map = thread->map;
- interruptible = THREAD_ABORTSAFE;
+
+ /**
+ * When the recovery handler is set (e.g., during copyio and
+ * dtrace probes), we don't want the vm_fault() operation to be
+ * aborted early. Those code paths can't handle restarting the
+ * vm_fault() operation, so don't allow it to return early without
+ * creating the requested mapping.
+ */
+ interruptible = (recover) ? THREAD_UNINT : THREAD_ABORTSAFE;
}
#if CONFIG_PGTRACE
mach_kauth_cred_uthread_update();
if (trap_no < 0) {
- if (trap_no == MACH_ARM_TRAP_ABSTIME) {
+ switch (trap_no) {
+ case MACH_ARM_TRAP_ABSTIME:
handle_mach_absolute_time_trap(state);
return;
- } else if (trap_no == MACH_ARM_TRAP_CONTTIME) {
+ case MACH_ARM_TRAP_CONTTIME:
handle_mach_continuous_time_trap(state);
return;
}
saved_state64(state)->x[0] = now;
}
+
__attribute__((noreturn))
static void
handle_msr_trap(arm_saved_state_t *state, uint32_t esr)
uint64_t ipi_sr = 0;
if (gFastIPI) {
- MRS(ipi_sr, ARM64_REG_IPI_SR);
+ MRS(ipi_sr, "S3_5_C15_C1_1");
if (ipi_sr & 1) {
is_ipi = TRUE;
sleh_interrupt_handler_prologue(state, type);
+
#if defined(HAS_IPI)
if (is_ipi) {
/*
* IPI to this CPU may be lost. ISB is required to ensure the msr
* is retired before execution of cpu_signal_handler().
*/
- MSR(ARM64_REG_IPI_SR, ipi_sr);
+ MSR("S3_5_C15_C1_1", ipi_sr);
__builtin_arm_isb(ISB_SY);
cpu_signal_handler();
} else
INTERRUPT_MASKED_DEBUG_END();
}
+
sleh_interrupt_handler_epilogue();
#if MACH_ASSERT
if (preemption_level != get_preemption_level()) {
--- /dev/null
+/*
+ * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _ARM64_SMCCC_ASM_H_
+#define _ARM64_SMCCC_ASM_H_
+
+#ifndef __ASSEMBLER__
+#error "This header should only be used in .s files"
+#endif
+
+/*
+ * SAVE_SMCCC_CLOBBERED_REGISTERS
+ *
+ * Saves x0-x3 to the stack in preparation for an hvc/smc call.
+ */
+
+.macro SAVE_SMCCC_CLOBBERED_REGISTERS
+stp x0, x1, [sp, #- 16]!
+stp x2, x3, [sp, #- 16]!
+.endmacro
+
+/*
+ * LOAD_SMCCC_CLOBBERED_REGISTERS
+ *
+ * Loads x0-x3 from the stack after an hvc/smc call.
+ */
+
+.macro LOAD_SMCCC_CLOBBERED_REGISTERS
+ldp x2, x3, [sp], #16
+ldp x0, x1, [sp], #16
+.endmacro
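+
+/*
+ * Sketch of the intended call pattern (hypothetical call site):
+ *
+ *     SAVE_SMCCC_CLOBBERED_REGISTERS
+ *     smc     #0
+ *     LOAD_SMCCC_CLOBBERED_REGISTERS
+ */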
+
+#endif /* _ARM64_SMCCC_ASM_H_ */
+
+/* vim: set ts=4 ft=asm: */
#endif
-#if defined(KERNEL_INTEGRITY_KTRR)
- /*
- * Set KTRR registers immediately after wake/resume
- *
- * During power on reset, XNU stashed the kernel text region range values
- * into __DATA,__const which should be protected by AMCC RoRgn at this point.
- * Read this data and program/lock KTRR registers accordingly.
- * If either values are zero, we're debugging kernel so skip programming KTRR.
- */
-
- /* refuse to boot if machine_lockdown() hasn't completed */
- adrp x17, EXT(lockdown_done)@page
- ldr w17, [x17, EXT(lockdown_done)@pageoff]
- cbz w17, .
-
- // load stashed rorgn_begin
- adrp x17, EXT(ctrr_begin)@page
- add x17, x17, EXT(ctrr_begin)@pageoff
- ldr x17, [x17]
-#if DEBUG || DEVELOPMENT || CONFIG_DTRACE
- // if rorgn_begin is zero, we're debugging. skip enabling ktrr
- cbz x17, Lskip_ktrr
-#else
- cbz x17, .
-#endif
-
- // load stashed rorgn_end
- adrp x19, EXT(ctrr_end)@page
- add x19, x19, EXT(ctrr_end)@pageoff
- ldr x19, [x19]
-#if DEBUG || DEVELOPMENT || CONFIG_DTRACE
- cbz x19, Lskip_ktrr
-#else
- cbz x19, .
-#endif
-
- msr ARM64_REG_KTRR_LOWER_EL1, x17
- msr ARM64_REG_KTRR_UPPER_EL1, x19
- mov x17, #1
- msr ARM64_REG_KTRR_LOCK_EL1, x17
-Lskip_ktrr:
-#endif /* defined(KERNEL_INTEGRITY_KTRR) */
// Process reset handlers
adrp x19, EXT(ResetHandlerData)@page // Get address of the reset handler data
b.eq Lskip_cpu_reset_handler // Not found
b Lcheck_cpu_data_entry // loop
Lfound_cpu_data_entry:
-#if defined(KERNEL_INTEGRITY_CTRR)
- /*
- * Program and lock CTRR if this CPU is non-boot cluster master. boot cluster will be locked
- * in machine_lockdown. pinst insns protected by VMSA_LOCK
- * A_PXN and A_MMUON_WRPROTECT options provides something close to KTRR behavior
- */
-
- /* refuse to boot if machine_lockdown() hasn't completed */
- adrp x17, EXT(lockdown_done)@page
- ldr w17, [x17, EXT(lockdown_done)@pageoff]
- cbz w17, .
-
- // load stashed rorgn_begin
- adrp x17, EXT(ctrr_begin)@page
- add x17, x17, EXT(ctrr_begin)@pageoff
- ldr x17, [x17]
-#if DEBUG || DEVELOPMENT || CONFIG_DTRACE
- // if rorgn_begin is zero, we're debugging. skip enabling ctrr
- cbz x17, Lskip_ctrr
-#else
- cbz x17, .
-#endif
-
- // load stashed rorgn_end
- adrp x19, EXT(ctrr_end)@page
- add x19, x19, EXT(ctrr_end)@pageoff
- ldr x19, [x19]
-#if DEBUG || DEVELOPMENT || CONFIG_DTRACE
- cbz x19, Lskip_ctrr
-#else
- cbz x19, .
-#endif
-
- mrs x18, ARM64_REG_CTRR_LOCK_EL1
- cbnz x18, Lskip_ctrr /* don't touch if already locked */
- msr ARM64_REG_CTRR_A_LWR_EL1, x17
- msr ARM64_REG_CTRR_A_UPR_EL1, x19
- mov x18, #(CTRR_CTL_EL1_A_PXN | CTRR_CTL_EL1_A_MMUON_WRPROTECT)
- msr ARM64_REG_CTRR_CTL_EL1, x18
- mov x18, #1
- msr ARM64_REG_CTRR_LOCK_EL1, x18
-
-
- isb
- tlbi vmalle1
- dsb ish
- isb
-Lspin_ctrr_unlocked:
- /* we shouldn't ever be here as cpu start is serialized by cluster in cpu_start(),
- * and first core started in cluster is designated cluster master and locks
- * both core and cluster. subsequent cores in same cluster will run locked from
- * from reset vector */
- mrs x18, ARM64_REG_CTRR_LOCK_EL1
- cbz x18, Lspin_ctrr_unlocked
-Lskip_ctrr:
-#endif
adrp x20, EXT(const_boot_args)@page
add x20, x20, EXT(const_boot_args)@pageoff
ldr x0, [x21, CPU_RESET_HANDLER] // Call CPU reset handler
#if defined(APPLEHURRICANE)
// <rdar://problem/26726624> Increase Snoop reservation in EDB to reduce starvation risk
// Needs to be done before MMU is enabled
- HID_INSERT_BITS ARM64_REG_HID5, ARM64_REG_HID5_CrdEdbSnpRsvd_mask, ARM64_REG_HID5_CrdEdbSnpRsvd_VALUE, x12
+ HID_INSERT_BITS HID5, ARM64_REG_HID5_CrdEdbSnpRsvd_mask, ARM64_REG_HID5_CrdEdbSnpRsvd_VALUE, x12
#endif
#if defined(BCM2837)
#if defined(APPLE_ARM64_ARCH_FAMILY)
- // Initialization common to all Apple targets
+ // Initialization common to all non-virtual Apple targets
ARM64_IS_PCORE x15
- ARM64_READ_EP_SPR x15, x12, ARM64_REG_EHID4, ARM64_REG_HID4
+ ARM64_READ_EP_SPR x15, x12, S3_0_C15_C4_1, S3_0_C15_C4_0
orr x12, x12, ARM64_REG_HID4_DisDcMVAOps
orr x12, x12, ARM64_REG_HID4_DisDcSWL2Ops
- ARM64_WRITE_EP_SPR x15, x12, ARM64_REG_EHID4, ARM64_REG_HID4
+ ARM64_WRITE_EP_SPR x15, x12, S3_0_C15_C4_1, S3_0_C15_C4_0
#endif // APPLE_ARM64_ARCH_FAMILY
// Read MIDR before start of per-SoC tunables
mrs x12, MIDR_EL1
-#if defined(APPLELIGHTNING)
- // Cebu <B0 is deprecated and unsupported (see rdar://problem/42835678)
- EXEC_COREEQ_REVLO MIDR_CEBU_LIGHTNING, CPU_VERSION_B0, x12, x13
- b .
- EXEC_END
- EXEC_COREEQ_REVLO MIDR_CEBU_THUNDER, CPU_VERSION_B0, x12, x13
- b .
- EXEC_END
-#endif
-
APPLY_TUNABLES x12, x13
#if HAS_CLUSTER
// Unmask external IRQs if we're restarting from non-retention WFI
- mrs x9, ARM64_REG_CYC_OVRD
+ mrs x9, CPU_OVRD
and x9, x9, #(~(ARM64_REG_CYC_OVRD_irq_mask | ARM64_REG_CYC_OVRD_fiq_mask))
- msr ARM64_REG_CYC_OVRD, x9
+ msr CPU_OVRD, x9
#endif
// If x21 != 0, we're doing a warm reset, so we need to trampoline to the kernel pmap.
+++ /dev/null
-/*
- * Copyright (c) 2019 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#include <pexpert/arm64/board_config.h>
-
-#if defined(APPLETYPHOON)
-#include "tunables_h7.s"
-#elif defined(APPLETWISTER)
-#include "tunables_h8.s"
-#elif defined(APPLEHURRICANE)
-#include "tunables_h9.s"
-#elif defined(APPLEMONSOON)
-#include "tunables_h10.s"
-#elif defined(APPLEVORTEX)
-#include "tunables_h11.s"
-#elif defined(APPLELIGHTNING)
-#include "tunables_h12.s"
-#elif defined(APPLEFIRESTORM)
-#include "tunables_h13.s"
-#else
-.macro APPLY_TUNABLES
-.endmacro
-#endif
+++ /dev/null
-/*
- * Copyright (c) 2019 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-.macro APPLY_TUNABLES
- /***** Tunables that apply to all cores, all revisions *****/
-
- // <rdar://problem/28512310> SW WAR/eval: WKdm write ack lost when bif_wke_colorWrAck_XXaH asserts concurrently for both colors
- HID_SET_BITS ARM64_REG_HID8, ARM64_REG_HID8_WkeForceStrictOrder, $1
-
- /***** Tunables that apply to all P cores, all revisions *****/
- /* N/A */
-
- /***** Tunables that apply to all E cores, all revisions *****/
- /* N/A */
-
- /***** Tunables that apply to specific cores, all revisions *****/
- EXEC_COREEQ_REVALL MIDR_SKYE_MISTRAL, $0, $1
- // <rdar://problem/30423928>: Atomic launch eligibility is erroneously taken away when a store at SMB gets invalidated
- HID_CLEAR_BITS ARM64_REG_EHID11, ARM64_REG_EHID11_SmbDrainThresh_mask, $1
- EXEC_END
-
- /***** Tunables that apply to specific cores and revisions *****/
- EXEC_COREEQ_REVLO MIDR_SKYE_MISTRAL, CPU_VERSION_B0, $0, $1
-
- // Disable downstream fill bypass logic
- // <rdar://problem/28545159> [Tunable] Skye - L2E fill bypass collision from both pipes to ecore
- HID_SET_BITS ARM64_REG_EHID5, ARM64_REG_EHID5_DisFillByp, $1
-
- // Disable forwarding of return addresses to the NFP
- // <rdar://problem/30387067> Skye: FED incorrectly taking illegal va exception
- HID_SET_BITS ARM64_REG_EHID0, ARM64_REG_EHID0_nfpRetFwdDisb, $1
-
- EXEC_END
-
- EXEC_COREALL_REVLO CPU_VERSION_B0, $0, $1
-
- // Disable clock divider gating
- // <rdar://problem/30854420> [Tunable/Errata][cpu_1p_1e] [CPGV2] ACC power down issue when link FSM switches from GO_DN to CANCEL and at the same time upStreamDrain request is set.
- HID_SET_BITS ARM64_REG_HID6, ARM64_REG_HID6_DisClkDivGating, $1
-
- // Disable clock dithering
- // <rdar://problem/29022199> [Tunable] Skye A0: Linux: LLC PIO Errors
- HID_SET_BITS ARM64_REG_ACC_OVRD, ARM64_REG_ACC_OVRD_dsblClkDtr, $1
- HID_SET_BITS ARM64_REG_ACC_EBLK_OVRD, ARM64_REG_ACC_OVRD_dsblClkDtr, $1
-
- EXEC_END
-
- EXEC_COREALL_REVHS CPU_VERSION_B0, $0, $1
- // <rdar://problem/32512836>: Disable refcount syncing between E and P
- HID_INSERT_BITS ARM64_REG_CYC_OVRD, ARM64_REG_CYC_OVRD_dsblSnoopTime_mask, ARM64_REG_CYC_OVRD_dsblSnoopPTime, $1
- EXEC_END
-.endmacro
\ No newline at end of file
+++ /dev/null
-/*
- * Copyright (c) 2019 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-.macro APPLY_TUNABLES
- /***** Tunables that apply to all cores, all revisions *****/
- /* N/A */
-
- /***** Tunables that apply to all P cores, all revisions *****/
- EXEC_PCORE_REVALL $0, $1
- // rdar://problem/34435356: segfaults due to IEX clock-gating
- HID_SET_BITS ARM64_REG_HID1, ARM64_REG_HID1_rccForceAllIexL3ClksOn, $1
-
- // Prevent ordered loads from being dispatched from LSU until all prior loads have completed.
- // rdar://problem/34095873: AF2 ordering rules allow ARM device ordering violations
- HID_SET_BITS ARM64_REG_HID4, ARM64_REG_HID4_ForceNsOrdLdReqNoOlderLd, $1
-
- // rdar://problem/38482968: [Cyprus Tunable] Poisoned cache line crossing younger load is not redirected by older load-barrier
- HID_SET_BITS ARM64_REG_HID3, ARM64_REG_HID3_DisColorOpt, $1
-
- // rdar://problem/41056604: disable faster launches of uncacheable unaligned stores to workaround load/load ordering violation
- HID_SET_BITS ARM64_REG_HID11, ARM64_REG_HID11_DisX64NTLnchOpt, $1
-
- EXEC_END
-
- /***** Tunables that apply to all E cores, all revisions *****/
- EXEC_ECORE_REVALL $0, $1
- // Prevent ordered loads from being dispatched from LSU until all prior loads have completed.
- // rdar://problem/34095873: AF2 ordering rules allow ARM device ordering violations
- HID_SET_BITS ARM64_REG_EHID4, ARM64_REG_HID4_ForceNsOrdLdReqNoOlderLd, $1
-
- // rdar://problem/36595004: Poisoned younger load is not redirected by older load-acquire
- HID_SET_BITS ARM64_REG_EHID3, ARM64_REG_EHID3_DisColorOpt, $1
-
- // rdar://problem/37949166: Disable the extension of prefetcher training pipe clock gating, revert to default gating
- HID_SET_BITS ARM64_REG_EHID10, ARM64_REG_EHID10_rccDisPwrSavePrfClkOff, $1
-
- EXEC_END
-
- /***** Tunables that apply to specific cores, all revisions *****/
- // Should be applied to all Aruba variants, but only Cyprus variants B0 and later
- EXEC_COREEQ_REVALL MIDR_ARUBA_VORTEX, $0, $1
- // rdar://problem/36716477: data corruption due to incorrect branch predictor resolution
- HID_SET_BITS ARM64_REG_HID1, ARM64_REG_HID1_enaBrKillLimit, $1
- EXEC_END
-
- /***** Tunables that apply to specific cores and revisions *****/
- EXEC_COREEQ_REVHS MIDR_CYPRUS_VORTEX, CPU_VERSION_A1, $0, $1
- // rdar://problem/36716477: data corruption due to incorrect branch predictor resolution
- HID_SET_BITS ARM64_REG_HID1, ARM64_REG_HID1_enaBrKillLimit, $1
- EXEC_END
-
- EXEC_COREEQ_REVEQ MIDR_ARUBA_VORTEX, CPU_VERSION_A1, $0, $1
- // rdar://problem/40695685: Enable BIF fill buffer stall logic to prevent skid buffer overflow (Aruba A1 only)
- HID_SET_BITS ARM64_REG_HID5, ARM64_REG_HID5_EnableDnFIFORdStall, $1
- EXEC_END
-.endmacro
\ No newline at end of file
+++ /dev/null
-/*
- * Copyright (c) 2019 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-.macro APPLY_TUNABLES
- /***** Tunables that apply to all cores, all revisions *****/
- /* N/A */
-
- /***** Tunables that apply to all P cores, all revisions *****/
- /* N/A */
-
- /***** Tunables that apply to all E cores, all revisions *****/
- /* N/A */
-
- /***** Tunables that apply to specific cores, all revisions *****/
- EXEC_COREEQ_REVALL MIDR_CEBU_LIGHTNING, $0, $1
- // rdar://53907283 ([Cebu ACC Errata] Sibling Merge in LLC can cause UC load to violate ARM Memory Ordering Rules.)
- HID_SET_BITS ARM64_REG_HID5, ARM64_REG_HID5_DisFill2cMerge, $1
-
- // rdar://problem/54615539: [Cebu ACC Tunable]Cross-beat Crypto(AES/PMUL) ICache fusion is not disabled for branch uncondtional recoded instruction.
- HID_SET_BITS ARM64_REG_HID0, ARM64_REG_HID0_CacheFusionDisable, $1
-
- // rdar://problem/50664291: [Cebu B0/B1 Tunables][PerfVerif][LSU] Post-silicon tuning of STNT widget contiguous counter threshold
- HID_INSERT_BITS ARM64_REG_HID4, ARM64_REG_HID4_CnfCntrThresh_mask, ARM64_REG_HID4_CnfCntrThresh_VALUE, $1
-
- // rdar://problem/47744434: Barrier Load Ordering property is not satisfied for x64-loads
- HID_SET_BITS ARM64_REG_HID9, ARM64_REG_HID9_EnableFixBug47221499, $1
-
- // rdar://problem/50664291: [Cebu B0/B1 Tunables][PerfVerif][LSU] Post-silicon tuning of STNT widget contiguous counter threshold
- HID_SET_BITS ARM64_REG_HID9, ARM64_REG_HID9_DisSTNTWidgetForUnalign, $1
-
- // rdar://problem/47865629: RF bank and Multipass conflict forward progress widget does not handle 3+ cycle livelock
- HID_SET_BITS ARM64_REG_HID16, ARM64_REG_HID16_EnRs4Sec, $1
- HID_CLEAR_BITS ARM64_REG_HID16, ARM64_REG_HID16_DisxPickRs45, $1
- HID_SET_BITS ARM64_REG_HID16, ARM64_REG_HID16_EnMPxPick45, $1
- HID_SET_BITS ARM64_REG_HID16, ARM64_REG_HID16_EnMPCyc7, $1
-
- // Prevent ordered loads from being dispatched from LSU until all prior loads have completed.
- // rdar://problem/34095873: AF2 ordering rules allow ARM device ordering violations
- HID_SET_BITS ARM64_REG_HID4, ARM64_REG_HID4_ForceNsOrdLdReqNoOlderLd, $1
-
- // rdar://problem/51690962: Disable Store-Non-Temporal downgrade widget
- HID_SET_BITS ARM64_REG_HID4, ARM64_REG_HID4_DisSTNTWidget, $1
-
- // rdar://problem/41056604: disable faster launches of uncacheable unaligned stores to workaround load/load ordering violation
- HID_SET_BITS ARM64_REG_HID11, ARM64_REG_HID11_DisX64NTLnchOpt, $1
-
- // rdar://problem/45024523: enable aggressive LEQ throttling to work around LEQ credit leak
- HID_SET_BITS ARM64_REG_HID16, ARM64_REG_HID16_leqThrottleAggr, $1
-
- // rdar://problem/41029832: configure dummy cycles to work around incorrect temp sensor readings on NEX power gating
- HID_INSERT_BITS ARM64_REG_HID13, ARM64_REG_HID13_PreCyc_mask, ARM64_REG_HID13_PreCyc_VALUE, $1
- EXEC_END
-
- EXEC_COREEQ_REVALL MIDR_CEBU_THUNDER, $0, $1
- // rdar://53907283 ([Cebu ACC Errata] Sibling Merge in LLC can cause UC load to violate ARM Memory Ordering Rules.)
- HID_SET_BITS ARM64_REG_HID5, ARM64_REG_HID5_DisFill2cMerge, $1
-
- // rdar://problem/48476033: Prevent store-to-load forwarding for UC memory to avoid barrier ordering violation
- HID_SET_BITS ARM64_REG_EHID10, ARM64_REG_EHID10_ForceWStDrainUc, $1
-
- // Prevent ordered loads from being dispatched from LSU until all prior loads have completed.
- // rdar://problem/34095873: AF2 ordering rules allow ARM device ordering violations
- HID_SET_BITS ARM64_REG_EHID4, ARM64_REG_HID4_ForceNsOrdLdReqNoOlderLd, $1
-
- // rdar://problem/37949166: Disable the extension of prefetcher training pipe clock gating, revert to default gating
- HID_SET_BITS ARM64_REG_EHID10, ARM64_REG_EHID10_rccDisPwrSavePrfClkOff, $1
- EXEC_END
-
- EXEC_COREEQ_REVALL MIDR_TURKS, $0, $1
- // rdar://problem/53506680: [MP_CHECKER] Load STLFs from a completed UC/NC/NT store causing barrier ordering violation
- HID_SET_BITS ARM64_REG_EHID10, ARM64_REG_EHID10_ForceWStDrainUc, $1
- EXEC_END
-
- /***** Tunables that apply to specific cores and revisions *****/
- /* N/A */
-.endmacro
\ No newline at end of file
+++ /dev/null
-/*
- * Copyright (c) 2019 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-.macro APPLY_TUNABLES
-.endmacro
+++ /dev/null
-/*
- * Copyright (c) 2019 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-.macro APPLY_TUNABLES
- /***** Tunables that apply to all cores, all revisions *****/
-
- // Disable LSP flush with context switch to work around bug in LSP
- // that can cause Typhoon to wedge when CONTEXTIDR is written.
- // <rdar://problem/12387704>
- HID_SET_BITS ARM64_REG_HID0, ARM64_REG_HID0_LoopBuffDisb, $1
- HID_SET_BITS ARM64_REG_HID1, ARM64_REG_HID1_rccDisStallInactiveIexCtl, $1
- HID_SET_BITS ARM64_REG_HID3, ARM64_REG_HID3_DisXmonSnpEvictTriggerL2StarvationMode, $1
- HID_CLEAR_BITS ARM64_REG_HID5, (ARM64_REG_HID5_DisHwpLd | ARM64_REG_HID5_DisHwpSt), $1
-
- // Change the default memcache data set ID from 0 to 15 for all agents
- HID_SET_BITS ARM64_REG_HID8, (ARM64_REG_HID8_DataSetID0_VALUE | ARM64_REG_HID8_DataSetID1_VALUE), $1
-
- /***** Tunables that apply to all P cores, all revisions *****/
- /* N/A */
-
- /***** Tunables that apply to all E cores, all revisions *****/
- /* N/A */
-
- /***** Tunables that apply to specific cores, all revisions *****/
- EXEC_COREEQ_REVALL MIDR_CAPRI, $0, $1
- HID_SET_BITS ARM64_REG_HID8, ARM64_REG_HID8_DataSetID2_VALUE, $1
- EXEC_END
-
- /***** Tunables that apply to specific cores and revisions *****/
- /* N/A */
-
- isb sy
-.endmacro
\ No newline at end of file
+++ /dev/null
-/*
- * Copyright (c) 2019 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-.macro APPLY_TUNABLES
- /***** Tunables that apply to all cores, all revisions *****/
- HID_CLEAR_BITS ARM64_REG_HID11, ARM64_REG_HID11_DisFillC1BubOpt, $1
-
- // Change the default memcache data set ID from 0 to 15 for all agents
- HID_SET_BITS ARM64_REG_HID8, (ARM64_REG_HID8_DataSetID0_VALUE | ARM64_REG_HID8_DataSetID1_VALUE), $1
- HID_SET_BITS ARM64_REG_HID8, (ARM64_REG_HID8_DataSetID2_VALUE | ARM64_REG_HID8_DataSetID3_VALUE), $1
-
- // Use 4-cycle MUL latency to avoid denormal stalls
- HID_SET_BITS ARM64_REG_HID7, ARM64_REG_HID7_disNexFastFmul, $1
-
- // disable reporting of TLB-multi-hit-error
- // <rdar://problem/22163216>
- HID_CLEAR_BITS ARM64_REG_LSU_ERR_STS, ARM64_REG_LSU_ERR_STS_L1DTlbMultiHitEN, $1
-
- /***** Tunables that apply to all P cores, all revisions *****/
- /* N/A */
-
- /***** Tunables that apply to all E cores, all revisions *****/
- /* N/A */
-
- /***** Tunables that apply to specific cores, all revisions *****/
- /* N/A */
-
- /***** Tunables that apply to specific cores and revisions *****/
-
- // rdar://problem/36112905: Set CYC_CFG:skipInit to pull in isAlive by one DCLK
- // to work around potential hang. Must only be applied to Maui C0.
- EXEC_COREEQ_REVEQ MIDR_MAUI, CPU_VERSION_C0, $0, $1
- HID_SET_BITS ARM64_REG_CYC_CFG, ARM64_REG_CYC_CFG_skipInit, $1
- EXEC_END
- isb sy
-.endmacro
\ No newline at end of file
+++ /dev/null
-/*
- * Copyright (c) 2019 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-.macro APPLY_TUNABLES
- /***** Tunables that apply to all cores, all revisions *****/
-
- // IC prefetch configuration
- // <rdar://problem/23019425>
- HID_INSERT_BITS ARM64_REG_HID0, ARM64_REG_HID0_ICPrefDepth_bmsk, ARM64_REG_HID0_ICPrefDepth_VALUE, $1
- HID_SET_BITS ARM64_REG_HID0, ARM64_REG_HID0_ICPrefLimitOneBrn, $1
-
- // disable reporting of TLB-multi-hit-error
- // <rdar://problem/22163216>
- HID_CLEAR_BITS ARM64_REG_LSU_ERR_CTL, ARM64_REG_LSU_ERR_CTL_L1DTlbMultiHitEN, $1
-
- // disable crypto fusion across decode groups
- // <rdar://problem/27306424>
- HID_SET_BITS ARM64_REG_HID1, ARM64_REG_HID1_disAESFuseAcrossGrp, $1
-
- /***** Tunables that apply to all P cores, all revisions *****/
- /* N/A */
-
- /***** Tunables that apply to all E cores, all revisions *****/
- /* N/A */
-
- /***** Tunables that apply to specific cores, all revisions *****/
- EXEC_COREEQ_REVALL MIDR_MYST, $0, $1
- // Clear DisDcZvaCmdOnly
- // Per Myst A0/B0 tunables document
- // <rdar://problem/27627428> Myst: Confirm ACC Per-CPU Tunables
- HID_CLEAR_BITS ARM64_REG_HID3, ARM64_REG_HID3_DisDcZvaCmdOnly, $1
- HID_CLEAR_BITS ARM64_REG_EHID3, ARM64_REG_HID3_DisDcZvaCmdOnly, $1
- EXEC_END
-
- /***** Tunables that apply to specific cores and revisions *****/
- /* N/A */
-.endmacro
\ No newline at end of file
video_console.o_CFLAGS_ADD += -Wno-implicit-int-conversion
xcpm_dvfs.o_CFLAGS_ADD += -Wno-implicit-int-conversion
xcpm_ioctl.o_CFLAGS_ADD += -Wno-implicit-int-conversion
-zalloc.o_CFLAGS_ADD += -Wno-implicit-int-conversion
# -Wno-shorten-64-to-32
arm_vm_init.o_CFLAGS_ADD += -Wno-shorten-64-to-32
backtrace.o_CFLAGS_ADD += -Wno-shorten-64-to-32
vm_shared_region_pager.o_CFLAGS_ADD += -Wno-shorten-64-to-32
vm_swapfile_pager.o_CFLAGS_ADD += -Wno-shorten-64-to-32
vm_user.o_CFLAGS_ADD += -Wno-shorten-64-to-32
-zalloc.o_CFLAGS_ADD += -Wno-shorten-64-to-32
# -Wno-sign-conversion
Diagnostics.o_CFLAGS_ADD += -Wno-sign-conversion
acpi.o_CFLAGS_ADD += -Wno-sign-conversion
xcpm_fi.o_CFLAGS_ADD += -Wno-sign-conversion
xcpm_idle.o_CFLAGS_ADD += -Wno-sign-conversion
xcpm_ioctl.o_CFLAGS_ADD += -Wno-sign-conversion
-zalloc.o_CFLAGS_ADD += -Wno-sign-conversion
-zcache.o_CFLAGS_ADD += -Wno-sign-conversion
# Rebuild if per-file overrides change
${OBJS}: $(firstword $(MAKEFILE_LIST))
OPTIONS/mach_page_hash_stats optional mach_page_hash_stats
OPTIONS/mig_debug optional mig_debug
OPTIONS/vm_cpm optional vm_cpm
-OPTIONS/task_swapper optional task_swapper
OPTIONS/stack_usage optional stack_usage
OPTIONS/importance_inheritance optional importance_inheritance
OPTIONS/importance_debug optional importance_debug
#
# UserNotification files
#
-./UserNotification/UNDRequest.c standard
-./UserNotification/UNDReplyServer.c standard
+./UserNotification/UNDRequest.c optional config_user_notification
+./UserNotification/UNDReplyServer.c optional config_user_notification
osfmk/UserNotification/KUNCUserNotifications.c standard
osfmk/kdp/kdp.c optional config_kdp_interactive_debugging
osfmk/kern/clock.c standard
osfmk/kern/clock_oldops.c standard
osfmk/kern/coalition.c optional config_coalitions
-osfmk/kern/counters.c standard
+osfmk/kern/counter_common.c standard
osfmk/kern/cpu_quiesce.c optional config_quiesce_counter
osfmk/kern/debug.c standard
osfmk/kern/ecc_logging.c optional config_ecc_logging
osfmk/kern/sysdiagnose.c optional config_sysdiagnose
osfmk/kern/task.c standard
osfmk/kern/task_policy.c standard
-osfmk/kern/task_swap.c standard
osfmk/kern/test_lock.c optional development
osfmk/kern/test_lock.c optional debug
osfmk/kern/test_mpsc_queue.c optional development
osfmk/kern/waitq.c standard
osfmk/kern/work_interval.c standard
osfmk/kern/zalloc.c standard
-osfmk/kern/zcache.c optional config_zcache
osfmk/kern/gzalloc.c optional config_gzalloc
osfmk/kern/bsd_kern.c optional mach_bsd
osfmk/kern/hibernate.c optional hibernation
osfmk/kern/memset_s.c standard
osfmk/kern/copyout_shim.c optional copyout_shim
osfmk/kern/suid_cred.c standard
+osfmk/kern/task_ident.c standard
./mach/clock_server.c standard
./mach/clock_priv_server.c standard
./mach/fairplayd_notification_user.c optional config_arcade
./mach/arcade_upcall_user.c optional config_arcade
./mach/arcade_register_server.c optional config_arcade
+./mach/iocompressionstats_notification_user.c optional config_io_compression_stats
#
# For now, no external pagers
osfmk/arm/model_dep.c standard
osfmk/arm/pcb.c standard
osfmk/arm/rtclock.c standard
+osfmk/arm/counter.c standard
osfmk/arm/status.c standard
osfmk/arm/status_shared.c standard
osfmk/arm/trap.c standard
osfmk/arm64/pgtrace_decoder.c optional config_pgtrace_nonkext
osfmk/arm64/machine_remote_time.c optional config_mach_bridge_recv_time
osfmk/arm64/corecrypto/sha256_compress_arm64.s standard
+
+osfmk/arm/counter.c standard
osfmk/i386/vmx/vmx_cpu.c optional config_vmx
osfmk/i386/vmx/vmx_shims.c optional config_vmx
+osfmk/i386/x86_hypercall.c optional development
osfmk/kern/hv_support_kext.c optional hypervisor
+osfmk/kern/hv_io_notifier.c optional hypervisor
# DUMMIES TO FORCE GENERATION OF .h FILES
#osfmk/OPTIONS/ln optional ln
osfmk/i386/panic_hooks.c standard
osfmk/i386/panic_notify.c standard
osfmk/x86_64/machine_remote_time.c optional config_mach_bridge_send_time
+osfmk/x86_64/counter.c standard
static inline void
_cnputs(char * c, int size)
{
- uint32_t idx = get_cons_ops_index();
+ extern int disableConsoleOutput;
+
+ if (disableConsoleOutput) {
+ return;
+ }
+
+ assert(c != NULL);
+
+ const uint32_t idx = get_cons_ops_index();
while (size-- > 0) {
if (*c == '\n') {
int
switch_to_serial_console(void)
{
+ extern bool serial_console_enabled;
int old_cons_ops = cons_ops_index;
- cons_ops_index = SERIAL_CONS_OPS;
+
+ if (serial_console_enabled) {
+ cons_ops_index = SERIAL_CONS_OPS;
+ }
+
return old_cons_ops;
}
#include <kern/kern_cdata.h>
#include <mach/mach_vm.h>
#include <kern/exc_guard.h>
+#include <os/log.h>
#if CONFIG_MACF
#include <security/mac_mach_internal.h>
return gate.corpses;
}
+extern char *proc_best_name(struct proc *);
+extern int proc_pid(struct proc *);
+
/*
* Routine: task_crashinfo_get_ref()
* Grab a slot at creating a corpse.
task_crashinfo_get_ref(corpse_flags_t kcd_u_flags)
{
union corpse_creation_gate oldgate, newgate;
+ struct proc *p = (void *)current_proc();
assert(kcd_u_flags & CORPSE_CRASHINFO_HAS_REF);
newgate = oldgate;
if (kcd_u_flags & CORPSE_CRASHINFO_USER_FAULT) {
if (newgate.user_faults++ >= TOTAL_USER_FAULTS_ALLOWED) {
+ os_log(OS_LOG_DEFAULT, "%s[%d] Corpse failure, too many faults %d\n",
+ proc_best_name(p), proc_pid(p), newgate.user_faults);
return KERN_RESOURCE_SHORTAGE;
}
}
if (newgate.corpses++ >= TOTAL_CORPSES_ALLOWED) {
+ os_log(OS_LOG_DEFAULT, "%s[%d] Corpse failure, too many %d\n",
+ proc_best_name(p), proc_pid(p), newgate.corpses);
return KERN_RESOURCE_SHORTAGE;
}
if (atomic_compare_exchange_strong_explicit(&inflight_corpses,
&oldgate.value, newgate.value, memory_order_relaxed,
memory_order_relaxed)) {
+ os_log(OS_LOG_DEFAULT, "%s[%d] Corpse allowed %d of %d\n",
+ proc_best_name(p), proc_pid(p), newgate.corpses, TOTAL_CORPSES_ALLOWED);
return KERN_SUCCESS;
}
}
if (atomic_compare_exchange_strong_explicit(&inflight_corpses,
&oldgate.value, newgate.value, memory_order_relaxed,
memory_order_relaxed)) {
+ os_log(OS_LOG_DEFAULT, "Corpse released, count at %d\n", newgate.corpses);
return KERN_SUCCESS;
}
}
/* Terminate all the other threads in the task. */
queue_iterate(&new_task->threads, thread_next, thread_t, task_threads)
{
- thread_terminate_internal(thread_next);
+ thread_terminate_internal(thread_next, TH_TERMINATE_OPTION_NONE);
}
/* wait for all the threads in the task to terminate */
task_wait_till_threads_terminate_locked(new_task);
#include <kern/clock.h>
#include <kern/spl.h>
-#include <kern/counters.h>
#include <kern/queue.h>
#include <kern/zalloc.h>
#include <kern/thread.h>
ipc_port_release( port );
}
+EXTERN void
+iokit_make_port_send( ipc_port_t port )
+{
+ ipc_port_make_send( port );
+}
+
EXTERN void
iokit_release_port_send( ipc_port_t port )
{
// thread-argument-passing and its value should not be garbage
current_thread()->ith_knote = ITH_KNOTE_NULL;
kr = ipc_object_copyout( task->itk_space, ip_to_object(sendPort),
- MACH_MSG_TYPE_PORT_SEND, NULL, NULL, &name);
+ MACH_MSG_TYPE_PORT_SEND, IPC_OBJECT_COPYOUT_FLAGS_NONE, NULL, NULL, &name);
if (kr != KERN_SUCCESS) {
- ipc_port_release_send( sendPort );
name = MACH_PORT_NULL;
}
} else if (sendPort == IP_NULL) {
cpuid.h \
eflags.h \
fpu.h \
+ x86_hypercall.h \
io_map_entries.h \
lapic.h \
lock.h \
init_fpu();
clear_ts();
+
+#if HYPERVISOR
+ /* Notify hypervisor that we are about to resume */
+ hv_resume();
+#endif
+
IOCPURunPlatformActiveActions();
KDBG(IOKDBG_CODE(DBG_HIBERNATE, 0) | DBG_FUNC_END, start, elapsed,
/* Restart timer interrupts */
rtc_timer_start();
-
#if MONOTONIC
mt_cpu_up(cdp);
#endif /* MONOTONIC */
#include <mach/thread_status.h>
#include <mach/vm_param.h>
-#include <kern/counters.h>
#include <kern/cpu_data.h>
#include <kern/mach_param.h>
#include <kern/task.h>
__private_extern__ void mach_call_munger(x86_saved_state_t *state);
-extern const char *mach_syscall_name_table[];
+extern const char *const mach_syscall_name_table[];
__attribute__((noreturn))
void
#include <mach/thread_status.h>
#include <mach/vm_param.h>
-#include <kern/counters.h>
#include <kern/cpu_data.h>
#include <kern/mach_param.h>
#include <kern/task.h>
#else /* !KERNEL_PRIVATE */
+/*
+ * <sys/commpage.h> defines a couple of convenience macros
+ * to help read data from the commpage.
+ */
#if defined(__i386__)
#define _COMM_PAGE_AREA_LENGTH _COMM_PAGE32_AREA_LENGTH
/*
- * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
static void do_cwas(i386_cpu_info_t *cpuinfo, boolean_t on_slave);
static void cpuid_do_precpuid_was(void);
+#if DEBUG || DEVELOPMENT
+static void cpuid_vmm_detect_pv_interface(i386_vmm_info_t *info_p, const char *signature,
+ bool (*)(i386_vmm_info_t*, const uint32_t, const uint32_t));
+static bool cpuid_vmm_detect_applepv_features(i386_vmm_info_t *info_p, const uint32_t base, const uint32_t max_leaf);
+#endif /* DEBUG || DEVELOPMENT */
+
static inline cpuid_cache_descriptor_t *
cpuid_leaf2_find(uint8_t value)
{
info_p->cpuid_vmm_bus_frequency = reg[ebx];
}
+#if DEBUG || DEVELOPMENT
+ cpuid_vmm_detect_pv_interface(info_p, APPLEPV_SIGNATURE, &cpuid_vmm_detect_applepv_features);
+#endif
+
DBG(" vmm_vendor : %s\n", info_p->cpuid_vmm_vendor);
DBG(" vmm_family : %u\n", info_p->cpuid_vmm_family);
DBG(" vmm_bus_frequency : %u\n", info_p->cpuid_vmm_bus_frequency);
return cpuid_vmm_info()->cpuid_vmm_family;
}
+#if DEBUG || DEVELOPMENT
+uint64_t
+cpuid_vmm_get_applepv_features(void)
+{
+ return cpuid_vmm_info()->cpuid_vmm_applepv_features;
+}
+#endif /* DEBUG || DEVELOPMENT */
+
cwa_classifier_e
cpuid_wa_required(cpu_wa_e wa)
{
cpuid_tsx_disabled = true;
}
}
+
+
+#if DEBUG || DEVELOPMENT
+
+/*
+ * Hunt for Apple Paravirtualization support in the hypervisor class leaves [0x4000_0000-0x4001_0000].
+ * Hypervisor interfaces are expected to be found at 0x100 boundaries for compatibility.
+ */
+
+static bool
+cpuid_vmm_detect_applepv_features(i386_vmm_info_t *info_p, const uint32_t base, const uint32_t max_leaf)
+{
+ if ((max_leaf - base) < APPLEPV_LEAF_INDEX_MAX) {
+ return false;
+ }
+
+ /*
+ * Issue cpuid to make sure the interface supports "AH#1" features.
+ * This avoids a possible collision with "Hv#1" used by Hyper-V.
+ */
+ uint32_t reg[4];
+ char interface[5];
+ cpuid_fn(base + APPLEPV_INTERFACE_LEAF_INDEX, reg);
+ memcpy(&interface[0], &reg[eax], 4);
+ interface[4] = '\0';
+ if (0 == strcmp(interface, APPLEPV_INTERFACE)) {
+ cpuid_fn(base + APPLEPV_FEATURES_LEAF_INDEX, reg);
+ info_p->cpuid_vmm_applepv_features = quad(reg[ecx], reg[edx]);
+ return true;
+ }
+ return false;
+}
+
+static void
+cpuid_vmm_detect_pv_interface(i386_vmm_info_t *info_p, const char *signature,
+ bool (*searcher)(i386_vmm_info_t*, const uint32_t, const uint32_t))
+{
+ int hcalls;
+ if (PE_parse_boot_argn("hcalls", &hcalls, sizeof(hcalls)) &&
+ hcalls == 0) {
+ return;
+ }
+
+ assert(info_p);
+ /*
+ * Look for PV interface matching signature
+ */
+ for (uint32_t base = 0x40000100; base < 0x40010000; base += 0x100) {
+ uint32_t reg[4];
+ char vendor[13];
+
+ cpuid_fn(base, reg);
+ memcpy(&vendor[0], &reg[ebx], 4);
+ memcpy(&vendor[4], &reg[ecx], 4);
+ memcpy(&vendor[8], &reg[edx], 4);
+ vendor[12] = '\0';
+ if ((0 == strcmp(vendor, signature)) &&
+ (reg[eax] - base) < 0x100 &&
+ (*searcher)(info_p, base, reg[eax])) {
+ break;
+ }
+ }
+}
+
+#endif /* DEBUG || DEVELOPMENT */
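For concreteness, a small sketch (not part of the patch; the function name is hypothetical) of how the 12-byte APPLEPV_SIGNATURE matched by the loop above is split across the EBX/ECX/EDX values returned by a hypervisor's base leaf, assuming the x86 little-endian byte order that cpuid_fn reports:

/* Illustrative only: mirrors the vendor[] reconstruction in
 * cpuid_vmm_detect_pv_interface(). */
static void
applepv_signature_layout(void)
{
        const char *sig = "apple-pv-xnu";       /* APPLEPV_SIGNATURE */
        uint32_t ebx_val, ecx_val, edx_val;

        memcpy(&ebx_val, sig + 0, 4);           /* "appl" */
        memcpy(&ecx_val, sig + 4, 4);           /* "e-pv" */
        memcpy(&edx_val, sig + 8, 4);           /* "-xnu" */

        /* A matching hypervisor returns exactly these three words,
         * with its highest supported Apple PV leaf in EAX. */
        (void)ebx_val;
        (void)ecx_val;
        (void)edx_val;
}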
/*
- * Copyright (c) 2000-2019 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2020 Apple Computer, Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#define CPUID_VMM_FAMILY_KVM 0x6
+#if DEBUG || DEVELOPMENT
+
+/*
+ * Apple Paravirtualization CPUID leaves
+ * The base leaf can be placed at any unused 0x100 aligned boundary
+ * in the hypervisor class leaves [0x4000_0000-0x4001_0000].
+ */
+
+#define APPLEPV_INTERFACE_LEAF_INDEX 1
+#define APPLEPV_FEATURES_LEAF_INDEX 2
+#define APPLEPV_LEAF_INDEX_MAX APPLEPV_FEATURES_LEAF_INDEX
+
+#define APPLEPV_SIGNATURE "apple-pv-xnu"
+#define APPLEPV_INTERFACE "AH#1"
+
+/*
+ * Apple Hypercall Feature Vector:
+ * Values in ECX:EDX returned by the base leaf
+ */
+
+#define CPUID_LEAF_FEATURE_COREDUMP _Bit(0)
+
+#endif /* DEBUG || DEVELOPMENT */
+
#ifndef ASSEMBLER
#include <stdint.h>
uint32_t cpuid_vmm_family;
uint32_t cpuid_vmm_bus_frequency;
uint32_t cpuid_vmm_tsc_frequency;
+ uint64_t cpuid_vmm_applepv_features;
} i386_vmm_info_t;
typedef enum {
extern i386_cpu_info_t *cpuid_info(void);
extern void cpuid_set_info(void);
extern boolean_t cpuid_vmm_present(void);
+extern uint32_t cpuid_vmm_family(void);
+
+#if DEBUG || DEVELOPMENT
+extern uint64_t cpuid_vmm_get_applepv_features(void);
+#endif /* DEBUG || DEVELOPMENT */
#ifdef MACH_KERNEL_PRIVATE
extern i386_vmm_info_t *cpuid_vmm_info(void);
-extern uint32_t cpuid_vmm_family(void);
extern cwa_classifier_e cpuid_wa_required(cpu_wa_e wa);
extern void cpuid_do_was(void);
extern const char *cpuid_vmm_family_string(void);
int early_boot = 1;
+bool serial_console_enabled = false;
+
static boot_args *kernelBootArgs;
extern int disableConsoleOutput;
}
}
if (serialmode & SERIALMODE_OUTPUT) {
+ serial_console_enabled = true;
(void)switch_to_serial_console();
disableConsoleOutput = FALSE; /* Allow printfs to happen */
}
extern unsigned int not_in_kdp;
+#if !LOCK_STATS
+#define usimple_lock_nopreempt(lck, grp) \
+ usimple_lock_nopreempt(lck)
+#define usimple_lock_try_nopreempt(lck, grp) \
+ usimple_lock_try_nopreempt(lck)
+#endif
+static void usimple_lock_nopreempt(usimple_lock_t, lck_grp_t *);
+static unsigned int usimple_lock_try_nopreempt(usimple_lock_t, lck_grp_t *);
+
/*
* We often want to know the addresses of the callers
* of the various lock routines. However, this information
usimple_lock((usimple_lock_t) lck, NULL);
}
+void
+lck_spin_lock_nopreempt(
+ lck_spin_t *lck)
+{
+ usimple_lock_nopreempt((usimple_lock_t) lck, NULL);
+}
+
+void
+lck_spin_lock_nopreempt_grp(
+ lck_spin_t *lck,
+ lck_grp_t *grp)
+{
+#pragma unused(grp)
+ usimple_lock_nopreempt((usimple_lock_t) lck, grp);
+}
+
/*
* Routine: lck_spin_unlock
*/
usimple_unlock((usimple_lock_t) lck);
}
+void
+lck_spin_unlock_nopreempt(
+ lck_spin_t *lck)
+{
+ usimple_unlock_nopreempt((usimple_lock_t) lck);
+}
+
boolean_t
lck_spin_try_lock_grp(
lck_spin_t *lck,
return lrval;
}
+int
+lck_spin_try_lock_nopreempt(
+ lck_spin_t *lck)
+{
+ boolean_t lrval = (boolean_t)usimple_lock_try_nopreempt((usimple_lock_t) lck, LCK_GRP_NULL);
+#if DEVELOPMENT || DEBUG
+ if (lrval) {
+ pltrace(FALSE);
+ }
+#endif
+ return lrval;
+}
+
+int
+lck_spin_try_lock_nopreempt_grp(
+ lck_spin_t *lck,
+ lck_grp_t *grp)
+{
+#pragma unused(grp)
+ boolean_t lrval = (boolean_t)usimple_lock_try_nopreempt((usimple_lock_t) lck, grp);
+#if DEVELOPMENT || DEBUG
+ if (lrval) {
+ pltrace(FALSE);
+ }
+#endif
+ return lrval;
+}
+
/*
* Routine: lck_spin_assert
*/
usimple_lock_t l,
__unused unsigned short tag)
{
-#ifndef MACHINE_SIMPLE_LOCK
USLDBG(usld_lock_init(l, tag));
hw_lock_init(&l->interlock);
-#else
- simple_lock_init((simple_lock_t)l, tag);
-#endif
}
volatile uint32_t spinlock_owner_cpu = ~0;
return spinlock_owner_cpu;
}
+__abortlike
+static void
+usimple_lock_acquire_timeout_panic(usimple_lock_t l)
+{
+ uintptr_t lowner = (uintptr_t)l->interlock.lock_data;
+ uint32_t lock_cpu;
+
+ spinlock_timed_out = l; /* spinlock_timeout_NMI consumes this */
+ lock_cpu = spinlock_timeout_NMI(lowner);
+ panic("Spinlock acquisition timed out: lock=%p, "
+ "lock owner thread=0x%lx, current_thread: %p, "
+ "lock owner active on CPU 0x%x, current owner: 0x%lx, time: %llu",
+ l, lowner, current_thread(), lock_cpu,
+ (uintptr_t)l->interlock.lock_data, mach_absolute_time());
+}
+
/*
* Acquire a usimple_lock.
*
usimple_lock_t l
LCK_GRP_ARG(lck_grp_t *grp))
{
-#ifndef MACHINE_SIMPLE_LOCK
DECL_PC(pc);
OBTAIN_PC(pc);
USLDBG(usld_lock_pre(l, pc));
- if (__improbable(hw_lock_to(&l->interlock, LockTimeOutTSC, grp) == 0)) {
- boolean_t uslock_acquired = FALSE;
- while (machine_timeout_suspended()) {
- enable_preemption();
- if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC, grp))) {
- break;
- }
+ while (__improbable(hw_lock_to(&l->interlock, LockTimeOutTSC, grp) == 0)) {
+ if (!machine_timeout_suspended()) {
+ usimple_lock_acquire_timeout_panic(l);
}
+ enable_preemption();
+ }
+
+#if DEVELOPMENT || DEBUG
+ pltrace(FALSE);
+#endif
+
+ USLDBG(usld_lock_post(l, pc));
+#if CONFIG_DTRACE
+ LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, l, 0, (uintptr_t)LCK_GRP_PROBEARG(grp));
+#endif
+}
+
+/*
+ * Acquire a usimple_lock_nopreempt
+ *
+ * Called and returns with preemption disabled. Note
+ * that the hw_lock routines are responsible for
+ * maintaining preemption state.
+ */
+static void
+usimple_lock_nopreempt(
+ usimple_lock_t l,
+ lck_grp_t *grp)
+{
+ DECL_PC(pc);
- if (uslock_acquired == FALSE) {
- uint32_t lock_cpu;
- uintptr_t lowner = (uintptr_t)l->interlock.lock_data;
- spinlock_timed_out = l;
- lock_cpu = spinlock_timeout_NMI(lowner);
- panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx, time: %llu",
- l, lowner, current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data, mach_absolute_time());
+ OBTAIN_PC(pc);
+ USLDBG(usld_lock_pre(l, pc));
+
+ while (__improbable(hw_lock_to_nopreempt(&l->interlock, LockTimeOutTSC, grp) == 0)) {
+ if (!machine_timeout_suspended()) {
+ usimple_lock_acquire_timeout_panic(l);
}
+ enable_preemption();
}
+
#if DEVELOPMENT || DEBUG
pltrace(FALSE);
#endif
USLDBG(usld_lock_post(l, pc));
-#else
- simple_lock((simple_lock_t)l, grp);
-#endif
#if CONFIG_DTRACE
LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, l, 0, (uintptr_t)LCK_GRP_PROBEARG(grp));
#endif
usimple_unlock(
usimple_lock_t l)
{
-#ifndef MACHINE_SIMPLE_LOCK
DECL_PC(pc);
OBTAIN_PC(pc);
pltrace(TRUE);
#endif
hw_lock_unlock(&l->interlock);
-#else
- simple_unlock_rwmb((simple_lock_t)l);
-#endif
}
+/*
+ * Release a usimple_lock acquired with the nopreempt variant.
+ *
+ * Called and returns with preemption disabled. Note
+ * that the hw_lock routines are responsible for
+ * maintaining preemption state.
+ */
+void
+usimple_unlock_nopreempt(
+ usimple_lock_t l)
+{
+ DECL_PC(pc);
+
+ OBTAIN_PC(pc);
+ USLDBG(usld_unlock(l, pc));
+#if DEVELOPMENT || DEBUG
+ pltrace(TRUE);
+#endif
+ hw_lock_unlock_nopreempt(&l->interlock);
+}
/*
* Conditionally acquire a usimple_lock.
usimple_lock_t l,
lck_grp_t *grp)
{
-#ifndef MACHINE_SIMPLE_LOCK
unsigned int success;
DECL_PC(pc);
USLDBG(usld_lock_try_post(l, pc));
}
return success;
-#else
- return simple_lock_try((simple_lock_t)l, grp);
+}
+
+/*
+ * Conditionally acquire a usimple_lock.
+ *
+ * Called and returns with preemption disabled. Note
+ * that the hw_lock routines are responsible for
+ * maintaining preemption state.
+ *
+ * XXX No stats are gathered on a miss; I preserved this
+ * behavior from the original assembly-language code, but
+ * doesn't it make sense to log misses? XXX
+ */
+static unsigned int
+usimple_lock_try_nopreempt(
+ usimple_lock_t l,
+ lck_grp_t *grp)
+{
+ unsigned int success;
+ DECL_PC(pc);
+
+ OBTAIN_PC(pc);
+ USLDBG(usld_lock_try_pre(l, pc));
+ if ((success = hw_lock_try_nopreempt(&l->interlock, grp))) {
+#if DEVELOPMENT || DEBUG
+ pltrace(FALSE);
#endif
+ USLDBG(usld_lock_try_post(l, pc));
+ }
+ return success;
}
/*
return true;
}
-void
-ml_cpu_init_state(void)
-{
-}
-
void
ml_cpu_begin_state_transition(__unused int cpu_id)
{
#include <mach/thread_status.h>
#include <mach/vm_param.h>
-#include <kern/counters.h>
#include <kern/kalloc.h>
#include <kern/mach_param.h>
#include <kern/processor.h>
#include <mach/thread_status.h>
#include <mach/vm_param.h>
-#include <kern/counters.h>
#include <kern/mach_param.h>
#include <kern/processor.h>
#include <kern/cpu_data.h>
#include <mach/thread_status.h>
#include <mach/vm_param.h>
-#include <kern/counters.h>
#include <kern/mach_param.h>
#include <kern/task.h>
#include <kern/thread.h>
kern_return_t
pmap_permissions_verify(pmap_t, vm_map_t, vm_offset_t, vm_offset_t);
+#if DEVELOPMENT || DEBUG
+extern kern_return_t pmap_test_text_corruption(pmap_paddr_t);
+#endif /* DEVELOPMENT || DEBUG */
+
#if MACH_ASSERT
extern int pmap_stats_assert;
#define PMAP_STATS_ASSERTF(args) \
}
/* locks */
-static lck_grp_attr_t *ucode_slock_grp_attr = NULL;
-static lck_grp_t *ucode_slock_grp = NULL;
-static lck_attr_t *ucode_slock_attr = NULL;
-static lck_spin_t *ucode_slock = NULL;
-
-static kern_return_t
-register_locks(void)
-{
- /* already allocated? */
- if (ucode_slock_grp_attr && ucode_slock_grp && ucode_slock_attr && ucode_slock) {
- return KERN_SUCCESS;
- }
-
- /* allocate lock group attribute and group */
- if (!(ucode_slock_grp_attr = lck_grp_attr_alloc_init())) {
- goto nomem_out;
- }
-
- if (!(ucode_slock_grp = lck_grp_alloc_init("uccode_lock", ucode_slock_grp_attr))) {
- goto nomem_out;
- }
-
- /* Allocate lock attribute */
- if (!(ucode_slock_attr = lck_attr_alloc_init())) {
- goto nomem_out;
- }
-
- /* Allocate the spin lock */
- /* We keep one global spin-lock. We could have one per update
- * request... but srsly, why would you update microcode like that?
- */
- if (!(ucode_slock = lck_spin_alloc_init(ucode_slock_grp, ucode_slock_attr))) {
- goto nomem_out;
- }
-
- return KERN_SUCCESS;
-
-nomem_out:
- /* clean up */
- if (ucode_slock) {
- lck_spin_free(ucode_slock, ucode_slock_grp);
- }
- if (ucode_slock_attr) {
- lck_attr_free(ucode_slock_attr);
- }
- if (ucode_slock_grp) {
- lck_grp_free(ucode_slock_grp);
- }
- if (ucode_slock_grp_attr) {
- lck_grp_attr_free(ucode_slock_grp_attr);
- }
-
- return KERN_NO_SPACE;
-}
+static LCK_GRP_DECLARE(ucode_slock_grp, "uccode_lock");
+static LCK_SPIN_DECLARE(ucode_slock, &ucode_slock_grp);
/* Copy in an update */
static int
cpu_apply_microcode(void)
{
/* grab the lock */
- lck_spin_lock(ucode_slock);
+ lck_spin_lock(&ucode_slock);
/* execute the update */
update_microcode();
/* release the lock */
- lck_spin_unlock(ucode_slock);
+ lck_spin_unlock(&ucode_slock);
}
static void
{
cpumask_t dest_cpumask;
- if (register_locks() != KERN_SUCCESS) {
- return;
- }
-
mp_disable_preemption();
dest_cpumask = CPUMASK_OTHERS;
cpu_apply_microcode();
int vmx_use_count = 0;
boolean_t vmx_exclusive = FALSE;
-lck_grp_t *vmx_lck_grp = NULL;
-lck_mtx_t *vmx_lck_mtx = NULL;
+static LCK_GRP_DECLARE(vmx_lck_grp, "vmx");
+static LCK_MTX_DECLARE(vmx_lck_mtx, &vmx_lck_grp);
/* -----------------------------------------------------------------------------
* vmx_is_available()
set_cr4(get_cr4() | CR4_VMXE);
}
-void
-vmx_init()
-{
- vmx_lck_grp = lck_grp_alloc_init("vmx", LCK_GRP_ATTR_NULL);
- assert(vmx_lck_grp);
-
- vmx_lck_mtx = lck_mtx_alloc_init(vmx_lck_grp, LCK_ATTR_NULL);
- assert(vmx_lck_mtx);
-}
-
/* -----------------------------------------------------------------------------
* vmx_get_specs()
* Obtain VMX facility specifications for this CPU and
return VMX_UNSUPPORTED;
}
- lck_mtx_lock(vmx_lck_mtx);
+ lck_mtx_lock(&vmx_lck_mtx);
if (vmx_exclusive || (exclusive && vmx_use_count)) {
error = VMX_INUSE;
error = VMX_OK;
}
- lck_mtx_unlock(vmx_lck_mtx);
+ lck_mtx_unlock(&vmx_lck_mtx);
return error;
}
{
assert(0 == get_preemption_level());
- lck_mtx_lock(vmx_lck_mtx);
+ lck_mtx_lock(&vmx_lck_mtx);
if (1 == vmx_use_count) {
vmx_exclusive = FALSE;
vmx_use_count--;
}
- lck_mtx_unlock(vmx_lck_mtx);
+ lck_mtx_unlock(&vmx_lck_mtx);
VMX_KPRINTF("VMX use count: %d\n", vmx_use_count);
}
void *vmxon_region; /* the logical address of the VMXON region page */
} vmx_cpu_t;
-void vmx_init(void);
void vmx_cpu_init(void);
void vmx_resume(boolean_t is_wake_from_hibernate);
void vmx_suspend(void);
--- /dev/null
+/*
+ * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <kern/assert.h>
+#include <kern/hvg_hypercall.h>
+#include <i386/cpuid.h>
+#include <os/log.h>
+
+
+static bool
+hvg_live_coredump_enabled(void)
+{
+ return cpuid_vmm_present() && (cpuid_vmm_get_applepv_features() & CPUID_LEAF_FEATURE_COREDUMP) != 0;
+}
+
+/*
+ * This routine issues an Apple hypercall that notifies the hypervisor to
+ * take a guest kernel coredump. If the vmcore argument is not NULL, the
+ * name tag of the vmcore file is copied into the caller's vmcore tag array.
+ * Otherwise the name tag is ignored.
+ */
+
+hvg_hcall_return_t
+hvg_hcall_trigger_dump(hvg_hcall_vmcore_file_t *vmcore,
+ const hvg_hcall_dump_option_t dump_option)
+{
+ hvg_hcall_return_t ret;
+ hvg_hcall_output_regs_t output;
+ const size_t reg_size = sizeof(output.rax);
+
+ /* Does the hypervisor support feature: live kernel core dump? */
+ if (!hvg_live_coredump_enabled()) {
+ return HVG_HCALL_FEAT_DISABLED;
+ }
+
+ /* Make sure that we don't overflow vmcore tag array with hypercall output */
+ if (vmcore && (reg_size != sizeof(uint64_t))) {
+ os_log_error(OS_LOG_DEFAULT, "%s: invalid hcall register size, %zu bytes (expect %zu bytes)\n",
+ __func__, reg_size, sizeof(uint64_t));
+ return HVG_HCALL_INVALID_PARAMETER;
+ }
+
+ switch (dump_option) {
+ case HVG_HCALL_DUMP_OPTION_REGULAR:
+ /* Only regular dump-guest-memory is supported for now */
+ break;
+ default:
+ return HVG_HCALL_INVALID_PARAMETER;
+ }
+
+ /* Everything checks out, issue hypercall */
+ memset(&output, 0, sizeof(hvg_hcall_output_regs_t));
+ ret = hvg_hypercall1(HVG_HCALL_TRIGGER_DUMP,
+ dump_option,
+ &output);
+
+ if (ret == HVG_HCALL_SUCCESS) {
+ if (vmcore) {
+ /* Caller requested vmcore tag to be returned */
+ memcpy(&vmcore->tag[0], &output.rax, reg_size);
+ memcpy(&vmcore->tag[reg_size], &output.rdi, reg_size);
+ memcpy(&vmcore->tag[reg_size * 2], &output.rsi, reg_size);
+ memcpy(&vmcore->tag[reg_size * 3], &output.rdx, reg_size);
+ memcpy(&vmcore->tag[reg_size * 4], &output.rcx, reg_size);
+ memcpy(&vmcore->tag[reg_size * 5], &output.r8, reg_size);
+ memcpy(&vmcore->tag[reg_size * 6], &output.r9, reg_size);
+ vmcore->tag[reg_size * 7] = '\0';
+ }
+ }
+ return ret;
+}
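A minimal caller sketch for the routine above (not part of the patch; hvg_coredump_demo is a hypothetical name, and it assumes hvg_hcall_vmcore_file_t exposes tag as a NUL-terminated character array, as the copy loop above implies):

static void
hvg_coredump_demo(void)
{
        hvg_hcall_vmcore_file_t vmcore = {0};
        hvg_hcall_return_t ret;

        ret = hvg_hcall_trigger_dump(&vmcore, HVG_HCALL_DUMP_OPTION_REGULAR);
        if (ret == HVG_HCALL_SUCCESS) {
                os_log(OS_LOG_DEFAULT, "guest coredump tag: %s\n",
                    (const char *)vmcore.tag);
        } else {
                os_log_error(OS_LOG_DEFAULT, "coredump hypercall failed: %d\n",
                    (int)ret);
        }
}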
--- /dev/null
+/*
+ * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _I386_X86_HYPERCALL_H_
+#define _I386_X86_HYPERCALL_H_
+
+#if DEBUG || DEVELOPMENT
+
+
+/*
+ * Apple Hypercall Calling Convention (x64)
+ *
+ * Registers | Usage |
+ * --------------------------------------------------------
+ * %rax | In: hypercall code |
+ * | Out: if RFLAGS.CF = 0 (success) |
+ * | hypercall output[0] |
+ * | if RFLAGS.CF = 1 (error) |
+ * | hypercall error value |
+ * %rdi | In: 1st argument |
+ * | Out: hypercall output[1] |
+ * %rsi | In: 2nd argument |
+ * | Out: hypercall output[2] |
+ * %rdx | In: 3rd argument |
+ * | Out: hypercall output[3] |
+ * %rcx | In: 4th argument |
+ * | Out: hypercall output[4] |
+ * %r8 | In: 5th argument |
+ * | Out: hypercall output[5] |
+ * %r9 | In: 6th argument |
+ * | Out: hypercall output[6] |
+ *
+ * %rax is used by the caller to specify hypercall code. When a hypercall fails,
+ * the hypervisor stores errno in %rax. A successful hypercall returns the
+ * output of the call in %rax, %rdi, %rsi, %rdx, %rcx, %r8, and %r9.
+ */
+
+typedef struct hvg_hcall_output_regs {
+ uint64_t rax;
+ uint64_t rdi;
+ uint64_t rsi;
+ uint64_t rdx;
+ uint64_t rcx;
+ uint64_t r8;
+ uint64_t r9;
+} hvg_hcall_output_regs_t;
+
+/*
+ * To avoid collision with other hypercall interfaces (e.g., KVM) in the vmcall
+ * namespace, Apple hypercalls put "A" (0x41) in the top byte of %eax so that
+ * hypervisors can support multiple hypercall interfaces simultaneously and
+ * handle Apple hypercalls correctly for compatibility.
+ *
+ * For example, KVM uses the same vmcall instruction and has call code 1 for
+ * KVM_HC_VAPIC_POLL_IRQ. When invoking an Apple hypercall with code 1, a
+ * hypervisor will not accidentally treat the Apple hypercall as a KVM call.
+ */
+
+#define HVG_HCALL_CODE(code) ('A' << 24 | (code & 0xFFFFFF))
+
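A quick worked check of the encoding (illustrative; the call code 2 is arbitrary): 'A' is 0x41, so the composed value carries 0x41 in bits 31-24 with the 24-bit call code below it.

_Static_assert(HVG_HCALL_CODE(2) == 0x41000002,
    "Apple hypercall codes carry 'A' (0x41) in the top byte");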
+
+/*
+ * Caller is responsible for checking the existence of Apple Hypercall
+ * before invoking Apple hypercalls.
+ */
+
+#define HVG_HCALL_RETURN(rax) {\
+ __asm__ __volatile__ goto (\
+ "jnc 2f \n\t" \
+ "jmp %l0 \n\t" \
+ "2: \n\t" \
+ : /* no output */ \
+ : /* no input */ \
+ : /* no clobber */ \
+ : error);\
+ return HVG_HCALL_SUCCESS;\
+error:\
+ return (hvg_hcall_return_t)rax;\
+}
+
+static inline hvg_hcall_return_t
+hvg_hypercall6(uint64_t code, uint64_t rdi, uint64_t rsi, uint64_t rdx, uint64_t rcx, uint64_t r8, uint64_t r9,
+ hvg_hcall_output_regs_t *output)
+{
+ __asm__ __volatile__ ("movq %12, %%r8 \n\t"
+ "movq %13, %%r9 \n\t"
+ "vmcall \n\t"
+ "movq %%r8, %5 \n\t"
+ "movq %%r9, %6 \n\t"
+ : "=a" (output->rax), /* %0: output[0] */
+ "=D" (output->rdi), /* %1: output[1] */
+ "=S" (output->rsi), /* %2: output[2] */
+ "=d" (output->rdx), /* %3: output[3] */
+ "=c" (output->rcx), /* %4: output[4] */
+ "=r" (output->r8), /* %5: output[5] */
+ "=r" (output->r9) /* %6: output[6] */
+ : "a" (HVG_HCALL_CODE(code)),/* %7: call code */
+ "D" (rdi), /* %8: arg[0] */
+ "S" (rsi), /* %9: arg[1] */
+ "d" (rdx), /* %10: arg[2] */
+ "c" (rcx), /* %11: arg[3] */
+ "r" (r8), /* %12: arg[4] */
+ "r" (r9) /* %13: arg[5] */
+ : "memory", "r8", "r9");
+ HVG_HCALL_RETURN(output->rax);
+}
+
+static inline hvg_hcall_return_t
+hvg_hypercall0(const uint64_t code,
+ hvg_hcall_output_regs_t *output)
+{
+ return hvg_hypercall6(code, 0, 0, 0, 0, 0, 0, output);
+}
+
+static inline hvg_hcall_return_t
+hvg_hypercall1(const uint64_t code,
+ const uint64_t rdi,
+ hvg_hcall_output_regs_t *output)
+{
+ return hvg_hypercall6(code, rdi, 0, 0, 0, 0, 0, output);
+}
+
+static inline hvg_hcall_return_t
+hvg_hypercall2(const uint64_t code,
+ const uint64_t rdi, const uint64_t rsi,
+ hvg_hcall_output_regs_t *output)
+{
+ return hvg_hypercall6(code, rdi, rsi, 0, 0, 0, 0, output);
+}
+
+static inline hvg_hcall_return_t
+hvg_hypercall3(const uint64_t code,
+ const uint64_t rdi, const uint64_t rsi, const uint64_t rdx,
+ hvg_hcall_output_regs_t *output)
+{
+ return hvg_hypercall6(code, rdi, rsi, rdx, 0, 0, 0, output);
+}
+
+static inline hvg_hcall_return_t
+hvg_hypercall4(const uint64_t code,
+ const uint64_t rdi, const uint64_t rsi, const uint64_t rdx, const uint64_t rcx,
+ hvg_hcall_output_regs_t *output)
+{
+ return hvg_hypercall6(code, rdi, rsi, rdx, rcx, 0, 0, output);
+}
+
+static inline hvg_hcall_return_t
+hvg_hypercall5(const uint64_t code,
+ const uint64_t rdi, const uint64_t rsi, const uint64_t rdx, const uint64_t rcx, const uint64_t r8,
+ hvg_hcall_output_regs_t *output)
+{
+ return hvg_hypercall6(code, rdi, rsi, rdx, rcx, r8, 0, output);
+}
+
+#endif /* DEBUG || DEVELOPMENT */
+
+#endif /* _I386_X86_HYPERCALL_H_ */
return KERN_SUCCESS;
}
-/*
- * Routine: ipc_entry_get
- * Purpose:
- * Tries to allocate an entry out of the space.
- * Conditions:
- * The space is write-locked and active throughout.
- * An object may be locked. Will not allocate memory.
- * Returns:
- * KERN_SUCCESS A free entry was found.
- * KERN_NO_SPACE No entry allocated.
- */
-
-kern_return_t
-ipc_entry_get(
- ipc_space_t space,
- mach_port_name_t *namep,
- ipc_entry_t *entryp)
-{
- kern_return_t kr;
-
- kr = ipc_entries_hold(space, 1);
- if (KERN_SUCCESS != kr) {
- return kr;
- }
-
- return ipc_entry_claim(space, namep, entryp);
-}
-
/*
* Routine: ipc_entry_alloc
* Purpose:
return KERN_INVALID_TASK;
}
- kr = ipc_entry_get(space, namep, entryp);
+ kr = ipc_entries_hold(space, 1);
if (kr == KERN_SUCCESS) {
- return kr;
+ return ipc_entry_claim(space, namep, entryp);
}
kr = ipc_entry_grow_table(space, ITS_SIZE_NONE);
*/
kern_return_t kr;
kr = ipc_entry_grow_table(space, index + 1);
- assert(kr != KERN_NO_SPACE);
if (kr != KERN_SUCCESS) {
/* space is unlocked */
return kr;
mach_port_name_t *namep,
ipc_entry_t *entryp);
-/* Allocate an entry in a space */
-extern kern_return_t ipc_entry_get(
- ipc_space_t space,
- mach_port_name_t *namep,
- ipc_entry_t *entryp);
-
/* Allocate an entry in a space, growing the space if necessary */
extern kern_return_t ipc_entry_alloc(
ipc_space_t space,
if (ip_active(port) &&
ip_kotype(port) == IKOT_EVENTLINK) {
- ipc_eventlink = (struct ipc_eventlink *)port->ip_kobject;
+ ipc_eventlink = (struct ipc_eventlink *)ipc_kobject_get(port);
if (ipc_eventlink) {
ipc_eventlink_reference(ipc_eventlink);
}
#endif
+static int
+task_importance_task_get_pid(ipc_importance_task_t iit)
+{
+#if DEVELOPMENT || DEBUG
+ return (int)iit->iit_bsd_pid;
+#else
+ return task_pid(iit->iit_task);
+#endif
+}
+
/*
* Routine: ipc_importance_reset_locked
* Purpose:
task_imp->iit_legacy_externdrop = 0;
after_donor = ipc_importance_task_is_donor(task_imp);
-#if DEVELOPMENT || DEBUG
- if (task_imp->iit_assertcnt > 0 && task_imp->iit_live_donor) {
- printf("Live donor task %s[%d] still has %d importance assertions after reset\n",
- task_imp->iit_procname, task_imp->iit_bsd_pid, task_imp->iit_assertcnt);
- }
-#endif
-
/* propagate a downstream drop if there was a change in donor status */
if (after_donor != before_donor) {
ipc_importance_task_propagate_assertion_locked(task_imp, IIT_UPDATE_DROP, FALSE);
* will trigger the probe in ipc_importance_task_externalize_assertion()
* above and have impresult==1 here.
*/
- DTRACE_BOOST5(receive_boost, task_t, task_self, int, task_pid(task_self), int, sender_pid, int, 1, int, task_self->task_imp_base->iit_assertcnt);
+ DTRACE_BOOST5(receive_boost, task_t, task_self, int, task_pid(task_self),
+ int, sender_pid, int, 1, int, task_self->task_imp_base->iit_assertcnt);
}
#endif /* IMPORTANCE_TRACE */
}
mach_voucher_attr_content_t out_content,
mach_voucher_attr_content_size_t *in_out_content_size)
{
- mach_voucher_attr_content_size_t size = 0;
ipc_importance_elem_t elem;
unsigned int i;
+ char *buf = (char *)out_content;
+ mach_voucher_attr_content_size_t size = *in_out_content_size;
+ mach_voucher_attr_content_size_t pos = 0;
+ __unused int pid;
+
IMPORTANCE_ASSERT_MANAGER(manager);
IMPORTANCE_ASSERT_KEY(key);
/* the first non-default value provides the data */
- for (i = 0; i < value_count && *in_out_content_size > 0; i++) {
+ for (i = 0; i < value_count; i++) {
elem = (ipc_importance_elem_t)values[i];
if (IIE_NULL == elem) {
continue;
}
- snprintf((char *)out_content, *in_out_content_size, "Importance for pid ");
- size = (mach_voucher_attr_content_size_t)strlen((char *)out_content);
+ pos += scnprintf(buf + pos, size - pos, "Importance for ");
for (;;) {
ipc_importance_inherit_t inherit = III_NULL;
ipc_importance_task_t task_imp;
- task_t task;
- int t_pid;
if (IIE_TYPE_TASK == IIE_TYPE(elem)) {
task_imp = (ipc_importance_task_t)elem;
- task = task_imp->iit_task;
- t_pid = (TASK_NULL != task) ?
- task_pid(task) : -1;
- snprintf((char *)out_content + size, *in_out_content_size - size, "%d", t_pid);
} else {
inherit = (ipc_importance_inherit_t)elem;
task_imp = inherit->iii_to_task;
- task = task_imp->iit_task;
- t_pid = (TASK_NULL != task) ?
- task_pid(task) : -1;
- snprintf((char *)out_content + size, *in_out_content_size - size,
- "%d (%d of %d boosts) %s from pid ", t_pid,
- III_EXTERN(inherit), inherit->iii_externcnt,
- (inherit->iii_donating) ? "donated" : "linked");
}
-
- size = (mach_voucher_attr_content_size_t)strlen((char *)out_content);
+#if DEVELOPMENT || DEBUG
+ pos += scnprintf(buf + pos, size - pos, "%s[%d]",
+ task_imp->iit_procname, task_imp->iit_bsd_pid);
+#else
+ ipc_importance_lock();
+ pid = task_importance_task_get_pid(task_imp);
+ ipc_importance_unlock();
+ pos += scnprintf(buf + pos, size - pos, "pid %d", pid);
+#endif /* DEVELOPMENT || DEBUG */
if (III_NULL == inherit) {
break;
}
-
+ pos += scnprintf(buf + pos, size - pos,
+ " (%d of %d boosts) %s from ",
+ III_EXTERN(inherit), inherit->iii_externcnt,
+ (inherit->iii_donating) ? "donated" : "linked");
elem = inherit->iii_from_elem;
}
- size++; /* account for NULL */
+
+ pos++; /* account for terminating \0 */
+ break;
}
*out_command = MACH_VOUCHER_ATTR_NOOP; /* cannot be used to regenerate value */
- *in_out_content_size = size;
+ *in_out_content_size = pos;
return KERN_SUCCESS;
}
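For reference, a short sketch (hypothetical function and made-up values, not part of the patch) of the string the rewritten loop builds on a RELEASE kernel; DEVELOPMENT/DEBUG kernels substitute "name[pid]" for each "pid N":

static void
importance_content_example(void)
{
        char buf[128];
        int pos = 0;

        pos += scnprintf(buf + pos, sizeof(buf) - pos, "Importance for ");
        pos += scnprintf(buf + pos, sizeof(buf) - pos, "pid %d", 123);
        pos += scnprintf(buf + pos, sizeof(buf) - pos,
            " (%d of %d boosts) %s from ", 2, 5, "donated");
        pos += scnprintf(buf + pos, sizeof(buf) - pos, "pid %d", 456);

        /* buf now reads:
         * "Importance for pid 123 (2 of 5 boosts) donated from pid 456" */
        (void)buf;
}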
target_pid = -1;
if (temp_inherit->iii_donating) {
-#if DEVELOPMENT || DEBUG
- target_pid = temp_inherit->iii_to_task->iit_bsd_pid;
-#else
- temp_task = temp_inherit->iii_to_task->iit_task;
- if (temp_task != TASK_NULL) {
- target_pid = task_pid(temp_task);
- }
-#endif
+ target_pid = task_importance_task_get_pid(temp_inherit->iii_to_task);
}
if (target_pid != -1 && previous_pid != target_pid) {
continue;
}
- if (IIE_TYPE_TASK == IIE_TYPE(elem) &&
- (((ipc_importance_task_t)elem)->iit_task != TASK_NULL)) {
- target_pid = task_pid(((ipc_importance_task_t)elem)->iit_task);
+ if (IIE_TYPE_TASK == IIE_TYPE(elem)) {
+ ipc_importance_task_t temp_iit = (ipc_importance_task_t)elem;
+ target_pid = task_importance_task_get_pid(temp_iit);
} else {
temp_inherit = (ipc_importance_inherit_t)elem;
-#if DEVELOPMENT || DEBUG
- target_pid = temp_inherit->iii_to_task->iit_bsd_pid;
-#else
- temp_task = temp_inherit->iii_to_task->iit_task;
- if (temp_task != TASK_NULL) {
- target_pid = task_pid(temp_task);
- }
-#endif
+ target_pid = task_importance_task_get_pid(temp_inherit->iii_to_task);
}
if (target_pid != -1 && previous_pid != target_pid) {
#include <mach/machine/ndr_def.h> /* NDR_record */
-#define IPC_KERNEL_MAP_SIZE (1024 * 1024)
+#define IPC_KERNEL_MAP_SIZE (CONFIG_IPC_KERNEL_MAP_SIZE * 1024 * 1024)
SECURITY_READ_ONLY_LATE(vm_map_t) ipc_kernel_map;
/* values to limit physical copy out-of-line memory descriptors */
#define IPC_KMSG_MAX_SPACE (64 * 1024 * 1024) /* keep in sync with COPYSIZELIMIT_PANIC */
const vm_size_t ipc_kmsg_max_body_space = ((IPC_KMSG_MAX_SPACE * 3) / 4 - MAX_TRAILER_SIZE);
+#if XNU_TARGET_OS_OSX
+#define IPC_CONTROL_PORT_OPTIONS_DEFAULT IPC_CONTROL_PORT_OPTIONS_NONE
+#else
+#define IPC_CONTROL_PORT_OPTIONS_DEFAULT (IPC_CONTROL_PORT_OPTIONS_IMMOVABLE_HARD | IPC_CONTROL_PORT_OPTIONS_PINNED_SOFT)
+#endif
+
+TUNABLE(ipc_control_port_options_t, ipc_control_port_options,
+ "ipc_control_port_options", IPC_CONTROL_PORT_OPTIONS_DEFAULT);
+
+SECURITY_READ_ONLY_LATE(bool) pinned_control_port_enabled;
+SECURITY_READ_ONLY_LATE(bool) immovable_control_port_enabled;
+
+
LCK_GRP_DECLARE(ipc_lck_grp, "ipc");
LCK_ATTR_DECLARE(ipc_lck_attr, 0, 0);
arcade_init();
#endif
+ pinned_control_port_enabled = !!(ipc_control_port_options & (IPC_CONTROL_PORT_OPTIONS_PINNED_SOFT | IPC_CONTROL_PORT_OPTIONS_PINNED_HARD));
+ immovable_control_port_enabled = !!(ipc_control_port_options & (IPC_CONTROL_PORT_OPTIONS_IMMOVABLE_SOFT | IPC_CONTROL_PORT_OPTIONS_IMMOVABLE_HARD));
+
+ if (pinned_control_port_enabled && !immovable_control_port_enabled) {
+ kprintf("Invalid ipc_control_port_options boot-arg: pinned control port cannot be enabled without immovability enforcement. Ignoring pinning boot-arg.");
+ pinned_control_port_enabled = false;
+ ipc_control_port_options &= ~(IPC_CONTROL_PORT_OPTIONS_PINNED_SOFT | IPC_CONTROL_PORT_OPTIONS_PINNED_HARD);
+ }
+
kr = kmem_suballoc(kernel_map, &min, IPC_KERNEL_MAP_SIZE,
TRUE,
(VM_FLAGS_ANYWHERE),
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/misc_protos.h>
-#include <kern/counters.h>
#include <kern/cpu_data.h>
#include <kern/policy_internal.h>
#include <kern/mach_filter.h>
* rights in the message body to succeed
*/
if (IO_VALID(object) && io_is_kobject(object)) {
- kmsg->ikm_flags |= IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND;
+ kmsg->ikm_flags |= IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND;
}
}
*/
if (reply_entry != IE_NULL) {
kr = ipc_right_copyin(space, reply_name, reply_entry,
- reply_type, IPC_RIGHT_COPYIN_FLAGS_DEADOK,
+ reply_type, IPC_OBJECT_COPYIN_FLAGS_DEADOK,
&reply_port, &reply_soright,
&release_port, &assertcnt, 0, NULL);
assert(assertcnt == 0);
* copyin the destination.
*/
kr = ipc_right_copyin(space, dest_name, dest_entry,
- dest_type, (IPC_RIGHT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND |
- IPC_RIGHT_COPYIN_FLAGS_ALLOW_DEAD_SEND_ONCE),
+ dest_type, (IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND |
+ IPC_OBJECT_COPYIN_FLAGS_ALLOW_DEAD_SEND_ONCE),
&dest_port, &dest_soright,
&release_port, &assertcnt, 0, NULL);
assert(assertcnt == 0);
*/
if (MACH_PORT_VALID(reply_name)) {
kr = ipc_right_copyin(space, reply_name, reply_entry,
- reply_type, IPC_RIGHT_COPYIN_FLAGS_DEADOK,
+ reply_type, IPC_OBJECT_COPYIN_FLAGS_DEADOK,
&reply_port, &reply_soright,
&release_port, &assertcnt, 0, NULL);
assert(assertcnt == 0);
*/
if (IE_NULL != voucher_entry) {
kr = ipc_right_copyin(space, voucher_name, voucher_entry,
- voucher_type, IPC_RIGHT_COPYIN_FLAGS_NONE,
+ voucher_type, IPC_OBJECT_COPYIN_FLAGS_NONE,
(ipc_object_t *)&voucher_port,
&voucher_soright,
&voucher_release_port,
uint32_t entries_held = 0;
boolean_t need_write_lock = FALSE;
+ ipc_object_copyout_flags_t reply_copyout_options = IPC_OBJECT_COPYOUT_FLAGS_NONE;
kern_return_t kr;
/*
}
if (need_write_lock) {
+handle_reply_again:
is_write_lock(space);
while (entries_held) {
/* Handle reply port. */
if (IP_VALID(reply)) {
+ ipc_port_t reply_subst = IP_NULL;
ipc_entry_t entry;
+ ip_lock(reply);
+
+ /* Is the reply port still active and allowed to be copied out? */
+ if (!ip_active(reply) ||
+ !ip_label_check(space, reply, reply_type,
+ &reply_copyout_options, &reply_subst)) {
+ /* clear the context value */
+ reply->ip_reply_context = 0;
+ ip_unlock(reply);
+
+ assert(reply_subst == IP_NULL);
+ release_reply_port = reply;
+ reply = IP_DEAD;
+ reply_name = MACH_PORT_DEAD;
+ goto done_with_reply;
+ }
+
+ /* is the kolabel requesting a substitution */
+ if (reply_subst != IP_NULL) {
+ /*
+ * port is unlocked, its right consumed
+ * space is unlocked
+ */
+ assert(reply_type == MACH_MSG_TYPE_PORT_SEND);
+ msg->msgh_local_port = reply = reply_subst;
+ goto handle_reply_again;
+ }
+
+
/* Is there already an entry we can use? */
if ((reply_type != MACH_MSG_TYPE_PORT_SEND_ONCE) &&
ipc_right_reverse(space, ip_to_object(reply), &reply_name, &entry)) {
- /* reply port is locked and active */
assert(entry->ie_bits & MACH_PORT_TYPE_SEND_RECEIVE);
} else {
- ip_lock(reply);
- /* Is the reply port still active and allowed to be copied out? */
- if (!ip_active(reply) || !ip_label_check(space, reply, reply_type)) {
- /* clear the context value */
- reply->ip_reply_context = 0;
- ip_unlock(reply);
-
- release_reply_port = reply;
- reply = IP_DEAD;
- reply_name = MACH_PORT_DEAD;
- goto done_with_reply;
- }
-
/* claim a held entry for the reply port */
assert(entries_held > 0);
entries_held--;
ipc_entry_claim(space, &reply_name, &entry);
- assert(IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE);
+ assert(!ipc_right_inuse(entry));
assert(entry->ie_object == IO_NULL);
entry->ie_object = ip_to_object(reply);
}
}
kr = ipc_right_copyout(space, reply_name, entry,
- reply_type, NULL, NULL, ip_to_object(reply));
+ reply_type, IPC_OBJECT_COPYOUT_FLAGS_NONE, NULL, NULL,
+ ip_to_object(reply));
assert(kr == KERN_SUCCESS);
/* reply port is unlocked */
} else {
if ((option & MACH_RCV_VOUCHER) != 0) {
ipc_entry_t entry;
+ ip_lock(voucher);
+
if (ipc_right_reverse(space, ip_to_object(voucher),
&voucher_name, &entry)) {
- /* voucher port locked */
assert(entry->ie_bits & MACH_PORT_TYPE_SEND);
} else {
assert(entries_held > 0);
entries_held--;
ipc_entry_claim(space, &voucher_name, &entry);
- assert(IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE);
+ assert(!ipc_right_inuse(entry));
assert(entry->ie_object == IO_NULL);
entry->ie_object = ip_to_object(voucher);
- ip_lock(voucher);
}
/* space is locked and active */
- require_ip_active(voucher);
+
assert(ip_kotype(voucher) == IKOT_VOUCHER);
kr = ipc_right_copyout(space, voucher_name, entry,
- MACH_MSG_TYPE_MOVE_SEND, NULL, NULL,
- ip_to_object(voucher));
+ MACH_MSG_TYPE_MOVE_SEND, IPC_OBJECT_COPYOUT_FLAGS_NONE,
+ NULL, NULL, ip_to_object(voucher));
/* voucher port is unlocked */
} else {
voucher_type = MACH_MSGH_BITS_ZERO;
* MACH_MSG_IPC_KERNEL Kernel resource shortage.
* (Name is MACH_PORT_NULL.)
*/
-
-mach_msg_return_t
+static mach_msg_return_t
ipc_kmsg_copyout_object(
ipc_space_t space,
ipc_object_t object,
return MACH_MSG_SUCCESS;
}
- kr = ipc_object_copyout(space, object, msgt_name, context, guard_flags, namep);
+ kr = ipc_object_copyout(space, object, msgt_name, IPC_OBJECT_COPYOUT_FLAGS_NONE,
+ context, guard_flags, namep);
if (kr != KERN_SUCCESS) {
- ipc_object_destroy(object, msgt_name);
-
if (kr == KERN_INVALID_CAPABILITY) {
*namep = MACH_PORT_DEAD;
} else {
}
static mach_msg_descriptor_t *
-ipc_kmsg_copyout_port_descriptor(mach_msg_descriptor_t *dsc,
- mach_msg_descriptor_t *dest_dsc,
- ipc_space_t space,
- kern_return_t *mr)
+ipc_kmsg_copyout_port_descriptor(
+ mach_msg_descriptor_t *dsc,
+ mach_msg_descriptor_t *dest_dsc,
+ ipc_space_t space,
+ kern_return_t *mr)
{
- mach_port_t port;
- mach_port_name_t name;
- mach_msg_type_name_t disp;
+ mach_port_t port;
+ mach_port_name_t name;
+ mach_msg_type_name_t disp;
/* Copyout port right carried in the message */
port = dsc->port.name;
return (mach_msg_descriptor_t *)dest_dsc;
}
-mach_msg_descriptor_t *
-ipc_kmsg_copyout_ool_descriptor(mach_msg_ool_descriptor_t *dsc, mach_msg_descriptor_t *user_dsc, int is_64bit, vm_map_t map, mach_msg_return_t *mr);
-mach_msg_descriptor_t *
-ipc_kmsg_copyout_ool_descriptor(mach_msg_ool_descriptor_t *dsc, mach_msg_descriptor_t *user_dsc, int is_64bit, vm_map_t map, mach_msg_return_t *mr)
+static mach_msg_descriptor_t *
+ipc_kmsg_copyout_ool_descriptor(
+ mach_msg_ool_descriptor_t *dsc,
+ mach_msg_descriptor_t *user_dsc,
+ int is_64bit,
+ vm_map_t map,
+ mach_msg_return_t *mr)
{
- vm_map_copy_t copy;
- vm_map_address_t rcv_addr;
- mach_msg_copy_options_t copy_options;
- vm_map_size_t size;
+ vm_map_copy_t copy;
+ vm_map_address_t rcv_addr;
+ mach_msg_copy_options_t copy_options;
+ vm_map_size_t size;
mach_msg_descriptor_type_t dsc_type;
- boolean_t misaligned = FALSE;
+ boolean_t misaligned = FALSE;
//SKIP_PORT_DESCRIPTORS(saddr, sdsc_count);
for (i = dsc_count - 1; i >= 0; i--) {
switch (kern_dsc[i].type.type) {
case MACH_MSG_PORT_DESCRIPTOR:
- user_dsc = ipc_kmsg_copyout_port_descriptor(&kern_dsc[i], user_dsc, space, &mr);
+ user_dsc = ipc_kmsg_copyout_port_descriptor(&kern_dsc[i],
+ user_dsc, space, &mr);
break;
case MACH_MSG_OOL_VOLATILE_DESCRIPTOR:
case MACH_MSG_OOL_DESCRIPTOR:
user_dsc = ipc_kmsg_copyout_ool_descriptor(
- (mach_msg_ool_descriptor_t *)&kern_dsc[i], user_dsc, is_task_64bit, map, &mr);
+ (mach_msg_ool_descriptor_t *)&kern_dsc[i],
+ user_dsc, is_task_64bit, map, &mr);
break;
case MACH_MSG_OOL_PORTS_DESCRIPTOR:
user_dsc = ipc_kmsg_copyout_ool_ports_descriptor(
- (mach_msg_ool_ports_descriptor_t *)&kern_dsc[i], user_dsc, is_task_64bit, map, space, kmsg, &mr);
+ (mach_msg_ool_ports_descriptor_t *)&kern_dsc[i],
+ user_dsc, is_task_64bit, map, space, kmsg, &mr);
break;
case MACH_MSG_GUARDED_PORT_DESCRIPTOR:
user_dsc = ipc_kmsg_copyout_guarded_port_descriptor(
- (mach_msg_guarded_port_descriptor_t *)&kern_dsc[i], user_dsc, is_task_64bit, kmsg, space, option, &mr);
+ (mach_msg_guarded_port_descriptor_t *)&kern_dsc[i],
+ user_dsc, is_task_64bit, kmsg, space, option, &mr);
break;
default: {
panic("untyped IPC copyout body: invalid message descriptor");
#include <ipc/ipc_object.h>
#include <sys/kdebug.h>
-typedef uint16_t ipc_kmsg_flags_t;
-
-#define IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND 0x1 /* Dest port contains an immovable send right */
-
#if (DEVELOPMENT || DEBUG)
/* Turn on to keep partial message signatures for better debug */
#define IKM_PARTIAL_SIG 0
*/
struct ipc_kmsg {
- mach_msg_size_t ikm_size;
- uint32_t ikm_ppriority; /* pthread priority of this kmsg */
struct ipc_kmsg *ikm_next; /* next message on port/discard queue */
struct ipc_kmsg *ikm_prev; /* prev message on port/discard queue */
union {
#if MACH_FLIPC
struct mach_node *ikm_node; /* Originating node - needed for ack */
#endif
+ mach_msg_size_t ikm_size;
+ uint32_t ikm_ppriority; /* pthread priority of this kmsg */
#if IKM_PARTIAL_SIG
uintptr_t ikm_header_sig; /* sig for just the header */
uintptr_t ikm_headtrail_sig;/* sif for header and trailer */
#endif
uintptr_t ikm_signature; /* sig for all kernel-processed data */
- ipc_kmsg_flags_t ikm_flags;
+ ipc_object_copyin_flags_t ikm_flags;
mach_msg_qos_t ikm_qos_override; /* qos override on this kmsg */
mach_msg_filter_id ikm_filter_policy_id; /* Sandbox-specific policy id used for message filtering */
};
ipc_space_t space,
mach_msg_option_t option);
-/* Copyout a port right returning a name */
-extern mach_msg_return_t ipc_kmsg_copyout_object(
- ipc_space_t space,
- ipc_object_t object,
- mach_msg_type_name_t msgt_name,
- mach_port_context_t *context,
- mach_msg_guard_flags_t *guard_flags,
- mach_port_name_t *namep);
-
/* Copyout the header and body to a user message */
extern mach_msg_return_t ipc_kmsg_copyout(
ipc_kmsg_t kmsg,
#include <mach/sync_policy.h>
#include <kern/assert.h>
-#include <kern/counters.h>
+#include <kern/counter.h>
#include <kern/sched_prim.h>
#include <kern/ipc_kobject.h>
#include <kern/ipc_mig.h> /* XXX - for mach_msg_receive_continue */
void
imq_release_and_unlock(ipc_mqueue_t mq, uint64_t reserved_prepost)
{
- assert(imq_held(mq));
waitq_unlock(&mq->imq_wait_queue);
waitq_prepost_release_reserve(reserved_prepost);
}
if (wresult == THREAD_WAITING) {
wresult = thread_block(THREAD_CONTINUE_NULL);
- counter(c_ipc_mqueue_send_block++);
}
/* Call turnstile complete with interlock held */
if (full_queue_empty) {
ipc_port_t port = ip_from_mq(mqueue);
int dst_pid = 0;
- if (ip_active(port) && !port->ip_tempowner &&
- port->ip_receiver_name && port->ip_receiver &&
- port->ip_receiver != ipc_space_kernel) {
- dst_pid = task_pid(port->ip_receiver->is_task);
- }
+ dst_pid = ipc_port_get_receiver_task(port, NULL);
}
#endif
}
{
struct turnstile *send_turnstile = port_send_turnstile(ip_from_mq(port_mq));
(void)set_mq;
- assert(imq_held(port_mq));
+ imq_held(port_mq);
assert(port_mq->imq_msgcount > 1 || ipc_kmsg_queue_empty(&port_mq->imq_messages));
port_mq->imq_msgcount--;
}
if (wresult == THREAD_WAITING) {
- counter((interruptible == THREAD_ABORTSAFE) ?
- c_ipc_mqueue_receive_block_user++ :
- c_ipc_mqueue_receive_block_kernel++);
-
if (self->ith_continuation) {
thread_block(ipc_mqueue_receive_continue);
}
ipc_mqueue_release_peek_ref(ipc_mqueue_t mq)
{
assert(!imq_is_set(mq));
- assert(imq_held(mq));
+ imq_held(mq);
/*
* clear any preposts this mq may have generated
* Changes a message queue limit; the maximum number
* of messages which may be queued.
* Conditions:
- * Nothing locked.
+ * Port is locked.
*/
void
#define imq_is_valid(mq) waitq_is_valid(&(mq)->imq_wait_queue)
#define imq_unlock(mq) waitq_unlock(&(mq)->imq_wait_queue)
-#define imq_held(mq) waitq_held(&(mq)->imq_wait_queue)
+#define imq_held(mq) assert(waitq_held(&(mq)->imq_wait_queue))
#define imq_valid(mq) waitq_valid(&(mq)->imq_wait_queue)
extern void imq_lock(ipc_mqueue_t mq);
}
/* space is write-locked */
- if (ipc_right_inuse(space, name, entry)) {
+ if (ipc_right_inuse(entry)) {
+ is_write_unlock(space);
return KERN_NAME_EXISTS;
}
assert(type != MACH_PORT_TYPE_NONE);
assert(urefs <= MACH_PORT_UREFS_MAX);
- object = io_alloc(otype);
+ object = io_alloc(otype, Z_WAITOK | Z_ZERO);
if (object == IO_NULL) {
return KERN_RESOURCE_SHORTAGE;
}
- if (otype == IOT_PORT) {
- ipc_port_t port = ip_object_to_port(object);
-
- bzero((char *)port, sizeof(*port));
- } else if (otype == IOT_PORT_SET) {
- ipc_pset_t pset = ips_object_to_pset(object);
-
- bzero((char *)pset, sizeof(*pset));
- }
-
io_lock_init(object);
*namep = CAST_MACH_PORT_TO_NAME(object);
kr = ipc_entry_alloc(space, namep, &entry);
assert(type != MACH_PORT_TYPE_NONE);
assert(urefs <= MACH_PORT_UREFS_MAX);
- object = io_alloc(otype);
+ object = io_alloc(otype, Z_WAITOK | Z_ZERO);
if (object == IO_NULL) {
return KERN_RESOURCE_SHORTAGE;
}
- if (otype == IOT_PORT) {
- ipc_port_t port = ip_object_to_port(object);
-
- bzero((char *)port, sizeof(*port));
- } else if (otype == IOT_PORT_SET) {
- ipc_pset_t pset = ips_object_to_pset(object);
-
- bzero((char *)pset, sizeof(*pset));
- }
-
io_lock_init(object);
kr = ipc_entry_alloc_name(space, name, &entry);
if (kr != KERN_SUCCESS) {
}
/* space is write-locked */
- if (ipc_right_inuse(space, name, entry)) {
+ if (ipc_right_inuse(entry)) {
+ is_write_unlock(space);
io_free(otype, object);
return KERN_NAME_EXISTS;
}
kern_return_t
ipc_object_copyin(
- ipc_space_t space,
- mach_port_name_t name,
- mach_msg_type_name_t msgt_name,
- ipc_object_t *objectp,
- mach_port_context_t context,
- mach_msg_guard_flags_t *guard_flags,
- ipc_kmsg_flags_t kmsg_flags)
+ ipc_space_t space,
+ mach_port_name_t name,
+ mach_msg_type_name_t msgt_name,
+ ipc_object_t *objectp,
+ mach_port_context_t context,
+ mach_msg_guard_flags_t *guard_flags,
+ ipc_object_copyin_flags_t copyin_flags)
{
ipc_entry_t entry;
ipc_port_t soright;
kern_return_t kr;
int assertcnt = 0;
- ipc_right_copyin_flags_t irc_flags = IPC_RIGHT_COPYIN_FLAGS_DEADOK;
- if (kmsg_flags & IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND) {
- irc_flags |= IPC_RIGHT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND;
- }
-
+ ipc_object_copyin_flags_t irc_flags = IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND |
+ IPC_OBJECT_COPYIN_FLAGS_SOFT_FAIL_IMMOVABLE_SEND;
+ irc_flags = (copyin_flags & irc_flags) | IPC_OBJECT_COPYIN_FLAGS_DEADOK;
/*
* Could first try a read lock when doing
* MACH_MSG_TYPE_COPY_SEND, MACH_MSG_TYPE_MAKE_SEND,
ip_lock(port);
if (ip_active(port)) {
assert(port->ip_srights > 0);
- port->ip_srights++;
}
+ port->ip_srights++;
ip_reference(port);
ip_unlock(port);
break;
* Routine: ipc_object_copyout
* Purpose:
* Copyout a capability, placing it into a space.
- * If successful, consumes a ref for the object.
+ * Always consumes a ref for the object.
* Conditions:
* Nothing locked.
* Returns:
ipc_space_t space,
ipc_object_t object,
mach_msg_type_name_t msgt_name,
+ ipc_object_copyout_flags_t flags,
mach_port_context_t *context,
mach_msg_guard_flags_t *guard_flags,
mach_port_name_t *namep)
{
struct knote *kn = current_thread()->ith_knote;
mach_port_name_t name;
+ ipc_port_t port = ip_object_to_port(object);
ipc_entry_t entry;
kern_return_t kr;
assert(io_otype(object) == IOT_PORT);
if (ITH_KNOTE_VALID(kn, msgt_name)) {
- filt_machport_turnstile_prepare_lazily(kn,
- msgt_name, ip_object_to_port(object));
+ filt_machport_turnstile_prepare_lazily(kn, msgt_name, port);
}
is_write_lock(space);
for (;;) {
+ ipc_port_t port_subst = IP_NULL;
+
if (!is_active(space)) {
is_write_unlock(space);
- return KERN_INVALID_TASK;
- }
-
- if ((msgt_name != MACH_MSG_TYPE_PORT_SEND_ONCE) &&
- ipc_right_reverse(space, object, &name, &entry)) {
- /* object is locked and active */
-
- assert(entry->ie_bits & MACH_PORT_TYPE_SEND_RECEIVE);
- break;
+ kr = KERN_INVALID_TASK;
+ goto out;
}
-
- name = CAST_MACH_PORT_TO_NAME(object);
- kr = ipc_entry_get(space, &name, &entry);
+ kr = ipc_entries_hold(space, 1);
if (kr != KERN_SUCCESS) {
/* unlocks/locks space, so must start again */
kr = ipc_entry_grow_table(space, ITS_SIZE_NONE);
if (kr != KERN_SUCCESS) {
- return kr; /* space is unlocked */
+ /* space is unlocked */
+ goto out;
}
continue;
}
- assert(IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE);
- assert(entry->ie_object == IO_NULL);
-
io_lock(object);
if (!io_active(object)) {
io_unlock(object);
- ipc_entry_dealloc(space, name, entry);
is_write_unlock(space);
- return KERN_INVALID_CAPABILITY;
+ kr = KERN_INVALID_CAPABILITY;
+ goto out;
}
/* Don't actually copyout rights we aren't allowed to */
- if (!ip_label_check(space, ip_object_to_port(object), msgt_name)) {
+ if (!ip_label_check(space, port, msgt_name, &flags, &port_subst)) {
io_unlock(object);
- ipc_entry_dealloc(space, name, entry);
is_write_unlock(space);
- return KERN_INVALID_CAPABILITY;
+ assert(port_subst == IP_NULL);
+ kr = KERN_INVALID_CAPABILITY;
+ goto out;
+ }
+
+ /* is the kolabel requesting a substitution */
+ if (port_subst != IP_NULL) {
+ /*
+ * port is unlocked, its right consumed
+ * space is unlocked
+ */
+ assert(msgt_name == MACH_MSG_TYPE_PORT_SEND);
+ port = port_subst;
+ if (!IP_VALID(port)) {
+ object = IO_DEAD;
+ kr = KERN_INVALID_CAPABILITY;
+ goto out;
+ }
+
+ object = ip_to_object(port);
+ is_write_lock(space);
+ continue;
}
- entry->ie_object = object;
break;
}
/* space is write-locked and active, object is locked and active */
+ if ((msgt_name != MACH_MSG_TYPE_PORT_SEND_ONCE) &&
+ ipc_right_reverse(space, object, &name, &entry)) {
+ assert(entry->ie_bits & MACH_PORT_TYPE_SEND_RECEIVE);
+ } else {
+ ipc_entry_claim(space, &name, &entry);
+
+ assert(!ipc_right_inuse(entry));
+ assert(entry->ie_object == IO_NULL);
+
+ entry->ie_object = object;
+ }
+
kr = ipc_right_copyout(space, name, entry,
- msgt_name, context, guard_flags, object);
+ msgt_name, flags, context, guard_flags, object);
/* object is unlocked */
is_write_unlock(space);
+out:
if (kr == KERN_SUCCESS) {
*namep = name;
+ } else if (IO_VALID(object)) {
+ ipc_object_destroy(object, msgt_name);
}
+
return kr;
}
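
A minimal caller sketch to make the revised ownership rule concrete: ipc_object_copyout() now always consumes the object reference, so a failing copyout must not be followed by an ipc_object_destroy() in the caller (as in the ipc_kmsg_copyout_object() change above). example_give_send() is hypothetical and assumes only the declarations in this diff.

static mach_port_name_t
example_give_send(ipc_space_t space, ipc_port_t sright)
{
    mach_port_name_t name;
    kern_return_t kr;

    /* the send right and its reference are handed off unconditionally */
    kr = ipc_object_copyout(space, ip_to_object(sright),
        MACH_MSG_TYPE_PORT_SEND, IPC_OBJECT_COPYOUT_FLAGS_NONE,
        NULL, NULL, &name);
    if (kr != KERN_SUCCESS) {
        /* no ipc_object_destroy() here: the copyout already consumed the right */
        name = (kr == KERN_INVALID_CAPABILITY) ? MACH_PORT_DEAD : MACH_PORT_NULL;
    }
    return name;
}
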
mach_msg_type_name_t msgt_name,
mach_port_name_t name)
{
+ ipc_port_t port = ip_object_to_port(object);
mach_port_name_t oname;
ipc_entry_t oentry;
ipc_entry_t entry;
}
/* space is write-locked and active */
+ io_lock(object);
+
+ /*
+ * Don't actually copyout rights we aren't allowed to
+ *
+ * In particular, kolabel-ed objects do not allow callers
+ * to pick the name they end up with.
+ */
+ if (!io_active(object) || ip_is_kolabeled(port)) {
+ io_unlock(object);
+ if (!ipc_right_inuse(entry)) {
+ ipc_entry_dealloc(space, name, entry);
+ }
+ is_write_unlock(space);
+ return KERN_INVALID_CAPABILITY;
+ }
+
+ /* space is write-locked and active, object is locked and active */
+
if ((msgt_name != MACH_MSG_TYPE_PORT_SEND_ONCE) &&
ipc_right_reverse(space, object, &oname, &oentry)) {
- /* object is locked and active */
-
if (name != oname) {
io_unlock(object);
-
- if (IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE) {
+ if (!ipc_right_inuse(entry)) {
ipc_entry_dealloc(space, name, entry);
}
-
is_write_unlock(space);
return KERN_RIGHT_EXISTS;
}
assert(entry == oentry);
assert(entry->ie_bits & MACH_PORT_TYPE_SEND_RECEIVE);
+ } else if (ipc_right_inuse(entry)) {
+ io_unlock(object);
+ is_write_unlock(space);
+ return KERN_NAME_EXISTS;
} else {
- if (ipc_right_inuse(space, name, entry)) {
- return KERN_NAME_EXISTS;
- }
-
- assert(IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE);
assert(entry->ie_object == IO_NULL);
- io_lock(object);
- if (!io_active(object)) {
- io_unlock(object);
- ipc_entry_dealloc(space, name, entry);
- is_write_unlock(space);
- return KERN_INVALID_CAPABILITY;
- }
-
- /* Don't actually copyout rights we aren't allowed to */
- if (!ip_label_check(space, ip_object_to_port(object), msgt_name)) {
- io_unlock(object);
- ipc_entry_dealloc(space, name, entry);
- is_write_unlock(space);
- return KERN_INVALID_CAPABILITY;
- }
-
entry->ie_object = object;
}
- /* space is write-locked and active, object is locked and active */
-
#if IMPORTANCE_INHERITANCE
/*
* We are slamming a receive right into the space, without
* port has assertions (and the task wants them).
*/
if (msgt_name == MACH_MSG_TYPE_PORT_RECEIVE) {
- ipc_port_t port = ip_object_to_port(object);
-
if (space->is_task != TASK_NULL) {
task_imp = space->is_task->task_imp_base;
if (ipc_importance_task_is_any_receiver_type(task_imp)) {
#endif /* IMPORTANCE_INHERITANCE */
kr = ipc_right_copyout(space, name, entry,
- msgt_name, NULL, NULL, object);
+ msgt_name, IPC_OBJECT_COPYOUT_FLAGS_NONE, NULL, NULL, object);
/* object is unlocked */
is_write_unlock(space);
typedef natural_t ipc_object_bits_t;
typedef natural_t ipc_object_type_t;
+__options_closed_decl(ipc_object_copyout_flags_t, uint32_t, {
+ IPC_OBJECT_COPYOUT_FLAGS_NONE = 0x0,
+ IPC_OBJECT_COPYOUT_FLAGS_PINNED = 0x1,
+ IPC_OBJECT_COPYOUT_FLAGS_NO_LABEL_CHECK = 0x2,
+});
+
+__options_closed_decl(ipc_object_copyin_flags_t, uint32_t, {
+ IPC_OBJECT_COPYIN_FLAGS_NONE = 0x0,
+ IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND = 0x1, /* Dest port contains an immovable send right */
+ IPC_OBJECT_COPYIN_FLAGS_SOFT_FAIL_IMMOVABLE_SEND = 0x2, /* Silently fail copyin without guard exception */
+ IPC_OBJECT_COPYIN_FLAGS_ALLOW_DEAD_SEND_ONCE = 0x4,
+ IPC_OBJECT_COPYIN_FLAGS_DEADOK = 0x8,
+});
+
/*
* The ipc_object is used to both tag and reference count these two data
* structures, and (Noto Bene!) pointers to either of these or the
extern zone_t ipc_object_zones[IOT_NUMBER];
extern lck_grp_t ipc_lck_grp;
-#define io_alloc(otype) \
- ((ipc_object_t) zalloc(ipc_object_zones[(otype)]))
+static inline ipc_object_t
+io_alloc(unsigned int otype, zalloc_flags_t flags)
+{
+ return zalloc_flags(ipc_object_zones[otype], flags);
+}
extern void io_free(
unsigned int otype,
ipc_object_t *objectp,
mach_port_context_t context,
mach_msg_guard_flags_t *guard_flags,
- uint16_t kmsg_flags);
+ ipc_object_copyin_flags_t copyin_flags);
/* Copyin a naked capability from the kernel */
extern void ipc_object_copyin_from_kernel(
ipc_space_t space,
ipc_object_t object,
mach_msg_type_name_t msgt_name,
+ ipc_object_copyout_flags_t flags,
mach_port_context_t *context,
mach_msg_guard_flags_t *guard_flags,
mach_port_name_t *namep);
#include <ipc/ipc_entry.h>
#include <ipc/ipc_space.h>
#include <ipc/ipc_object.h>
+#include <ipc/ipc_right.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_pset.h>
#include <ipc/ipc_kmsg.h>
* Purpose:
* Initializes a newly-allocated port.
* Doesn't touch the ip_object fields.
+ *
+ * The memory is expected to be zero initialized (allocated with Z_ZERO).
*/
void
port->ip_receiver = space;
port->ip_receiver_name = name;
- port->ip_mscount = 0;
- port->ip_srights = 0;
- port->ip_sorights = 0;
if (flags & IPC_PORT_INIT_MAKE_SEND_RIGHT) {
port->ip_srights = 1;
port->ip_mscount = 1;
}
- port->ip_nsrequest = IP_NULL;
- port->ip_pdrequest = IP_NULL;
- port->ip_requests = IPR_NULL;
-
- port->ip_premsg = IKM_NULL;
- port->ip_context = 0;
- port->ip_reply_context = 0;
-
- port->ip_sprequests = 0;
- port->ip_spimportant = 0;
- port->ip_impdonation = 0;
- port->ip_tempowner = 0;
-
- port->ip_guarded = 0;
- port->ip_strict_guard = 0;
- port->ip_immovable_receive = 0;
- port->ip_no_grant = 0;
- port->ip_immovable_send = 0;
- port->ip_impcount = 0;
-
if (flags & IPC_PORT_INIT_FILTER_MESSAGE) {
port->ip_object.io_bits |= IP_BIT_FILTER_MSG;
}
port->ip_tg_block_tracking = (flags & IPC_PORT_INIT_TG_BLOCK_TRACKING) != 0;
- port->ip_specialreply = (flags & IPC_PORT_INIT_SPECIAL_REPLY) != 0;
- port->ip_sync_link_state = PORT_SYNC_LINK_ANY;
- port->ip_sync_bootstrap_checkin = 0;
- ipc_special_reply_port_bits_reset(port);
+ if (flags & IPC_PORT_INIT_SPECIAL_REPLY) {
+ port->ip_specialreply = true;
+ port->ip_immovable_receive = true;
+ }
- port->ip_send_turnstile = TURNSTILE_NULL;
+ port->ip_sync_link_state = PORT_SYNC_LINK_ANY;
ipc_mqueue_kind_t kind = IPC_MQUEUE_KIND_NONE;
if (flags & IPC_PORT_INIT_MESSAGE_QUEUE) {
struct knote *kn;
turnstile_update_flags_t inheritor_flags = TURNSTILE_INHERITOR_TURNSTILE;
- assert(imq_held(mqueue));
+ imq_held(mqueue);
if (!ip_active(port)) {
/* this port is no longer active, it should not push anywhere */
return ipc_port_watchport_elem(port)->twe_task->watchports->tw_thread;
}
+/*
+ * Routine: ipc_port_get_receiver_task
+ * Purpose:
+ * Returns receiver task pointer and its pid (if any) for port.
+ *
+ * Conditions:
+ * Nothing locked.
+ */
+pid_t
+ipc_port_get_receiver_task(ipc_port_t port, uintptr_t *task)
+{
+ task_t receiver = TASK_NULL;
+ pid_t pid = -1;
+
+ if (!port) {
+ goto out;
+ }
+
+ ip_lock(port);
+ if (ip_active(port) &&
+ MACH_PORT_VALID(port->ip_receiver_name) &&
+ port->ip_receiver &&
+ port->ip_receiver != ipc_space_kernel &&
+ port->ip_receiver != ipc_space_reply) {
+ receiver = port->ip_receiver->is_task;
+ pid = task_pid(receiver);
+ }
+ ip_unlock(port);
+
+out:
+ if (task) {
+ *task = (uintptr_t)receiver;
+ }
+ return pid;
+}
+
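
A short, hypothetical use of the new accessor, mainly to illustrate its -1 / NULL conventions; example_log_receiver() is not from the source tree and assumes only the routine declared above.

static void
example_log_receiver(ipc_port_t port)
{
    uintptr_t task_ptr = 0;
    pid_t pid;

    /* tolerates IP_NULL, inactive ports, and kernel/reply receivers: returns -1 */
    pid = ipc_port_get_receiver_task(port, &task_ptr);
    if (pid != -1) {
        printf("port %p receiver: pid %d, task 0x%lx\n", port, pid, (unsigned long)task_ptr);
    }
}
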
/*
* Routine: ipc_port_impcount_delta
* Purpose:
* Nothing locked.
*/
-mach_port_name_t
-ipc_port_copyout_send(
+static mach_port_name_t
+ipc_port_copyout_send_internal(
ipc_port_t sright,
- ipc_space_t space)
+ ipc_space_t space,
+ ipc_object_copyout_flags_t flags)
{
mach_port_name_t name;
kern_return_t kr;
kr = ipc_object_copyout(space, ip_to_object(sright),
- MACH_MSG_TYPE_PORT_SEND, NULL, NULL, &name);
+ MACH_MSG_TYPE_PORT_SEND, flags, NULL, NULL, &name);
if (kr != KERN_SUCCESS) {
- ipc_port_release_send(sright);
-
if (kr == KERN_INVALID_CAPABILITY) {
name = MACH_PORT_DEAD;
} else {
return name;
}
+mach_port_name_t
+ipc_port_copyout_send(
+ ipc_port_t sright,
+ ipc_space_t space)
+{
+ return ipc_port_copyout_send_internal(sright, space, IPC_OBJECT_COPYOUT_FLAGS_NONE);
+}
+
+mach_port_name_t
+ipc_port_copyout_send_pinned(
+ ipc_port_t sright,
+ ipc_space_t space)
+{
+ return ipc_port_copyout_send_internal(sright, space, IPC_OBJECT_COPYOUT_FLAGS_PINNED);
+}
+
/*
- * Routine: ipc_port_release_send
+ * Routine: ipc_port_release_send_and_unlock
* Purpose:
* Release a naked send right.
* Consumes a ref for the port.
* Conditions:
- * Nothing locked.
+ * Port is valid and locked on entry
+ * Port is unlocked on exit.
*/
-
void
-ipc_port_release_send(
+ipc_port_release_send_and_unlock(
ipc_port_t port)
{
ipc_port_t nsrequest = IP_NULL;
mach_port_mscount_t mscount;
- if (!IP_VALID(port)) {
- return;
- }
-
- ip_lock(port);
-
assert(port->ip_srights > 0);
if (port->ip_srights == 0) {
panic("Over-release of port %p send right!", port);
}
}
+/*
+ * Routine: ipc_port_release_send
+ * Purpose:
+ * Release a naked send right.
+ * Consumes a ref for the port.
+ * Conditions:
+ * Nothing locked.
+ */
+
+void
+ipc_port_release_send(
+ ipc_port_t port)
+{
+ if (IP_VALID(port)) {
+ ip_lock(port);
+ ipc_port_release_send_and_unlock(port);
+ }
+}
+
/*
* Routine: ipc_port_make_sonce_locked
* Purpose:
{
ipc_port_t port;
- port = ip_object_to_port(io_alloc(IOT_PORT));
+ port = ip_object_to_port(io_alloc(IOT_PORT, Z_WAITOK | Z_ZERO));
if (port == IP_NULL) {
return IP_NULL;
}
-#if MACH_ASSERT
+#if MACH_ASSERT
uintptr_t buf[IP_CALLSTACK_MAX];
ipc_port_callstack_init_debug(&buf[0], IP_CALLSTACK_MAX);
#endif /* MACH_ASSERT */
- bzero((char *)port, sizeof(*port));
io_lock_init(ip_to_object(port));
port->ip_references = 1;
port->ip_object.io_bits = io_makebits(TRUE, IOT_PORT, 0);
struct ipc_port *ip_pdrequest;
struct ipc_port_request *ip_requests;
union {
- struct ipc_kmsg *premsg;
+ struct ipc_kmsg *XNU_PTRAUTH_SIGNED_PTR("ipc_port.premsg") premsg;
struct turnstile *send_turnstile;
+ ipc_port_t XNU_PTRAUTH_SIGNED_PTR("ipc_port.alt_port") alt_port;
} kdata2;
mach_vm_address_t ip_context;
ip_no_grant:1, /* Port wont accept complex messages containing (ool) port descriptors */
ip_immovable_send:1, /* No send(once) rights to this port can be moved out of a space */
ip_tg_block_tracking:1, /* Track blocking relationship between thread groups during sync IPC */
- ip_impcount:17; /* number of importance donations in nested queue */
+ ip_pinned: 1, /* Can't deallocate the last send right from a space while the bit is set */
+ ip_impcount:16; /* number of importance donations in nested queue */
mach_port_mscount_t ip_mscount;
mach_port_rights_t ip_srights;
#define ip_premsg kdata2.premsg
#define ip_send_turnstile kdata2.send_turnstile
+#define ip_alt_port kdata2.alt_port
#define port_send_turnstile(port) (IP_PREALLOC(port) ? (port)->ip_premsg->ikm_turnstile : (port)->ip_send_turnstile)
#define ip_kotype(port) io_kotype(ip_to_object(port))
#define ip_is_kobject(port) io_is_kobject(ip_to_object(port))
+#define ip_is_control(port) \
+ (ip_is_kobject(port) && (ip_kotype(port) == IKOT_TASK_CONTROL || ip_kotype(port) == IKOT_THREAD_CONTROL))
#define ip_is_kolabeled(port) io_is_kolabeled(ip_to_object(port))
#define ip_get_kobject(port) ipc_kobject_get(port)
-#define ip_label_check(space, port, msgt_name) \
- (!ip_is_kolabeled(port) || ipc_kobject_label_check((space), (port), (msgt_name)))
#define ip_full_kernel(port) imq_full_kernel(&(port)->ip_messages)
#define ip_full(port) imq_full(&(port)->ip_messages)
ipc_port_t sright,
ipc_space_t space);
+extern mach_port_name_t ipc_port_copyout_send_pinned(
+ ipc_port_t sright,
+ ipc_space_t space);
+
extern void ipc_port_thread_group_blocked(
ipc_port_t port);
extern void ipc_port_thread_group_unblocked(void);
+extern void ipc_port_release_send_and_unlock(
+ ipc_port_t port);
#endif /* MACH_KERNEL_PRIVATE */
#if KERNEL_PRIVATE
extern void ipc_port_release_receive(
ipc_port_t port);
-/* finalize the destruction of a port before it gets freed */
+/* Finalize the destruction of a port before it gets freed */
extern void ipc_port_finalize(
ipc_port_t port);
+/* Get receiver task and its pid (if any) for port. */
+extern pid_t ipc_port_get_receiver_task(ipc_port_t port, uintptr_t *task);
+
/* Allocate a port in a special space */
extern ipc_port_t ipc_port_alloc_special(
ipc_space_t space,
assert(space->is_table == IE_NULL);
assert(!is_active(space));
- pset = ips_object_to_pset(io_alloc(IOT_PORT_SET));
+ pset = ips_object_to_pset(io_alloc(IOT_PORT_SET, Z_WAITOK | Z_ZERO));
if (pset == IPS_NULL) {
return IPS_NULL;
}
- bzero((char *)pset, sizeof(*pset));
-
io_lock_init(ips_to_object(pset));
pset->ips_references = 1;
pset->ips_object.io_bits = io_makebits(TRUE, IOT_PORT_SET, 0);
int result = 0;
/* mqueue locked by caller */
- assert(imq_held(mqueue));
+ imq_held(mqueue);
assert(hint != NOTE_REVOKE);
if (imq_is_valid(mqueue)) {
assert(!imq_is_set(mqueue));
* Translate (space, object) -> (name, entry).
* Only finds send/receive rights.
* Returns TRUE if an entry is found; if so,
- * the object is locked and active.
+ * the object is active.
* Conditions:
* The space must be locked (read or write) and active.
- * Nothing else locked.
+ * The port is locked and active
*/
-boolean_t
+bool
ipc_right_reverse(
ipc_space_t space,
ipc_object_t object,
assert(io_otype(object) == IOT_PORT);
port = ip_object_to_port(object);
+ require_ip_active(port);
- ip_lock(port);
- if (!ip_active(port)) {
- ip_unlock(port);
-
- return FALSE;
- }
+ ip_lock_held(port);
if (port->ip_receiver == space) {
name = port->ip_receiver_name;
*namep = name;
*entryp = entry;
- return TRUE;
+ return true;
}
if (ipc_hash_lookup(space, ip_to_object(port), namep, entryp)) {
assert(IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_SEND);
assert(port == ip_object_to_port(entry->ie_object));
- return TRUE;
+ return true;
}
- ip_unlock(port);
- return FALSE;
+ return false;
}
/*
port = ip_object_to_port(entry->ie_object);
assert(port != IP_NULL);
- if (!ipc_right_check(space, port, name, entry, IPC_RIGHT_COPYIN_FLAGS_NONE)) {
+ if (!ipc_right_check(space, port, name, entry, IPC_OBJECT_COPYIN_FLAGS_NONE)) {
/* port is locked and active */
/* if no new request, just cancel previous */
* Returns TRUE if it is.
* Conditions:
* The space is write-locked and active.
- * It is unlocked if the entry is inuse.
*/
-boolean_t
+bool
ipc_right_inuse(
- ipc_space_t space,
- __unused mach_port_name_t name,
- ipc_entry_t entry)
+ ipc_entry_t entry)
{
- if (IE_BITS_TYPE(entry->ie_bits) != MACH_PORT_TYPE_NONE) {
- is_write_unlock(space);
- return TRUE;
- }
- return FALSE;
+ return IE_BITS_TYPE(entry->ie_bits) != MACH_PORT_TYPE_NONE;
}
/*
* Routine: ipc_right_check
* Purpose:
* Check if the port has died. If it has,
- * and IPC_RIGHT_COPYIN_FLAGS_ALLOW_DEAD_SEND_ONCE is not
+ * and IPC_OBJECT_COPYIN_FLAGS_ALLOW_DEAD_SEND_ONCE is not
* passed and it is not a send once right then
* clean up the entry and return TRUE.
* Conditions:
ipc_port_t port,
mach_port_name_t name,
ipc_entry_t entry,
- ipc_right_copyin_flags_t flags)
+ ipc_object_copyin_flags_t flags)
{
ipc_entry_bits_t bits;
ip_lock(port);
if (ip_active(port) ||
- ((flags & IPC_RIGHT_COPYIN_FLAGS_ALLOW_DEAD_SEND_ONCE) &&
+ ((flags & IPC_OBJECT_COPYIN_FLAGS_ALLOW_DEAD_SEND_ONCE) &&
entry->ie_request == IE_REQ_NONE &&
(entry->ie_bits & MACH_PORT_TYPE_SEND_ONCE))) {
return FALSE;
* Returns:
* KERN_SUCCESS A user ref was released.
* KERN_INVALID_RIGHT Entry has wrong type.
+ * KERN_INVALID_CAPABILITY Deallocating a pinned right.
*/
kern_return_t
port = ip_object_to_port(entry->ie_object);
assert(port != IP_NULL);
- if (ipc_right_check(space, port, name, entry, IPC_RIGHT_COPYIN_FLAGS_NONE)) {
+ if (ipc_right_check(space, port, name, entry, IPC_OBJECT_COPYIN_FLAGS_NONE)) {
bits = entry->ie_bits;
assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_DEAD_NAME);
goto dead_name; /* it will release port */
port = ip_object_to_port(entry->ie_object);
assert(port != IP_NULL);
- if (ipc_right_check(space, port, name, entry, IPC_RIGHT_COPYIN_FLAGS_NONE)) {
+ if (ipc_right_check(space, port, name, entry, IPC_OBJECT_COPYIN_FLAGS_NONE)) {
bits = entry->ie_bits;
assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_DEAD_NAME);
goto dead_name; /* it will release port */
assert(port->ip_srights > 0);
if (IE_BITS_UREFS(bits) == 1) {
+ if (pinned_control_port_enabled && port->ip_pinned != 0) {
+ ip_unlock(port);
+ is_write_unlock(space);
+ mach_port_guard_exception(name, 0, MPG_FLAGS_MOD_REFS_PINNED_DEALLOC,
+ ipc_control_port_options & IPC_CONTROL_PORT_OPTIONS_PINNED_HARD ?
+ kGUARD_EXC_MOD_REFS : kGUARD_EXC_MOD_REFS_NON_FATAL);
+ return KERN_INVALID_CAPABILITY;
+ }
if (--port->ip_srights == 0) {
nsrequest = port->ip_nsrequest;
if (nsrequest != IP_NULL) {
* KERN_SUCCESS Count was modified.
* KERN_INVALID_RIGHT Entry has wrong type.
* KERN_INVALID_VALUE Bad delta for the right.
+ * KERN_INVALID_CAPABILITY Deallocating a pinned right.
*/
kern_return_t
port = ip_object_to_port(entry->ie_object);
assert(port != IP_NULL);
- if (ipc_right_check(space, port, name, entry, IPC_RIGHT_COPYIN_FLAGS_NONE)) {
+ if (ipc_right_check(space, port, name, entry, IPC_OBJECT_COPYIN_FLAGS_NONE)) {
assert(!(entry->ie_bits & MACH_PORT_TYPE_SEND_ONCE));
mach_port_guard_exception(name, 0, 0, kGUARD_EXC_INVALID_RIGHT);
goto invalid_right;
port = ip_object_to_port(entry->ie_object);
assert(port != IP_NULL);
- if (!ipc_right_check(space, port, name, entry, IPC_RIGHT_COPYIN_FLAGS_NONE)) {
+ if (!ipc_right_check(space, port, name, entry, IPC_OBJECT_COPYIN_FLAGS_NONE)) {
/* port is locked and active */
ip_unlock(port);
port = IP_NULL;
port = ip_object_to_port(entry->ie_object);
assert(port != IP_NULL);
- if (ipc_right_check(space, port, name, entry, IPC_RIGHT_COPYIN_FLAGS_NONE)) {
+ if (ipc_right_check(space, port, name, entry, IPC_OBJECT_COPYIN_FLAGS_NONE)) {
assert((entry->ie_bits & MACH_PORT_TYPE_SEND) == 0);
goto invalid_right;
}
}
if ((urefs + delta) == 0) {
+ if (pinned_control_port_enabled && port->ip_pinned != 0) {
+ ip_unlock(port);
+ goto pinned_right;
+ }
+
if (--port->ip_srights == 0) {
nsrequest = port->ip_nsrequest;
if (nsrequest != IP_NULL) {
}
return KERN_INVALID_RIGHT;
+pinned_right:
+ assert(pinned_control_port_enabled);
+
+ is_write_unlock(space);
+ mach_port_guard_exception(name, 0, MPG_FLAGS_MOD_REFS_PINNED_DEALLOC,
+ ipc_control_port_options & IPC_CONTROL_PORT_OPTIONS_PINNED_HARD ?
+ kGUARD_EXC_MOD_REFS : kGUARD_EXC_MOD_REFS_NON_FATAL);
+ return KERN_INVALID_CAPABILITY;
+
invalid_value:
is_write_unlock(space);
mach_port_guard_exception(name, 0, 0, kGUARD_EXC_INVALID_VALUE);
* types while we still have it locked. Otherwise,
* recapture the (now dead) bits.
*/
- if (!ipc_right_check(space, port, name, entry, IPC_RIGHT_COPYIN_FLAGS_NONE)) {
+ if (!ipc_right_check(space, port, name, entry, IPC_OBJECT_COPYIN_FLAGS_NONE)) {
if (request != IE_REQ_NONE) {
type |= ipc_port_request_type(port, name, request);
}
* be read without a lock.
*/
if (reply_port->ip_immovable_send) {
- mach_port_guard_exception(reply_name, 0, 0, kGUARD_EXC_IMMOVABLE);
- return FALSE;
+ if (!ip_is_control(reply_port) || immovable_control_port_enabled) {
+ mach_port_guard_exception_immovable(reply_name, reply_port, MPG_FLAGS_NONE);
+ return FALSE;
+ }
}
if (reply_type == MACH_MSG_TYPE_MOVE_SEND_ONCE) {
* Returns:
* KERN_SUCCESS Acquired an object, possibly IO_DEAD.
* KERN_INVALID_RIGHT Name doesn't denote correct right.
- * KERN_INVALID_CAPABILITY Trying to move an kobject port or an immovable right
+ * KERN_INVALID_CAPABILITY Trying to move an kobject port or an immovable right,
+ * or moving the last ref of pinned right
* KERN_INVALID_ARGUMENT Port is unguarded or guard mismatch
*/
mach_port_name_t name,
ipc_entry_t entry,
mach_msg_type_name_t msgt_name,
- ipc_right_copyin_flags_t flags,
+ ipc_object_copyin_flags_t flags,
ipc_object_t *objectp,
ipc_port_t *sorightp,
ipc_port_t *releasep,
ipc_entry_bits_t bits;
ipc_port_t port;
kern_return_t kr;
- boolean_t deadok = flags & IPC_RIGHT_COPYIN_FLAGS_DEADOK? TRUE : FALSE;
- boolean_t allow_imm_send = flags & IPC_RIGHT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND? TRUE : FALSE;
+ boolean_t deadok = !!(flags & IPC_OBJECT_COPYIN_FLAGS_DEADOK);
+ boolean_t allow_imm_send = !!(flags & IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND);
+ boolean_t soft_fail_imm_send = !!(flags & IPC_OBJECT_COPYIN_FLAGS_SOFT_FAIL_IMMOVABLE_SEND);
*releasep = IP_NULL;
*assertcntp = 0;
port = ip_object_to_port(entry->ie_object);
assert(port != IP_NULL);
- if (ipc_right_check(space, port, name, entry, IPC_RIGHT_COPYIN_FLAGS_NONE)) {
+ if (ipc_right_check(space, port, name, entry, IPC_OBJECT_COPYIN_FLAGS_NONE)) {
bits = entry->ie_bits;
*releasep = port;
goto copy_dead;
}
if (!allow_imm_send && port->ip_immovable_send) {
- ip_unlock(port);
- mach_port_guard_exception(name, 0, 0, kGUARD_EXC_IMMOVABLE);
- return KERN_INVALID_CAPABILITY;
+ if (!ip_is_control(port) || immovable_control_port_enabled) {
+ ip_unlock(port);
+ if (!soft_fail_imm_send) {
+ mach_port_guard_exception_immovable(name, port, MPG_FLAGS_NONE);
+ }
+ return KERN_INVALID_CAPABILITY;
+ }
}
ipc_port_copy_send_locked(port);
port = ip_object_to_port(entry->ie_object);
assert(port != IP_NULL);
- if (ipc_right_check(space, port, name, entry, IPC_RIGHT_COPYIN_FLAGS_NONE)) {
+ if (ipc_right_check(space, port, name, entry, IPC_OBJECT_COPYIN_FLAGS_NONE)) {
bits = entry->ie_bits;
*releasep = port;
goto move_dead;
if ((bits & MACH_PORT_TYPE_SEND) == 0) {
assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_SEND_ONCE);
assert(port->ip_sorights > 0);
-
ip_unlock(port);
goto invalid_right;
}
if (!allow_imm_send && port->ip_immovable_send) {
- ip_unlock(port);
- mach_port_guard_exception(name, 0, 0, kGUARD_EXC_IMMOVABLE);
- return KERN_INVALID_CAPABILITY;
+ if (!ip_is_control(port) || immovable_control_port_enabled) {
+ ip_unlock(port);
+ if (!soft_fail_imm_send) {
+ mach_port_guard_exception_immovable(name, port, MPG_FLAGS_NONE);
+ }
+ return KERN_INVALID_CAPABILITY;
+ }
}
if (IE_BITS_UREFS(bits) == 1) {
assert(port->ip_receiver == space);
assert(IE_BITS_TYPE(bits) ==
MACH_PORT_TYPE_SEND_RECEIVE);
+ assert(port->ip_pinned == 0);
ip_reference(port);
} else {
}
if (!allow_imm_send && port->ip_immovable_send) {
- ip_unlock(port);
- mach_port_guard_exception(name, 0, 0, kGUARD_EXC_IMMOVABLE);
- return KERN_INVALID_CAPABILITY;
+ if (!ip_is_control(port) || immovable_control_port_enabled) {
+ ip_unlock(port);
+ if (!soft_fail_imm_send) {
+ mach_port_guard_exception_immovable(name, port, MPG_FLAGS_NONE);
+ }
+ return KERN_INVALID_CAPABILITY;
+ }
}
assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_SEND_ONCE);
port = ip_object_to_port(entry->ie_object);
assert(port != IP_NULL);
- if (ipc_right_check(space, port, name, entry, IPC_RIGHT_COPYIN_FLAGS_NONE)) {
+ if (ipc_right_check(space, port, name, entry, IPC_OBJECT_COPYIN_FLAGS_NONE)) {
*releasep = port;
goto invalid_right;
}
ipc_object_t object_two;
kr = ipc_right_copyin(space, name, entry,
- msgt_one, IPC_RIGHT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND,
+ msgt_one, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND,
objectp, sorightp, releasep,
&assertcnt, 0, NULL);
assert(assertcnt == 0);
* receive right.
*/
kr = ipc_right_copyin(space, name, entry,
- msgt_two, IPC_RIGHT_COPYIN_FLAGS_NONE,
+ msgt_two, IPC_OBJECT_COPYIN_FLAGS_NONE,
&object_two, sorightp, releasep,
&assertcnt, 0, NULL);
assert(assertcnt == 0);
}
kr = ipc_right_copyin(space, name, entry,
- msgt_name, IPC_RIGHT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND,
+ msgt_name, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND,
objectp, sorightp, releasep,
&assertcnt, 0, NULL);
assert(assertcnt == 0);
mach_port_name_t name,
ipc_entry_t entry,
mach_msg_type_name_t msgt_name,
+ ipc_object_copyout_flags_t flags,
mach_port_context_t *context,
mach_msg_guard_flags_t *guard_flags,
ipc_object_t object)
port = ip_object_to_port(object);
+ if (pinned_control_port_enabled && (flags & IPC_OBJECT_COPYOUT_FLAGS_PINNED)) {
+ assert(!port->ip_pinned);
+ assert(port->ip_immovable_send);
+ port->ip_pinned = 1;
+ }
+
switch (msgt_name) {
case MACH_MSG_TYPE_PORT_SEND_ONCE:
#define ipc_right_lookup_read ipc_right_lookup_write
#define ipc_right_lookup_two_read ipc_right_lookup_two_write
-typedef uint32_t ipc_right_copyin_flags_t;
-
-#define IPC_RIGHT_COPYIN_FLAGS_NONE 0x0
-#define IPC_RIGHT_COPYIN_FLAGS_DEADOK 0x1
-#define IPC_RIGHT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND 0x2
-#define IPC_RIGHT_COPYIN_FLAGS_ALLOW_DEAD_SEND_ONCE 0x4 /* allow copyin of a send once right to a dead port with no dead name requests */
-
/* Find an entry in a space, given the name */
extern kern_return_t ipc_right_lookup_write(
ipc_space_t space,
ipc_entry_t *entryp2);
/* Translate (space, object) -> (name, entry) */
-extern boolean_t ipc_right_reverse(
+extern bool ipc_right_reverse(
ipc_space_t space,
ipc_object_t object,
mach_port_name_t *namep,
ipc_right_request_cancel((space), (port), (name), (entry)))
/* Check if an entry is being used */
-extern boolean_t ipc_right_inuse(
- ipc_space_t space,
- mach_port_name_t name,
+extern bool ipc_right_inuse(
ipc_entry_t entry);
/* Check if the port has died */
ipc_port_t port,
mach_port_name_t name,
ipc_entry_t entry,
- ipc_right_copyin_flags_t flags);
+ ipc_object_copyin_flags_t flags);
/* Clean up an entry in a dead space */
extern void ipc_right_terminate(
mach_port_name_t name,
ipc_entry_t entry,
mach_msg_type_name_t msgt_name,
- ipc_right_copyin_flags_t flags,
+ ipc_object_copyin_flags_t flags,
ipc_object_t *objectp,
ipc_port_t *sorightp,
ipc_port_t *releasep,
mach_port_name_t name,
ipc_entry_t entry,
mach_msg_type_name_t msgt_name,
+ ipc_object_copyout_flags_t flags,
mach_port_context_t *context,
mach_msg_guard_flags_t *guard_flags,
ipc_object_t object);
typedef uint8_t sync_qos_count_t;
typedef uint64_t ipc_label_t;
-#define IPC_LABEL_NONE ((ipc_label_t)0x0)
-#define IPC_LABEL_DEXT ((ipc_label_t)0x1)
-#define IPC_LABEL_PLATFORM ((ipc_label_t)0x2)
-#define IPC_LABEL_SPECIAL ((ipc_label_t)0x3)
+#define IPC_LABEL_NONE ((ipc_label_t)0x0000)
+#define IPC_LABEL_DEXT ((ipc_label_t)0x0001)
+#define IPC_LABEL_PLATFORM ((ipc_label_t)0x0002)
+#define IPC_LABEL_SPECIAL ((ipc_label_t)0x0003)
+#define IPC_LABEL_SPACE_MASK ((ipc_label_t)0x00ff)
+
+#define IPC_LABEL_SUBST_TASK ((ipc_label_t)0x0100)
+#define IPC_LABEL_SUBST_THREAD ((ipc_label_t)0x0200)
+#define IPC_LABEL_SUBST_ONCE ((ipc_label_t)0x0300)
+#define IPC_LABEL_SUBST_MASK ((ipc_label_t)0xff00)
typedef struct ipc_kobject_label *ipc_kobject_label_t;
* keeps the voucher bound to the port (and active).
*/
if (ip_kotype(port) == IKOT_VOUCHER) {
- return (uintptr_t)port->ip_kobject;
+ return (uintptr_t)ipc_kobject_get(port);
}
}
return (uintptr_t)IV_NULL;
* if this is the first send right
*/
if (!ipc_kobject_make_send_lazy_alloc_port(&voucher->iv_port,
- (ipc_kobject_t)voucher, IKOT_VOUCHER, false, 0)) {
+ (ipc_kobject_t)voucher, IKOT_VOUCHER, IPC_KOBJECT_ALLOC_NONE, false, 0)) {
ipc_voucher_release(voucher);
}
return voucher->iv_port;
* ipc_voucher_attr_control_notify if this is the first send right
*/
if (!ipc_kobject_make_send_lazy_alloc_port(&control->ivac_port,
- (ipc_kobject_t)control, IKOT_VOUCHER_ATTR_CONTROL, false, 0)) {
+ (ipc_kobject_t)control, IKOT_VOUCHER_ATTR_CONTROL, IPC_KOBJECT_ALLOC_NONE, false, 0)) {
ivac_release(control);
}
return control->ivac_port;
iv_index_t e_sum;
iv_index_t e_hash;
queue_chain_t e_hash_link;
- uint8_t e_data[];
+ uint8_t *e_data;
};
typedef struct user_data_value_element *user_data_element_t;
#define USER_DATA_ASSERT_KEY(key) assert(MACH_VOUCHER_ATTR_KEY_TEST == (key))
#endif
+static void
+user_data_value_element_free(user_data_element_t elem)
+{
+ kheap_free(KHEAP_DATA_BUFFERS, elem->e_data, elem->e_size);
+ kfree(elem, sizeof(struct user_data_value_element));
+}
+
/*
* Routine: user_data_release_value
* Purpose:
if (sync == elem->e_made) {
queue_remove(&user_data_bucket[hash], elem, user_data_element_t, e_hash_link);
user_data_unlock();
- kfree(elem, sizeof(*elem) + elem->e_size);
+ user_data_value_element_free(elem);
return KERN_SUCCESS;
}
assert(sync < elem->e_made);
user_data_unlock();
if (NULL != alloc) {
- kfree(alloc, sizeof(*alloc) + content_size);
+ user_data_value_element_free(alloc);
}
return elem;
if (NULL == alloc) {
user_data_unlock();
- alloc = (user_data_element_t)kalloc(sizeof(*alloc) + content_size);
+ alloc = kalloc(sizeof(struct user_data_value_element));
alloc->e_made = 1;
alloc->e_size = content_size;
alloc->e_sum = sum;
alloc->e_hash = hash;
+ alloc->e_data = kheap_alloc(KHEAP_DATA_BUFFERS, content_size, Z_WAITOK | Z_NOFAIL);
memcpy(alloc->e_data, content, content_size);
goto retry;
}
}
#if !(DEVELOPMENT || DEBUG) && CONFIG_MACF
- const boolean_t dbg_ok = (mac_task_check_expose_task(kernel_task) == 0);
+ const boolean_t dbg_ok = (mac_task_check_expose_task(kernel_task, TASK_FLAVOR_CONTROL) == 0);
#else
const boolean_t dbg_ok = TRUE;
#endif
#include <kern/ipc_tt.h>
#include <kern/kalloc.h>
#include <vm/vm_protos.h>
+#include <kdp/kdp_dyld.h>
kern_return_t
mach_port_get_attributes(
mach_port_info_t info,
mach_msg_type_number_t *count);
+extern lck_mtx_t g_dyldinfo_mtx;
+
int
_kernelrpc_mach_vm_allocate_trap(struct _kernelrpc_mach_vm_allocate_trap_args *args)
{
}
rv = ipc_object_copyin(task->itk_space, args->poly, args->polyPoly,
- (ipc_object_t *)&port, 0, NULL, IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND);
+ (ipc_object_t *)&port, 0, NULL, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND);
if (rv != KERN_SUCCESS) {
goto done;
}
int
_kernelrpc_mach_port_get_attributes_trap(struct _kernelrpc_mach_port_get_attributes_args *args)
{
- task_inspect_t task = port_name_to_task_read_no_eval(args->target);
+ task_read_t task = port_name_to_task_read_no_eval(args->target);
int rv = MACH_SEND_INVALID_DEST;
mach_msg_type_number_t count;
// thread-argument-passing and its value should not be garbage
current_thread()->ith_knote = ITH_KNOTE_NULL;
rv = ipc_object_copyout(task->itk_space, ip_to_object(previous),
- MACH_MSG_TYPE_PORT_SEND_ONCE, NULL, NULL, &previous_name);
+ MACH_MSG_TYPE_PORT_SEND_ONCE, IPC_OBJECT_COPYOUT_FLAGS_NONE, NULL, NULL, &previous_name);
if (rv != KERN_SUCCESS) {
- ipc_object_destroy(ip_to_object(previous),
- MACH_MSG_TYPE_PORT_SEND_ONCE);
goto done;
}
}
ipc_voucher_release(voucher);
return kr;
}
+
+/*
+ * Mach Trap: task_dyld_process_info_notify_get_trap
+ *
+ * Return an array of active dyld notifier port names for current_task(). User
+ * is responsible for allocating the memory for the mach port names array
+ * and deallocating the port names inside the array returned.
+ *
+ * Does not consume any reference.
+ *
+ * Args:
+ * names_addr: Address for mach port names array. (In param only)
+ * names_count_addr: Number of active dyld notifier ports. (In-Out param)
+ * In: Number of slots available for copyout in caller
+ * Out: Actual number of ports copied out
+ *
+ * Returns:
+ *
+ * KERN_SUCCESS: A valid namesCnt is returned. (Can be zero)
+ * KERN_INVALID_ARGUMENT: Arguments are invalid.
+ * KERN_MEMORY_ERROR: Memory copyio operations failed.
+ * KERN_NO_SPACE: User allocated memory for port names copyout is insufficient.
+ *
+ * Other error code see task_info().
+ */
+kern_return_t
+task_dyld_process_info_notify_get_trap(struct task_dyld_process_info_notify_get_trap_args *args)
+{
+ struct task_dyld_info dyld_info;
+ mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
+ mach_port_name_t copyout_names[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
+ ipc_port_t copyout_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
+ ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
+ uint32_t copyout_count = 0, release_count = 0, active_count = 0;
+ mach_vm_address_t ports_addr; /* a user space address */
+ mach_port_name_t new_name;
+ natural_t user_names_count = 0;
+ ipc_port_t sright;
+ kern_return_t kr;
+ ipc_port_t *portp;
+ ipc_entry_t entry;
+
+ if ((mach_port_name_array_t)args->names_addr == NULL || (natural_t *)args->names_count_addr == NULL) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ kr = copyin((vm_map_address_t)args->names_count_addr, &user_names_count, sizeof(natural_t));
+ if (kr) {
+ return KERN_MEMORY_FAILURE;
+ }
+
+ if (user_names_count == 0) {
+ return KERN_NO_SPACE;
+ }
+
+ kr = task_info(current_task(), TASK_DYLD_INFO, (task_info_t)&dyld_info, &info_count);
+ if (kr) {
+ return kr;
+ }
+
+ if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
+ ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
+ offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
+ } else {
+ ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
+ offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
+ }
+
+ lck_mtx_lock(&g_dyldinfo_mtx);
+ itk_lock(current_task());
+
+ if (current_task()->itk_dyld_notify == NULL) {
+ itk_unlock(current_task());
+ (void)copyoutmap_atomic32(current_task()->map, MACH_PORT_NULL, (vm_map_address_t)ports_addr); /* reset magic */
+ lck_mtx_unlock(&g_dyldinfo_mtx);
+
+ kr = copyout(&copyout_count, (vm_map_address_t)args->names_count_addr, sizeof(natural_t));
+ return kr ? KERN_MEMORY_ERROR : KERN_SUCCESS;
+ }
+
+ for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
+ portp = &current_task()->itk_dyld_notify[slot];
+ if (*portp == IPC_PORT_NULL) {
+ continue;
+ } else {
+ sright = ipc_port_copy_send(*portp);
+ if (IP_VALID(sright)) {
+ copyout_ports[active_count++] = sright; /* donates */
+ sright = IPC_PORT_NULL;
+ } else {
+ release_ports[release_count++] = *portp; /* donates */
+ *portp = IPC_PORT_NULL;
+ }
+ }
+ }
+
+ task_dyld_process_info_update_helper(current_task(), active_count,
+ (vm_map_address_t)ports_addr, release_ports, release_count);
+ /* itk_lock, g_dyldinfo_mtx are unlocked upon return */
+
+ for (int i = 0; i < active_count; i++) {
+ sright = copyout_ports[i]; /* donates */
+ copyout_ports[i] = IPC_PORT_NULL;
+
+ assert(IP_VALID(sright));
+ ip_reference(sright);
+ /*
+ * Below we consume each send right in copyout_ports, and if copyout_send
+ * succeeds, replace it with a port ref; otherwise release the port ref.
+ *
+ * We can reuse copyout_ports array for this purpose since
+ * copyout_count <= active_count.
+ */
+ new_name = ipc_port_copyout_send(sright, current_space()); /* consumes */
+ if (MACH_PORT_VALID(new_name)) {
+ copyout_names[copyout_count] = new_name;
+ copyout_ports[copyout_count] = sright; /* now holds port ref */
+ copyout_count++;
+ } else {
+ ip_release(sright);
+ }
+ }
+
+ assert(copyout_count <= active_count);
+
+ if (user_names_count < copyout_count) {
+ kr = KERN_NO_SPACE;
+ goto copyout_failed;
+ }
+
+ /* copyout to caller's local copy */
+ kr = copyout(copyout_names, (vm_map_address_t)args->names_addr,
+ copyout_count * sizeof(mach_port_name_t));
+ if (kr) {
+ kr = KERN_MEMORY_ERROR;
+ goto copyout_failed;
+ }
+
+ kr = copyout(&copyout_count, (vm_map_address_t)args->names_count_addr, sizeof(natural_t));
+ if (kr) {
+ kr = KERN_MEMORY_ERROR;
+ goto copyout_failed;
+ }
+
+ /* now, release port refs on copyout_ports */
+ for (int i = 0; i < copyout_count; i++) {
+ sright = copyout_ports[i];
+ assert(IP_VALID(sright));
+ ip_release(sright);
+ }
+
+ return KERN_SUCCESS;
+
+
+copyout_failed:
+ /*
+ * No locks are held beyond this point.
+ *
+ * Release port refs on copyout_ports, and deallocate ports that we copied out
+ * earlier.
+ */
+ for (int i = 0; i < copyout_count; i++) {
+ sright = copyout_ports[i];
+ assert(IP_VALID(sright));
+
+ if (ipc_right_lookup_write(current_space(), copyout_names[i], &entry)) {
+ /* userspace has deallocated the name we copyout */
+ ip_release(sright);
+ continue;
+ }
+ /* space is locked and active */
+ if (entry->ie_object == ip_to_object(sright) ||
+ IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_DEAD_NAME) {
+ (void)ipc_right_dealloc(current_space(), copyout_names[i], entry); /* unlocks space */
+ } else {
+ is_write_unlock(current_space());
+ }
+
+ /* space is unlocked */
+ ip_release(sright);
+ }
+
+ return kr;
+}
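
To make the In-Out names_count protocol described above concrete, here is a hypothetical user-space caller; it assumes a libsyscall trap wrapper named task_dyld_process_info_notify_get() taking the same (names, count) pair, and sizes the buffer to match DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT (8).

#include <mach/mach.h>

static void
example_drain_dyld_notify_names(void)
{
    mach_port_name_t names[8];   /* 8 == DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT */
    natural_t count = 8;         /* In: slots available for copyout */
    kern_return_t kr;

    kr = task_dyld_process_info_notify_get(names, &count);
    if (kr != KERN_SUCCESS) {    /* e.g. KERN_NO_SPACE if count was too small */
        return;
    }
    /* Out: count is now the number of names actually copied out (may be 0) */
    for (natural_t i = 0; i < count; i++) {
        /* the caller owns the returned rights and must deallocate them */
        (void)mach_port_deallocate(mach_task_self(), names[i]);
    }
}
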
#include <kern/kern_types.h>
#include <kern/assert.h>
-#include <kern/counters.h>
#include <kern/cpu_number.h>
#include <kern/ipc_kobject.h>
#include <kern/ipc_mig.h>
}
}
-static mach_msg_fetch_filter_policy_cbfunc_t mach_msg_fetch_filter_policy_callback = NULL;
+static SECURITY_READ_ONLY_LATE(mach_msg_fetch_filter_policy_cbfunc_t) mach_msg_fetch_filter_policy_callback = NULL;
kern_return_t
mach_msg_filter_register_callback(
#include <mach/vm_prot.h>
#include <mach/vm_map.h>
#include <kern/task.h>
-#include <kern/counters.h>
#include <kern/thread.h>
#include <kern/exc_guard.h>
#include <mach/mach_port_server.h>
}
kr = ipc_object_copyin(space, name, msgt_name, (ipc_object_t *) poly, 0, NULL,
- IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND);
+ (space == current_space() && msgt_name == MACH_MSG_TYPE_COPY_SEND) ?
+ IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND : IPC_OBJECT_COPYIN_FLAGS_SOFT_FAIL_IMMOVABLE_SEND);
if (kr == KERN_SUCCESS) {
*polyPoly = ipc_object_copyin_type(msgt_name);
thread_guard_violation(t, code, subcode, fatal);
}
+/*
+ * Temporary wrapper for immovable mach port guard exception.
+ *
+ * Condition: !(ip_is_control(port) && !immovable_control_port_enabled)
+ */
+void
+mach_port_guard_exception_immovable(
+ mach_port_name_t name,
+ mach_port_t port,
+ uint64_t portguard)
+{
+ if (ip_is_control(port) && immovable_control_port_enabled) {
+ mach_port_guard_exception(name, 0, portguard,
+ ipc_control_port_options & IPC_CONTROL_PORT_OPTIONS_IMMOVABLE_HARD ?
+ kGUARD_EXC_IMMOVABLE : kGUARD_EXC_IMMOVABLE_NON_FATAL);
+ } else if (!ip_is_control(port)) {
+ /* always fatal exception for non-control port violation */
+ mach_port_guard_exception(name, 0, portguard, kGUARD_EXC_IMMOVABLE);
+ } else {
+ /* ip_is_control(port) && !immovable_control_port_enabled */
+ panic("mach_port_guard_exception_immovable: condition does not hold.");
+ }
+}
+
/*
* Routine: mach_port_guard_ast
uint64_t inguard,
uint64_t portguard,
unsigned reason);
+
+extern void mach_port_guard_exception_immovable(
+ mach_port_name_t name,
+ mach_port_t port,
+ uint64_t portguard);
__END_DECLS
#endif /* _IPC_PORT_H_ */
#define DYLD_ALL_IMAGE_INFOS_ADDRESS_MINIMUM_VERSION 9
#define DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION 15
+#define DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT 8
+#define DYLD_PROCESS_INFO_NOTIFY_MAGIC 0x49414E46
+
/* Re-use dyld format for kext load addresses */
#if __LP64__
typedef struct user64_dyld_uuid_info kernel_uuid_info;
/* the following field is only in version 15 (Mac OS X 10.12, iOS 10.0) and later */
user32_addr_t sharedCacheBaseAddress;
uint64_t timestamp;
- user32_addr_t reserved[14];
+ user32_addr_t dyldPath;
+ mach_port_name_t notifyMachPorts[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
+ user32_addr_t reserved[5];
/* the following fields are only in version 16 (macOS 10.13, iOS 12.0) and later */
user32_addr_t compact_dyld_image_info_addr;
user32_size_t compact_dyld_image_info_size;
/* the following field is only in version 15 (macOS 10.12, iOS 10.0) and later */
user64_addr_t sharedCacheBaseAddress;
uint64_t timestamp;
- user64_addr_t reserved[14];
+ user64_addr_t dyldPath;
+ mach_port_name_t notifyMachPorts[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
+ user64_addr_t reserved[9];
/* the following fields are only in version 16 (macOS 10.13, iOS 12.0) and later */
user64_addr_t compact_dyld_image_info_addr;
user64_size_t compact_dyld_image_info_size;
pc = get_saved_state_pc(state);
sp = get_saved_state_sp(state);
} else {
- /* kstackptr may not always be there, so recompute it */
- struct arm_kernel_saved_state * state = &thread_get_kernel_state(thread)->machine.ss;
- stacklimit = VM_MAX_KERNEL_ADDRESS;
- stacklimit_bottom = VM_MIN_KERNEL_ADDRESS;
- bt_vm_map = kernel_map;
+ struct arm_saved_state *state = thread->machine.kpcb;
+ if (state != NULL) {
+ if (fp == 0) {
+ fp = state->ss_64.fp;
+ }
- /* Get the frame pointer */
- if (fp == 0) {
- fp = state->fp;
+ prevlr = state->ss_64.lr;
+ pc = state->ss_64.pc;
+ sp = state->ss_64.sp;
+ } else {
+ /* kstackptr may not always be there, so recompute it */
+ arm_kernel_saved_state_t *kstate = &thread_get_kernel_state(thread)->machine.ss;
+
+ if (fp == 0) {
+ fp = kstate->fp;
+ }
+ prevlr = kstate->lr;
+ pc = kstate->pc;
+ sp = kstate->sp;
}
- /* Fill in the current link register */
- prevlr = state->lr;
- pc = state->pc;
- sp = state->sp;
+ stacklimit = VM_MAX_KERNEL_ADDRESS;
+ stacklimit_bottom = VM_MIN_KERNEL_ADDRESS;
+ bt_vm_map = kernel_map;
}
if (!user_p && !prevlr && !fp && !sp && !pc) {
circle_queue.h \
clock.h \
coalition.h \
+ counter.h \
cpu_number.h \
cpu_data.h \
energy_perf.h \
extmod_statistics.h \
+ hv_io_notifier.h \
hv_support.h \
hv_support_kext.h \
ipc_mig.h \
locks.h \
lock_group.h \
host.h \
+ hvg_hypercall.h \
mach_param.h \
macro_help.h \
mpqueue.h \
*/
#include <kern/ast.h>
-#include <kern/counters.h>
+#include <kern/counter.h>
#include <kern/cpu_quiesce.h>
#include <kern/misc_protos.h>
#include <kern/queue.h>
assert(urgent_reason & AST_PREEMPT);
- counter(c_ast_taken_block++);
-
thread_block_reason(THREAD_CONTINUE_NULL, NULL, urgent_reason);
assert(ml_get_interrupts_enabled() == FALSE);
#endif
if (preemption_reasons & AST_PREEMPT) {
- counter(c_ast_taken_block++);
/* switching to a continuation implicitly re-enables interrupts */
thread_block_reason(thread_preempted, NULL, preemption_reasons);
/* NOTREACHED */
{
audit_session_aiaref(aia_p);
if (!ipc_kobject_make_send_lazy_alloc_port(sessionport,
- (ipc_kobject_t)aia_p, IKOT_AU_SESSIONPORT, false, 0)) {
+ (ipc_kobject_t)aia_p, IKOT_AU_SESSIONPORT, IPC_KOBJECT_ALLOC_NONE, false, 0)) {
audit_session_aiaunref(aia_p);
}
inline static void
bitmap_not(bitmap_t *out, const bitmap_t *in, uint nbits)
{
- for (uint i = 0; i <= bitmap_index(nbits - 1); i++) {
+ uint i;
+
+ for (i = 0; i < bitmap_index(nbits - 1); i++) {
out[i] = ~in[i];
}
+
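+ /* Handle the final, possibly partial, 64-bit word: keep only the bits below nbits. */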
+ uint nbits_complete = i * 64;
+
+ if (nbits > nbits_complete) {
+ out[i] = ~in[i] & mask(nbits - nbits_complete);
+ }
}
inline static void
inline static void
bitmap_and_not(bitmap_t *out, const bitmap_t *in1, const bitmap_t *in2, uint nbits)
{
- for (uint i = 0; i <= bitmap_index(nbits - 1); i++) {
+ uint i;
+
+ for (i = 0; i < bitmap_index(nbits - 1); i++) {
out[i] = in1[i] & ~in2[i];
}
+
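+ /* Same partial-word handling as bitmap_not: mask off bits at or above nbits. */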
+ uint nbits_complete = i * 64;
+
+ if (nbits > nbits_complete) {
+ out[i] = (in1[i] & ~in2[i]) & mask(nbits - nbits_complete);
+ }
}
inline static bool
return VM_MAP_NULL;
}
m = t->map;
- vm_map_reference_swap(m);
+ vm_map_reference(m);
task_unlock(t);
return m;
}
{
return vm_map_adjusted_size(map);
}
+int
+get_task_page_size(
+ task_t task)
+{
+ return vm_map_page_size(task->map);
+}
#if CONFIG_COREDUMP
ptinfo->pti_threads_system = tinfo.threads_system;
ptinfo->pti_threads_user = tinfo.threads_user;
- ptinfo->pti_faults = task->faults;
+ ptinfo->pti_faults = (int32_t) MIN(counter_load(&task->faults), INT32_MAX);
ptinfo->pti_pageins = task->pageins;
ptinfo->pti_cow_faults = task->cow_faults;
ptinfo->pti_messages_sent = task->messages_sent;
LCK_GRP_DECLARE(coalitions_lck_grp, "coalition");
/* coalitions_list_lock protects coalition_count, coalitions queue, coalition_next_id. */
-static LCK_MTX_DECLARE(coalitions_list_lock, &coalitions_lck_grp);
+static LCK_RW_DECLARE(coalitions_list_lock, &coalitions_lck_grp);
static uint64_t coalition_count;
static uint64_t coalition_next_id = 1;
static queue_head_t coalitions_q;
lck_mtx_init(&new_coal->lock, &coalitions_lck_grp, LCK_ATTR_NULL);
- lck_mtx_lock(&coalitions_list_lock);
+ lck_rw_lock_exclusive(&coalitions_list_lock);
new_coal->id = coalition_next_id++;
coalition_count++;
enqueue_tail(&coalitions_q, &new_coal->coalitions);
#endif
cid = new_coal->id;
ctype = new_coal->type;
- lck_mtx_unlock(&coalitions_list_lock);
+ lck_rw_unlock_exclusive(&coalitions_list_lock);
coal_dbg("id:%llu, type:%s", cid, coal_type_str(ctype));
* coalition_find_by_id_internal
* Returns: Coalition object with specified id, NOT referenced.
* If not found, returns COALITION_NULL.
- * Condition: coalitions_list_lock must be LOCKED.
+ * If found, returns a locked coalition.
+ *
+ * Condition: No locks held
*/
static coalition_t
coalition_find_by_id_internal(uint64_t coal_id)
{
+ coalition_t coal;
+
if (coal_id == 0) {
return COALITION_NULL;
}
- lck_mtx_assert(&coalitions_list_lock, LCK_MTX_ASSERT_OWNED);
- coalition_t coal;
+ lck_rw_lock_shared(&coalitions_list_lock);
qe_foreach_element(coal, &coalitions_q, coalitions) {
if (coal->id == coal_id) {
+ coalition_lock(coal);
+ lck_rw_unlock_shared(&coalitions_list_lock);
return coal;
}
}
+ lck_rw_unlock_shared(&coalitions_list_lock);
+
return COALITION_NULL;
}
coalition_t
coalition_find_by_id(uint64_t cid)
{
- if (cid == 0) {
- return COALITION_NULL;
- }
-
- lck_mtx_lock(&coalitions_list_lock);
-
coalition_t coal = coalition_find_by_id_internal(cid);
+
if (coal == COALITION_NULL) {
- lck_mtx_unlock(&coalitions_list_lock);
return COALITION_NULL;
}
- coalition_lock(coal);
+ /* coal is locked */
if (coal->reaped) {
coalition_unlock(coal);
- lck_mtx_unlock(&coalitions_list_lock);
return COALITION_NULL;
}
#endif
coalition_unlock(coal);
- lck_mtx_unlock(&coalitions_list_lock);
coal_dbg("id:%llu type:%s ref_count:%u",
coal->id, coal_type_str(coal->type), rc);
coalition_t
coalition_find_and_activate_by_id(uint64_t cid)
{
- if (cid == 0) {
- return COALITION_NULL;
- }
-
- lck_mtx_lock(&coalitions_list_lock);
-
coalition_t coal = coalition_find_by_id_internal(cid);
+
if (coal == COALITION_NULL) {
- lck_mtx_unlock(&coalitions_list_lock);
return COALITION_NULL;
}
- coalition_lock(coal);
+ /* coal is locked */
if (coal->reaped || coal->terminated) {
/* Too late to put something new into this coalition, it's
* already on its way out the door */
coalition_unlock(coal);
- lck_mtx_unlock(&coalitions_list_lock);
return COALITION_NULL;
}
#endif
coalition_unlock(coal);
- lck_mtx_unlock(&coalitions_list_lock);
coal_dbg("id:%llu type:%s ref_count:%u, active_count:%u",
coal->id, coal_type_str(coal->type), rc, ac);
coalition_unlock(coal);
- lck_mtx_lock(&coalitions_list_lock);
+ lck_rw_lock_exclusive(&coalitions_list_lock);
coalition_count--;
remqueue(&coal->coalitions);
- lck_mtx_unlock(&coalitions_list_lock);
+ lck_rw_unlock_exclusive(&coalitions_list_lock);
/* Release the list's reference and launchd's reference. */
coalition_release(coal);
int ncoals = 0;
struct coalition *coal;
- lck_mtx_lock(&coalitions_list_lock);
+ lck_rw_lock_shared(&coalitions_list_lock);
qe_foreach_element(coal, &coalitions_q, coalitions) {
if (!coal->reaped && (type < 0 || type == (int)coal->type)) {
if (coal_list && ncoals < list_sz) {
++ncoals;
}
}
- lck_mtx_unlock(&coalitions_list_lock);
+ lck_rw_unlock_shared(&coalitions_list_lock);
return ncoals;
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifdef XNU_KERNEL_PRIVATE
+
+#ifndef _KERN_COUNTER_H
+#define _KERN_COUNTER_H
+
+/*!
+ * @file <kern/counter.h>
+ *
+ * @brief
+ * Module for working with 64bit relaxed atomic counters.
+ *
+ * @discussion
+ * Different counter types have different speed-memory tradeoffs, but
+ * they all share a common interface.
+ *
+ * Counters can be statically allocated or dynamically allocated.
+ *
+ * Statically allocated counters are always backed by per-cpu storage which means
+ * writes take place on the current CPU's value and reads sum all of the per-cpu values.
+ *
+ * Dynamically allocated counters can be either per-cpu or use a single 64bit value.
+ * To create a per-cpu counter, use the scalable_counter_t type. Note that this
+ * trades off additional memory for better scalability.
+ * To create a single 64bit counter, use the atomic_counter_t type.
+ *
+ * For most counters you can just use the counter_t type and the choice of
+ * scalable or atomic will be made at compile time based on the target.
+ *
+ * The counter types are opaque handles. They ARE NOT COPYABLE. If you need
+ * to make a copy of a counter, you should do so like this:
+ * <code>
+ * counter_t original;
+ * ...
+ * counter_t copy;
+ * counter_alloc(&copy);
+ * counter_add(&copy, counter_load(&original));
+ * ...
+ * // Make sure to free them at some point.
+ * counter_free(&original);
+ * counter_free(&copy);
+ * </code>
+ *
+ * Static counter example:
+ * <code>
+ * SCALABLE_COUNTER_DEFINE(my_counter);
+ * ...
+ * counter_inc(&my_counter);
+ * assert(counter_load(&my_counter) == 1);
+ * </code>
+ *
+ * Dynamic Counter Example:
+ * <code>
+ * scalable_counter_t my_scalable_counter;
+ * atomic_counter_t my_atomic_counter;
+ * counter_t my_counter;
+ *
+ * // All three counters share the same interface. So to change the speed-memory
+ * // tradeoff just change the type.
+ * counter_alloc(&my_scalable_counter);
+ * counter_alloc(&my_atomic_counter);
+ * counter_alloc(&my_counter);
+ *
+ * counter_inc(&my_scalable_counter);
+ * counter_inc(&my_atomic_counter);
+ * counter_inc(&my_counter);
+ *
+ * assert(counter_load(&my_scalable_counter) == 1);
+ * assert(counter_load(&my_atomic_counter) == 1);
+ * assert(counter_load(&my_counter) == 1);
+ * </code>
+ */
+
+#include <mach/mach_types.h>
+#include <kern/macro_help.h>
+#include <kern/startup.h>
+#include <kern/zalloc.h>
+
+typedef __zpercpu uint64_t *scalable_counter_t;
+typedef uint64_t atomic_counter_t;
+/* Generic counter base type. Does not have an implementation. */
+struct generic_counter_t;
+
+/*!
+ * @macro SCALABLE_COUNTER_DECLARE
+ *
+ * @abstract
+ * (optionally) declares a static per-cpu counter (in a header).
+ *
+ * @param name the name of the counter.
+ */
+#define SCALABLE_COUNTER_DECLARE(name) \
+ extern scalable_counter_t name;
+
+/*!
+ * @macro SCALABLE_COUNTER_DEFINE
+ *
+ * @abstract
+ * Defines a static per-cpu counter.
+ * Counter can only be accessed after the TUNABLES phase of startup.
+ *
+ * @param name the name of the counter.
+ */
+#define SCALABLE_COUNTER_DEFINE(name) \
+ __startup_data uint64_t __ ##name##_early_storage = 0; \
+ scalable_counter_t name = {&__##name##_early_storage}; \
+ STARTUP_ARG(TUNABLES, STARTUP_RANK_MIDDLE, scalable_counter_static_boot_mangle, &name); \
+ STARTUP_ARG(PERCPU, STARTUP_RANK_SECOND, scalable_counter_static_init, &name);
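+
+/*
+ * Note on SCALABLE_COUNTER_DEFINE: the counter initially points at a single
+ * __startup_data word; the TUNABLES-phase hook mangles that pointer so early
+ * boot updates land in that word, and the PERCPU-phase hook migrates the
+ * accumulated value into permanent per-CPU storage.
+ */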
+
+/*
+ * Initialize a per-cpu counter.
+ * May block and will never fail.
+ * This counter must be freed with counter_free.
+ */
+OS_OVERLOADABLE
+extern void counter_alloc(struct generic_counter_t *);
+
+OS_OVERLOADABLE
+extern void counter_free(struct generic_counter_t *);
+/*
+ * Add amount to counter.
+ * @param amount: The amount to add.
+ */
+OS_OVERLOADABLE
+extern void counter_add(struct generic_counter_t *, uint64_t amount);
+
+/*
+ * Add 1 to this counter.
+ */
+OS_OVERLOADABLE
+extern void counter_inc(struct generic_counter_t *);
+
+/*
+ * Subtract 1 from this counter.
+ */
+OS_OVERLOADABLE
+extern void counter_dec(struct generic_counter_t *);
+
+/* Variants of the above operations where the caller takes responsibility for disabling preemption. */
+OS_OVERLOADABLE
+extern void counter_add_preemption_disabled(struct generic_counter_t *, uint64_t amount);
+OS_OVERLOADABLE
+extern void counter_inc_preemption_disabled(struct generic_counter_t *);
+OS_OVERLOADABLE
+extern void counter_dec_preemption_disabled(struct generic_counter_t *);
+
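+/*
+ * Illustrative sketch (the counter name is hypothetical): the
+ * *_preemption_disabled variants are meant for call sites that already run
+ * with preemption disabled, e.g. under a lock or in interrupt context, so the
+ * counter code does not have to manage preemption itself.
+ * <code>
+ * SCALABLE_COUNTER_DEFINE(my_hot_path_counter);
+ * ...
+ * // preemption already disabled at this call site
+ * counter_inc_preemption_disabled(&my_hot_path_counter);
+ * </code>
+ */
+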
+/*
+ * Read the value of the percpu counter.
+ * Note that this will cause synchronization of all the sharded values.
+ */
+OS_OVERLOADABLE
+extern uint64_t counter_load(struct generic_counter_t *);
+
+#pragma mark implementation details
+/* NB: Nothing below here should be used directly. */
+
+__startup_func void scalable_counter_static_boot_mangle(scalable_counter_t *counter);
+__startup_func void scalable_counter_static_init(scalable_counter_t *counter);
+
+#if XNU_TARGET_OS_WATCH || XNU_TARGET_OS_TV
+#define ATOMIC_COUNTER_USE_PERCPU 0
+#else
+#define ATOMIC_COUNTER_USE_PERCPU 1
+#endif /* XNU_TARGET_OS_WATCH || XNU_TARGET_OS_TV */
+
+#if ATOMIC_COUNTER_USE_PERCPU
+typedef scalable_counter_t counter_t;
+#else
+typedef atomic_counter_t counter_t;
+#endif /* ATOMIC_COUNTER_USE_PERCPU */
+
+#define COUNTER_MAKE_PROTOTYPES(counter_t) \
+OS_OVERLOADABLE \
+extern void counter_alloc(counter_t *); \
+ \
+OS_OVERLOADABLE \
+extern void counter_free(counter_t *); \
+ \
+OS_OVERLOADABLE \
+extern void counter_add(counter_t *, uint64_t amount); \
+ \
+OS_OVERLOADABLE \
+extern void counter_inc(counter_t *); \
+ \
+OS_OVERLOADABLE \
+extern void counter_dec(counter_t *); \
+ \
+OS_OVERLOADABLE \
+extern void counter_add_preemption_disabled(counter_t *, uint64_t amount); \
+ \
+OS_OVERLOADABLE \
+extern void counter_inc_preemption_disabled(counter_t *); \
+ \
+OS_OVERLOADABLE \
+extern void counter_dec_preemption_disabled(counter_t *); \
+ \
+OS_OVERLOADABLE \
+extern uint64_t counter_load(counter_t *);
+
+COUNTER_MAKE_PROTOTYPES(scalable_counter_t);
+COUNTER_MAKE_PROTOTYPES(atomic_counter_t);
+
+#endif /* _KERN_COUNTER_H */
+
+#endif /* XNU_KERNEL_PRIVATE */
--- /dev/null
+/*
+ * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <kern/assert.h>
+#include <kern/cpu_data.h>
+#include <kern/counter.h>
+#include <kern/zalloc.h>
+#include <machine/atomic.h>
+#include <machine/machine_routines.h>
+#include <machine/cpu_number.h>
+
+SECURITY_READ_ONLY_LATE(zone_t) counters_zone;
+ZONE_INIT(&counters_zone, "per_cpu_counters", sizeof(uint64_t),
+ ZC_PERCPU | ZC_ALIGNMENT_REQUIRED, ZONE_ID_ANY, NULL);
+
+/*
+ * Tracks how many static scalable counters are in use since they won't show up
+ * in the per_cpu_counters zone stats.
+ */
+uint64_t num_static_scalable_counters;
+
+/*
+ * Mangle the given scalable_counter_t so that it points to the early storage
+ * regardless of which CPU # we boot on.
+ * Must be run before we go multi-core.
+ */
+__startup_func void
+scalable_counter_static_boot_mangle(scalable_counter_t *counter)
+{
+ *counter = __zpcpu_mangle_for_boot(*counter);
+}
+
+/*
+ * Initializes a static counter in permanent per-cpu memory.
+ * Run during startup for each static per-cpu counter.
+ * Must be run before we go multi-core.
+ */
+__startup_func void
+scalable_counter_static_init(scalable_counter_t *counter)
+{
+ /*
+ * We pointed the counter to a single global value during early boot.
+ * Grab that value now. We'll store it in the current CPU's slot.
+ */
+ uint64_t current_value = os_atomic_load_wide(zpercpu_get(*counter), relaxed);
+ /*
+ * This counter can't be freed so we allocate it out of the permanent zone rather than
+ * our counter zone.
+ */
+ *counter = zalloc_percpu_permanent(sizeof(uint64_t), ZALIGN_64);
+ os_atomic_store_wide(zpercpu_get(*counter), current_value, relaxed);
+ num_static_scalable_counters++;
+}
+
+OS_OVERLOADABLE
+void
+counter_alloc(scalable_counter_t *counter)
+{
+ *counter = zalloc_percpu(counters_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
+}
+
+OS_OVERLOADABLE
+void
+counter_alloc(atomic_counter_t *counter)
+{
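+ /* An atomic counter is just a 64-bit word; "allocation" only resets it to zero. */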
+ os_atomic_store_wide(counter, 0, relaxed);
+}
+
+OS_OVERLOADABLE
+void
+counter_free(scalable_counter_t *counter)
+{
+ zfree_percpu(counters_zone, *counter);
+}
+
+OS_OVERLOADABLE
+void
+counter_free(atomic_counter_t *counter)
+{
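+ /* Nothing to release: atomic counters have no separate backing storage. */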
+ (void)counter;
+}
+
+OS_OVERLOADABLE
+void
+counter_add(atomic_counter_t *counter, uint64_t amount)
+{
+ os_atomic_add(counter, amount, relaxed);
+}
+
+OS_OVERLOADABLE
+void
+counter_inc(atomic_counter_t *counter)
+{
+ os_atomic_inc(counter, relaxed);
+}
+
+OS_OVERLOADABLE
+void
+counter_dec(atomic_counter_t *counter)
+{
+ os_atomic_dec(counter, relaxed);
+}
+
+OS_OVERLOADABLE
+void
+counter_add_preemption_disabled(atomic_counter_t *counter, uint64_t amount)
+{
+ counter_add(counter, amount);
+}
+
+OS_OVERLOADABLE
+void
+counter_inc_preemption_disabled(atomic_counter_t *counter)
+{
+ counter_inc(counter);
+}
+
+OS_OVERLOADABLE
+void
+counter_dec_preemption_disabled(atomic_counter_t *counter)
+{
+ counter_dec(counter);
+}
+
+OS_OVERLOADABLE
+uint64_t
+counter_load(atomic_counter_t *counter)
+{
+ return os_atomic_load_wide(counter, relaxed);
+}
+
+OS_OVERLOADABLE
+uint64_t
+counter_load(scalable_counter_t *counter)
+{
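+ /* Sum every CPU's shard; with concurrent writers this is a relaxed snapshot, not an instantaneous total. */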
+ uint64_t value = 0;
+ zpercpu_foreach(it, *counter) {
+ value += os_atomic_load_wide(it, relaxed);
+ }
+ return value;
+}
+++ /dev/null
-/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * Mach Operating System
- * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
- * All Rights Reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-
-#include <mach_counters.h>
-
-#include <kern/counters.h>
-
-/*
- * We explicitly initialize the counters to make
- * them contiguous in the kernel's data space.
- * This makes them easier to examine with ddb.
- */
-
-#if MACH_COUNTERS
-mach_counter_t c_action_thread_block = 0;
-mach_counter_t c_ast_taken_block = 0;
-mach_counter_t c_dev_io_blocks = 0;
-mach_counter_t c_dev_io_tries = 0;
-mach_counter_t c_idle_thread_block = 0;
-mach_counter_t c_idle_thread_handoff = 0;
-mach_counter_t c_incoming_interrupts = 0;
-mach_counter_t c_io_done_thread_block = 0;
-mach_counter_t c_ipc_mqueue_receive_block_kernel = 0;
-mach_counter_t c_ipc_mqueue_receive_block_user = 0;
-mach_counter_t c_ipc_mqueue_send_block = 0;
-mach_counter_t c_net_thread_block = 0;
-mach_counter_t c_reaper_thread_block = 0;
-mach_counter_t c_sched_thread_block = 0;
-mach_counter_t c_stacks_current = 0;
-mach_counter_t c_stacks_max = 0;
-mach_counter_t c_stacks_min = 0;
-mach_counter_t c_swtch_block = 0;
-mach_counter_t c_swtch_pri_block = 0;
-mach_counter_t c_syscalls_unix = 0;
-mach_counter_t c_syscalls_mach = 0;
-mach_counter_t c_thread_invoke_csw = 0;
-mach_counter_t c_thread_invoke_hits = 0;
-mach_counter_t c_thread_invoke_misses = 0;
-mach_counter_t c_thread_invoke_same = 0;
-mach_counter_t c_thread_invoke_same_cont = 0;
-mach_counter_t c_thread_switch_block = 0;
-mach_counter_t c_thread_switch_handoff = 0;
-mach_counter_t c_vm_fault_page_block_backoff_kernel = 0;
-mach_counter_t c_vm_fault_page_block_busy_kernel = 0;
-mach_counter_t c_vm_map_simplified = 0;
-mach_counter_t c_vm_map_simplify_called = 0;
-mach_counter_t c_vm_map_simplify_entry_called = 0;
-mach_counter_t c_vm_page_wait_block = 0;
-mach_counter_t c_vm_pageout_block = 0;
-mach_counter_t c_vm_pageout_scan_block = 0;
-mach_counter_t c_vm_fault_retry_on_w_prot = 0;
-mach_counter_t c_vm_fault_wait_on_unlock = 0;
-#endif /* MACH_COUNTERS */
+++ /dev/null
-/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * Mach Operating System
- * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
- * All Rights Reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-
-#ifndef _KERN_COUNTERS_
-#define _KERN_COUNTERS_
-
-#include <mach_counters.h>
-
-/*
- * We can count various interesting events and paths.
- *
- * Use counter() to change the counters, eg:
- * counter(c_idle_thread_block++);
- * Use counter_always() for non-conditional counters.
- */
-
-#define counter_always(code) code
-
-#if MACH_COUNTERS
-
-#define counter(code) counter_always(code)
-
-#else /* MACH_COUNTERS */
-
-#define counter(code)
-
-#endif /* MACH_COUNTERS */
-
-/*
- * We define the counters with individual integers,
- * instead of a big structure, so that ddb
- * will know the addresses of the counters.
- */
-
-typedef unsigned int mach_counter_t;
-
-#if MACH_COUNTERS
-extern mach_counter_t c_action_thread_block;
-extern mach_counter_t c_ast_taken_block;
-extern mach_counter_t c_dev_io_blocks;
-extern mach_counter_t c_dev_io_tries;
-extern mach_counter_t c_idle_thread_block;
-extern mach_counter_t c_idle_thread_handoff;
-extern mach_counter_t c_incoming_interrupts;
-extern mach_counter_t c_io_done_thread_block;
-extern mach_counter_t c_ipc_mqueue_receive_block_kernel;
-extern mach_counter_t c_ipc_mqueue_receive_block_user;
-extern mach_counter_t c_ipc_mqueue_send_block;
-extern mach_counter_t c_net_thread_block;
-extern mach_counter_t c_reaper_thread_block;
-extern mach_counter_t c_sched_thread_block;
-extern mach_counter_t c_stacks_current;
-extern mach_counter_t c_stacks_max;
-extern mach_counter_t c_stacks_min;
-extern mach_counter_t c_swtch_block;
-extern mach_counter_t c_swtch_pri_block;
-extern mach_counter_t c_syscalls_unix;
-extern mach_counter_t c_syscalls_mach;
-extern mach_counter_t c_thread_invoke_csw;
-extern mach_counter_t c_thread_invoke_same;
-extern mach_counter_t c_thread_invoke_same_cont;
-extern mach_counter_t c_thread_invoke_misses;
-extern mach_counter_t c_thread_invoke_hits;
-extern mach_counter_t c_thread_switch_block;
-extern mach_counter_t c_thread_switch_handoff;
-extern mach_counter_t c_vm_fault_page_block_backoff_kernel;
-extern mach_counter_t c_vm_fault_page_block_busy_kernel;
-extern mach_counter_t c_vm_fault_retry_on_w_prot;
-extern mach_counter_t c_vm_fault_wait_on_unlock;
-extern mach_counter_t c_vm_map_simplified;
-extern mach_counter_t c_vm_map_simplify_called;
-extern mach_counter_t c_vm_map_simplify_entry_called;
-extern mach_counter_t c_vm_page_wait_block;
-extern mach_counter_t c_vm_pageout_block;
-extern mach_counter_t c_vm_pageout_scan_block;
-#endif /* MACH_COUNTERS */
-
-#endif /* _KERN_COUNTERS_ */
static uint32_t cpu_checkin_min_interval_us;
#if __LP64__
-static_assert(MAX_CPUS <= 32);
-#define CPU_CHECKIN_MASK 0x5555555555555555UL
-#define CPU_EXPECTED_MASK (~CPU_CHECKIN_MASK)
+#define CPU_CHECKIN_MASK_MAX_CPUS 32
+#define CPU_CHECKIN_MASK 0x5555555555555555UL
+#define CPU_EXPECTED_MASK (~CPU_CHECKIN_MASK)
#else
/* Avoid double-wide CAS on 32-bit platforms by using a 32-bit state and mask */
-static_assert(MAX_CPUS <= 16);
-#define CPU_CHECKIN_MASK 0x55555555UL
-#define CPU_EXPECTED_MASK (~CPU_CHECKIN_MASK)
+#define CPU_CHECKIN_MASK_MAX_CPUS 16
+#define CPU_CHECKIN_MASK 0x55555555UL
+#define CPU_EXPECTED_MASK (~CPU_CHECKIN_MASK)
#endif
+static_assert(MAX_CPUS <= CPU_CHECKIN_MASK_MAX_CPUS);
static_assert(CPU_CHECKIN_MASK == CPU_EXPECTED_MASK >> 1);
static inline checkin_mask_t
void
cpu_quiescent_counter_init(void)
{
- assert(CPU_CHECKIN_MASK & cpu_checked_in_bit(MAX_CPUS));
- assert(CPU_EXPECTED_MASK & cpu_expected_bit(MAX_CPUS));
- assert((CPU_CHECKIN_MASK & cpu_expected_bit(MAX_CPUS)) == 0);
- assert((CPU_EXPECTED_MASK & cpu_checked_in_bit(MAX_CPUS)) == 0);
+ assert(CPU_CHECKIN_MASK & cpu_checked_in_bit(MAX_CPUS - 1));
+ assert(CPU_EXPECTED_MASK & cpu_expected_bit(MAX_CPUS - 1));
+ assert((CPU_CHECKIN_MASK & cpu_expected_bit(MAX_CPUS - 1)) == 0);
+ assert((CPU_EXPECTED_MASK & cpu_checked_in_bit(MAX_CPUS - 1)) == 0);
cpu_quiescent_counter_set_min_interval_us(CPU_CHECKIN_MIN_INTERVAL_US);
}
struct cpu_quiesce *st = PERCPU_GET(cpu_quiesce);
__assert_only int cpuid = cpu_number();
+ assert(cpuid < MAX_CPUS);
assert(st->state == CPU_QUIESCE_COUNTER_NONE ||
st->state == CPU_QUIESCE_COUNTER_LEFT);
* conventional sense.
*/
if (debugger_current_op == DBOP_PANIC || ((debugger_current_op == DBOP_DEBUGGER) && debugger_is_panic))
-#endif
+#endif /* __x86_64__ */
{
kdp_callouts(KDP_EVENT_PANICLOG);
/* DEBUGGER_OPTION_PANICLOGANDREBOOT is used for two finger resets on embedded so we get a paniclog */
if (debugger_panic_options & DEBUGGER_OPTION_PANICLOGANDREBOOT) {
+ PEHaltRestart(kPEPanicDiagnosticsDone);
PEHaltRestart(kPEPanicRestartCPUNoCallouts);
}
}
*/
if ((debugger_panic_options & DEBUGGER_OPTION_SKIP_LOCAL_COREDUMP) &&
(debug_boot_arg & DB_REBOOT_POST_CORE)) {
+ PEHaltRestart(kPEPanicDiagnosticsDone);
kdp_machine_reboot_type(kPEPanicRestartCPU, debugger_panic_options);
}
if (on_device_corefile_enabled()) {
if (!kdp_has_polled_corefile()) {
if (debug_boot_arg & (DB_KERN_DUMP_ON_PANIC | DB_KERN_DUMP_ON_NMI)) {
- paniclog_append_noflush("skipping local kernel core because core file could not be opened prior to panic (error : 0x%x)",
+ paniclog_append_noflush("skipping local kernel core because core file could not be opened prior to panic (error : 0x%x)\n",
kdp_polled_corefile_error());
#if defined(__arm__) || defined(__arm64__)
panic_info->eph_panic_flags |= EMBEDDED_PANIC_HEADER_FLAG_COREDUMP_FAILED;
}
#if XNU_MONITOR
else if ((pmap_get_cpu_data()->ppl_state == PPL_STATE_PANIC) && (debug_boot_arg & (DB_KERN_DUMP_ON_PANIC | DB_KERN_DUMP_ON_NMI))) {
- paniclog_append_noflush("skipping local kernel core because the PPL is in PANIC state");
+ paniclog_append_noflush("skipping local kernel core because the PPL is in PANIC state\n");
panic_info->eph_panic_flags |= EMBEDDED_PANIC_HEADER_FLAG_COREDUMP_FAILED;
paniclog_flush();
}
*/
if ((debug_boot_arg & DB_REBOOT_POST_CORE) &&
((ret == 0) || (debugger_panic_options & DEBUGGER_OPTION_ATTEMPTCOREDUMPANDREBOOT))) {
+ PEHaltRestart(kPEPanicDiagnosticsDone);
kdp_machine_reboot_type(kPEPanicRestartCPU, debugger_panic_options);
}
}
}
+ if (debugger_current_op == DBOP_PANIC ||
+ ((debugger_current_op == DBOP_DEBUGGER) && debugger_is_panic)) {
+ PEHaltRestart(kPEPanicDiagnosticsDone);
+ }
+
if (debug_boot_arg & DB_REBOOT_ALWAYS) {
kdp_machine_reboot_type(kPEPanicRestartCPU, debugger_panic_options);
}
panic_spin_shmcon();
}
#endif /* defined(__arm__) || defined(__arm64__) */
+
+#else /* CONFIG_KDP_INTERACTIVE_DEBUGGING */
+
+ PEHaltRestart(kPEPanicDiagnosticsDone);
+
#endif /* CONFIG_KDP_INTERACTIVE_DEBUGGING */
if (!panicDebugging) {
extern long long alloc_ptepages_count;
#endif
-extern boolean_t panic_include_zprint;
-extern mach_memory_info_t *panic_kext_memory_info;
-extern vm_size_t panic_kext_memory_size;
-
__private_extern__ void
panic_display_zprint(void)
{
zone_index_foreach(i) {
if (ml_nofault_copy((vm_offset_t)&zone_array[i],
(vm_offset_t)&zone_copy, sizeof(struct zone)) == sizeof(struct zone)) {
- if (zone_copy.page_count > atop(1024 * 1024)) {
+ if (zone_copy.z_wired_cur > atop(1024 * 1024)) {
paniclog_append_noflush("%-8s%-20s %10llu %10lu\n",
zone_heap_name(&zone_copy),
- zone_copy.z_name, ptoa_64(zone_copy.page_count),
+ zone_copy.z_name, (uint64_t)zone_size_wired(&zone_copy),
(uintptr_t)zone_size_free(&zone_copy));
}
}
#endif /* CONFIG_ECC_LOGGING */
#if CONFIG_ZLEAKS
-extern boolean_t panic_include_ztrace;
-extern struct ztrace* top_ztrace;
void panic_print_symbol_name(vm_address_t search);
/*
#define KF_INTERRUPT_MASKED_DEBUG_OVRD (0x40)
#define KF_TRAPTRACE_OVRD (0x80)
#define KF_IOTRACE_OVRD (0x100)
+#define KF_INTERRUPT_MASKED_DEBUG_STACKSHOT_OVRD (0x200)
boolean_t kern_feature_override(uint32_t fmask);
#include <ipc/ipc_pset.h>
#include <ipc/ipc_machdep.h>
-#include <kern/counters.h>
#include <kern/ipc_tt.h>
#include <kern/task.h>
#include <kern/thread.h>
}
/* Reset gzalloc_data. */
- lock_zone(zone);
+ zone_lock(zone);
memcpy((void *)gzfc_copy, (void *)zone->gz.gzfc, gzfcsz);
bzero((void *)zone->gz.gzfc, gzfcsz);
zone->gz.gzfc_index = 0;
- unlock_zone(zone);
+ zone_unlock(zone);
/* Free up all the cached elements. */
for (uint32_t index = 0; index < gzfc_size; index++) {
*/
/* Decrement zone counters. */
- lock_zone(zone);
- zone->countfree += freed_elements;
- zone->page_count -= freed_elements;
- unlock_zone(zone);
+ zone_lock(zone);
+ zone->z_elems_free += freed_elements;
+ zone->z_wired_cur -= freed_elements;
+ zone_unlock(zone);
kmem_free(kernel_map, gzfc_copy, gzfcsz);
}
vm_offset_t residue = rounded_size - zone_elem_size(zone);
vm_offset_t gzaddr = 0;
gzhdr_t *gzh, *gzhcopy = NULL;
+ bool new_va = false;
if (!kmem_ready || (vm_page_zone == ZONE_NULL)) {
/* Early allocations are supplied directly from the
panic("gzalloc: kernel_memory_allocate for size 0x%llx failed with %d",
(uint64_t)rounded_size, kr);
}
+ new_va = true;
}
if (gzalloc_uf_mode) {
addr = (gzaddr + residue);
}
- if (zone->zfree_clear_mem) {
+ if (zone->z_free_zeroes) {
bzero((void *)gzaddr, rounded_size);
} else {
/* Fill with a pattern on allocation to trap uninitialized
*gzhcopy = *gzh;
}
- lock_zone(zone);
+ zone_lock(zone);
assert(zone->z_self == zone);
- zone->countfree--;
- zone->page_count += 1;
+ zone->z_elems_free--;
+ if (new_va) {
+ zone->z_va_cur += 1;
+ }
+ zone->z_wired_cur += 1;
zpercpu_get(zstats)->zs_mem_allocated += rounded_size;
-#if ZALLOC_DETAILED_STATS
- zpercpu_get(zstats)->zs_mem_wasted += rounded_size - zone_elem_size(zone);
-#endif /* ZALLOC_DETAILED_STATS */
- unlock_zone(zone);
+ zone_unlock(zone);
OSAddAtomic64((SInt32) rounded_size, &gzalloc_allocated);
OSAddAtomic64((SInt32) (rounded_size - zone_elem_size(zone)), &gzalloc_wasted);
}
if (gzfc_size && gzalloc_dfree_check) {
- lock_zone(zone);
+ zone_lock(zone);
assert(zone->z_self == zone);
for (uint32_t gd = 0; gd < gzfc_size; gd++) {
if (zone->gz.gzfc[gd] != saddr) {
"current free cache index: %d, freed index: %d",
__func__, saddr, zone->gz.gzfc_index, gd);
}
- unlock_zone(zone);
+ zone_unlock(zone);
}
if (gzalloc_consistency_checks) {
free_addr = saddr;
}
- lock_zone(zone);
+ zone_lock(zone);
assert(zone->z_self == zone);
/* Insert newly freed element into the protected free element
}
if (free_addr) {
- zone->countfree++;
- zone->page_count -= 1;
+ zone->z_elems_free++;
+ zone->z_wired_cur -= 1;
}
zpercpu_get(zstats)->zs_mem_freed += rounded_size;
- unlock_zone(zone);
+ zone_unlock(zone);
if (free_addr) {
// TODO: consider using physical reads to check for
#include <pexpert/pexpert.h>
-vm_statistics64_data_t PERCPU_DATA(vm_stat);
-uint64_t PERCPU_DATA(vm_page_grab_count);
+SCALABLE_COUNTER_DEFINE(vm_statistics_zero_fill_count); /* # of zero fill pages */
+SCALABLE_COUNTER_DEFINE(vm_statistics_reactivations); /* # of pages reactivated */
+SCALABLE_COUNTER_DEFINE(vm_statistics_pageins); /* # of pageins */
+SCALABLE_COUNTER_DEFINE(vm_statistics_pageouts); /* # of pageouts */
+SCALABLE_COUNTER_DEFINE(vm_statistics_faults); /* # of faults */
+SCALABLE_COUNTER_DEFINE(vm_statistics_cow_faults); /* # of copy-on-writes */
+SCALABLE_COUNTER_DEFINE(vm_statistics_lookups); /* object cache lookups */
+SCALABLE_COUNTER_DEFINE(vm_statistics_hits); /* object cache hits */
+SCALABLE_COUNTER_DEFINE(vm_statistics_purges); /* # of pages purged */
+SCALABLE_COUNTER_DEFINE(vm_statistics_decompressions); /* # of pages decompressed */
+SCALABLE_COUNTER_DEFINE(vm_statistics_compressions); /* # of pages compressed */
+SCALABLE_COUNTER_DEFINE(vm_statistics_swapins); /* # of pages swapped in (via compression segments) */
+SCALABLE_COUNTER_DEFINE(vm_statistics_swapouts); /* # of pages swapped out (via compression segments) */
+SCALABLE_COUNTER_DEFINE(vm_statistics_total_uncompressed_pages_in_compressor); /* # of pages (uncompressed) held within the compressor. */
+SCALABLE_COUNTER_DEFINE(vm_page_grab_count);
host_data_t realhost;
+static void
+get_host_vm_stats(vm_statistics64_t out)
+{
+ out->zero_fill_count = counter_load(&vm_statistics_zero_fill_count);
+ out->reactivations = counter_load(&vm_statistics_reactivations);
+ out->pageins = counter_load(&vm_statistics_pageins);
+ out->pageouts = counter_load(&vm_statistics_pageouts);
+ out->faults = counter_load(&vm_statistics_faults);
+ out->cow_faults = counter_load(&vm_statistics_cow_faults);
+ out->lookups = counter_load(&vm_statistics_lookups);
+ out->hits = counter_load(&vm_statistics_hits);
+ out->compressions = counter_load(&vm_statistics_compressions);
+ out->decompressions = counter_load(&vm_statistics_decompressions);
+ out->swapins = counter_load(&vm_statistics_swapins);
+ out->swapouts = counter_load(&vm_statistics_swapouts);
+}
vm_extmod_statistics_data_t host_extmod_statistics;
kern_return_t
return KERN_INVALID_ARGUMENT;
}
- assert(host_priv == &realhost);
-
unsigned int count = processor_count;
assert(count != 0);
return KERN_FAILURE;
}
- host_vm_stat = *PERCPU_GET_MASTER(vm_stat);
-
- percpu_foreach_secondary(stat, vm_stat) {
- vm_statistics64_data_t data = *stat;
- host_vm_stat.zero_fill_count += data.zero_fill_count;
- host_vm_stat.reactivations += data.reactivations;
- host_vm_stat.pageins += data.pageins;
- host_vm_stat.pageouts += data.pageouts;
- host_vm_stat.faults += data.faults;
- host_vm_stat.cow_faults += data.cow_faults;
- host_vm_stat.lookups += data.lookups;
- host_vm_stat.hits += data.hits;
- }
+ get_host_vm_stats(&host_vm_stat);
stat32 = (vm_statistics_t)info;
}
}
stat32->inactive_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(vm_page_inactive_count);
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
stat32->wire_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(vm_page_wire_count);
-#else
+#else /* !XNU_TARGET_OS_OSX */
stat32->wire_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(vm_page_wire_count + vm_page_throttled_count + vm_lopage_free_count);
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
stat32->zero_fill_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(host_vm_stat.zero_fill_count);
stat32->reactivations = VM_STATISTICS_TRUNCATE_TO_32_BIT(host_vm_stat.reactivations);
stat32->pageins = VM_STATISTICS_TRUNCATE_TO_32_BIT(host_vm_stat.pageins);
if (*count < HOST_VM_INFO64_REV0_COUNT) {
return KERN_FAILURE;
}
-
- host_vm_stat = *PERCPU_GET_MASTER(vm_stat);
-
- percpu_foreach_secondary(stat, vm_stat) {
- vm_statistics64_data_t data = *stat;
- host_vm_stat.zero_fill_count += data.zero_fill_count;
- host_vm_stat.reactivations += data.reactivations;
- host_vm_stat.pageins += data.pageins;
- host_vm_stat.pageouts += data.pageouts;
- host_vm_stat.faults += data.faults;
- host_vm_stat.cow_faults += data.cow_faults;
- host_vm_stat.lookups += data.lookups;
- host_vm_stat.hits += data.hits;
- host_vm_stat.compressions += data.compressions;
- host_vm_stat.decompressions += data.decompressions;
- host_vm_stat.swapins += data.swapins;
- host_vm_stat.swapouts += data.swapouts;
- }
+ get_host_vm_stats(&host_vm_stat);
vm_statistics64_t stat = (vm_statistics64_t)info;
}
}
stat->inactive_count = vm_page_inactive_count;
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
stat->wire_count = vm_page_wire_count;
-#else
+#else /* !XNU_TARGET_OS_OSX */
stat->wire_count = vm_page_wire_count + vm_page_throttled_count + vm_lopage_free_count;
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
stat->zero_fill_count = host_vm_stat.zero_fill_count;
stat->reactivations = host_vm_stat.reactivations;
stat->pageins = host_vm_stat.pageins;
return KERN_SUCCESS;
}
-
-uint64_t
-get_pages_grabbed_count(void)
-{
- uint64_t pages_grabbed_count = 0;
-
- percpu_foreach(count, vm_page_grab_count) {
- pages_grabbed_count += *count;
- }
-
- return pages_grabbed_count;
-}
-
-
kern_return_t
get_sched_statistics(struct _processor_statistics_np * out, uint32_t * count)
{
return KERN_NO_ACCESS;
}
+ if (IP_VALID(port) && (port->ip_immovable_receive || port->ip_immovable_send)) {
+ return KERN_INVALID_RIGHT;
+ }
+
return host_set_special_port(host_priv, id, port);
}
return KERN_INVALID_ARGUMENT;
}
- assert(host_priv == &realhost);
-
/*
* Always enforce that the multiuser bit is set
* if a value is written to the commpage word.
#ifndef _KERN_HOST_STATISTICS_H_
#define _KERN_HOST_STATISTICS_H_
-#include <libkern/OSAtomic.h>
-#include <mach/vm_statistics.h>
-#include <kern/percpu.h>
-#include <os/atomic_private.h>
+#include <kern/counter.h>
-extern
-uint64_t get_pages_grabbed_count(void);
+SCALABLE_COUNTER_DECLARE(vm_statistics_zero_fill_count); /* # of zero fill pages */
+SCALABLE_COUNTER_DECLARE(vm_statistics_reactivations); /* # of pages reactivated */
+SCALABLE_COUNTER_DECLARE(vm_statistics_pageins); /* # of pageins */
+SCALABLE_COUNTER_DECLARE(vm_statistics_pageouts); /* # of pageouts */
+SCALABLE_COUNTER_DECLARE(vm_statistics_faults); /* # of faults */
+SCALABLE_COUNTER_DECLARE(vm_statistics_cow_faults); /* # of copy-on-writes */
+SCALABLE_COUNTER_DECLARE(vm_statistics_lookups); /* object cache lookups */
+SCALABLE_COUNTER_DECLARE(vm_statistics_hits); /* object cache hits */
+SCALABLE_COUNTER_DECLARE(vm_statistics_purges); /* # of pages purged */
+SCALABLE_COUNTER_DECLARE(vm_statistics_decompressions); /* # of pages decompressed */
+SCALABLE_COUNTER_DECLARE(vm_statistics_compressions); /* # of pages compressed */
+SCALABLE_COUNTER_DECLARE(vm_statistics_swapins); /* # of pages swapped in (via compression segments) */
+SCALABLE_COUNTER_DECLARE(vm_statistics_swapouts); /* # of pages swapped out (via compression segments) */
+SCALABLE_COUNTER_DECLARE(vm_statistics_total_uncompressed_pages_in_compressor); /* # of pages (uncompressed) held within the compressor. */
-PERCPU_DECL(vm_statistics64_data_t, vm_stat);
-PERCPU_DECL(uint64_t, vm_page_grab_count);
-
-#define VM_STAT_INCR(event) \
-MACRO_BEGIN \
- os_atomic_inc(&PERCPU_GET(vm_stat)->event, relaxed); \
-MACRO_END
-
-#define VM_STAT_INCR_BY(event, amount) \
-MACRO_BEGIN \
- os_atomic_add(&PERCPU_GET(vm_stat)->event, amount, relaxed); \
-MACRO_END
+SCALABLE_COUNTER_DECLARE(vm_page_grab_count);
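+
+/*
+ * Illustrative sketch (not taken from a specific call site): with the
+ * VM_STAT_INCR macros gone, a fault-path update would presumably increment
+ * the dedicated scalable counter directly, e.g.:
+ *
+ *     counter_inc(&vm_statistics_faults);
+ */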
#endif /* _KERN_HOST_STATISTICS_H_ */
--- /dev/null
+/*
+ * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <kern/hv_support.h>
+#include <kern/ipc_mig.h>
+#include <kern/kalloc.h>
+#include <kern/locks.h>
+#include <mach/port.h>
+#include <sys/queue.h>
+#include <ipc/ipc_port.h>
+
+#include <stdbool.h>
+
+#include "hv_io_notifier.h"
+
+static LCK_GRP_DECLARE(ion_lock_grp, "io notifier");
+
+typedef struct hv_ion_entry {
+ LIST_ENTRY(hv_ion_entry) list;
+
+ uint64_t addr;
+ size_t size;
+ uint64_t value;
+ uint32_t flags;
+
+ mach_port_t port;
+ mach_port_name_t port_name;
+} hv_ion_entry_t;
+
+LIST_HEAD(io_notifier_list, hv_ion_entry);
+
+struct hv_ion_grp {
+ struct io_notifier_list list;
+ lck_rw_t lock;
+};
+
+/*
+ * Lookup a matching notifier and return it.
+ */
+static hv_ion_entry_t *
+hv_io_notifier_grp_lookup(const hv_ion_grp_t *grp, const hv_ion_entry_t *key)
+{
+ hv_ion_entry_t *ion = NULL;
+
+ LIST_FOREACH(ion, &grp->list, list) {
+ if (ion->addr != key->addr) {
+ continue;
+ }
+
+ if (!(ion->flags & kHV_ION_ANY_SIZE) && ion->size != key->size) {
+ continue;
+ }
+
+ if (!(ion->flags & kHV_ION_ANY_VALUE) && ion->value != key->value) {
+ continue;
+ }
+
+ if (ion->port_name != key->port_name) {
+ continue;
+ }
+
+ if (ion->flags != key->flags) {
+ continue;
+ }
+
+ return ion;
+ }
+
+ return NULL;
+}
+
+/*
+ * Add a new notifier.
+ * Return KERN_SUCCESS if the notifier was added, an error otherwise.
+ */
+kern_return_t
+hv_io_notifier_grp_add(hv_ion_grp_t *grp, const hv_ion_t *notifier)
+{
+ hv_ion_entry_t *ion = NULL;
+
+ ion = kalloc(sizeof(*ion));
+ if (ion == NULL) {
+ return KERN_RESOURCE_SHORTAGE;
+ }
+
+ ion->addr = notifier->addr;
+ ion->size = notifier->size;
+ ion->value = notifier->value;
+ ion->flags = notifier->flags;
+ ion->port_name = notifier->port_name;
+
+ kern_return_t ret = ipc_object_copyin(current_task()->itk_space,
+ ion->port_name, MACH_MSG_TYPE_COPY_SEND, (ipc_object_t *)&ion->port, 0,
+ NULL, IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND);
+ if (ret != KERN_SUCCESS) {
+ kfree(ion, sizeof(*ion));
+ return ret;
+ }
+
+ lck_rw_lock_exclusive(&grp->lock);
+
+ if (hv_io_notifier_grp_lookup(grp, ion) != NULL) {
+ lck_rw_done(&grp->lock);
+ ipc_port_release_send(ion->port);
+ kfree(ion, sizeof(*ion));
+ return KERN_FAILURE;
+ }
+
+ LIST_INSERT_HEAD(&grp->list, ion, list);
+
+ lck_rw_done(&grp->lock);
+
+ return KERN_SUCCESS;
+}
+
+/*
+ * Remove and free a notifier.
+ * Return KERN_SUCCESS if the notifier was removed, an error otherwise.
+ */
+kern_return_t
+hv_io_notifier_grp_remove(hv_ion_grp_t *grp, const hv_ion_t *notifier)
+{
+ hv_ion_entry_t ion = {};
+ hv_ion_entry_t *entry = NULL;
+
+ ion.addr = notifier->addr;
+ ion.size = notifier->size;
+ ion.value = notifier->value;
+ ion.flags = notifier->flags;
+ ion.port_name = notifier->port_name;
+
+ lck_rw_lock_exclusive(&grp->lock);
+
+ entry = hv_io_notifier_grp_lookup(grp, &ion);
+ if (entry == NULL) {
+ lck_rw_done(&grp->lock);
+ return KERN_FAILURE;
+ }
+
+ LIST_REMOVE(entry, list);
+
+ lck_rw_done(&grp->lock);
+
+ ipc_port_release_send(entry->port);
+ kfree(entry, sizeof(*entry));
+
+ return KERN_SUCCESS;
+}
+
+/*
+ * Find matching notifiers and notify the port.
+ * Returns KERN_SUCCESS if no errors occurred when sending notifications and at
+ * least one notification was sent.
+ */
+kern_return_t
+hv_io_notifier_grp_fire(hv_ion_grp_t *grp, uint64_t addr, size_t size,
+ uint64_t value)
+{
+ kern_return_t kr = KERN_FAILURE;
+ hv_ion_entry_t *ion = NULL;
+ bool fired = false;
+
+ lck_rw_lock_shared(&grp->lock);
+
+ LIST_FOREACH(ion, &grp->list, list) {
+ if (ion->addr != addr) {
+ continue;
+ }
+
+ if (!(ion->flags & kHV_ION_ANY_SIZE) && ion->size != size) {
+ continue;
+ }
+
+ if (!(ion->flags & kHV_ION_ANY_VALUE) && ion->value != value) {
+ continue;
+ }
+
+ hv_ion_message_t msg = {
+ .header.msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0),
+ .header.msgh_size = sizeof(msg),
+ .header.msgh_remote_port = ion->port,
+ .header.msgh_local_port = MACH_PORT_NULL,
+ .header.msgh_voucher_port = MACH_PORT_NULL,
+ .header.msgh_id = 0,
+
+ .addr = addr,
+ .size = size,
+ .value = value,
+ };
+
+ kr = mach_msg_send_from_kernel_with_options(&msg.header, sizeof(msg),
+ MACH_SEND_TIMEOUT, MACH_MSG_TIMEOUT_NONE);
+
+ /*
+ * A timeout will occur when the queue is full. Ignore it if so
+ * configured.
+ */
+ if (kr == MACH_SEND_TIMED_OUT && !(ion->flags & kHV_ION_EXIT_FULL)) {
+ kr = MACH_MSG_SUCCESS;
+ }
+
+ if (kr != MACH_MSG_SUCCESS) {
+ fired = false;
+ break;
+ }
+
+ fired = true;
+ }
+
+ lck_rw_done(&grp->lock);
+ return fired ? KERN_SUCCESS : KERN_FAILURE;
+}
+
+kern_return_t
+hv_io_notifier_grp_alloc(hv_ion_grp_t **grp_p)
+{
+ hv_ion_grp_t *grp = kalloc(sizeof(*grp));
+
+ if (grp == NULL) {
+ return KERN_RESOURCE_SHORTAGE;
+ }
+ bzero(grp, sizeof(*grp));
+
+ lck_rw_init(&grp->lock, &ion_lock_grp, LCK_ATTR_NULL);
+
+ *grp_p = grp;
+ return KERN_SUCCESS;
+}
+
+void
+hv_io_notifier_grp_free(hv_ion_grp_t **grp_p)
+{
+ hv_ion_grp_t *grp = *grp_p;
+
+ while (!LIST_EMPTY(&grp->list)) {
+ hv_ion_entry_t *ion = LIST_FIRST(&grp->list);
+
+ LIST_REMOVE(ion, list);
+
+ ipc_port_release_send(ion->port);
+ kfree(ion, sizeof(*ion));
+ }
+
+ lck_rw_destroy(&grp->lock, &ion_lock_grp);
+
+ kfree(grp, sizeof(*grp));
+
+ *grp_p = NULL;
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#pragma once
+
+#include <mach/port.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+ kHV_ION_NONE = (0u << 0),
+ kHV_ION_ANY_VALUE = (1u << 1),
+ kHV_ION_ANY_SIZE = (1u << 2),
+ kHV_ION_EXIT_FULL = (1u << 3),
+};
+
+#ifdef KERNEL_PRIVATE
+
+typedef struct {
+ mach_msg_header_t header;
+ uint64_t addr;
+ uint64_t size;
+ uint64_t value;
+} hv_ion_message_t;
+
+typedef struct {
+ uint64_t addr;
+ uint64_t size;
+ uint64_t value;
+ uint32_t port_name;
+ uint32_t flags;
+} hv_ion_t;
+
+typedef struct hv_ion_grp hv_ion_grp_t;
+
+extern kern_return_t hv_io_notifier_grp_add(hv_ion_grp_t *grp, const hv_ion_t *);
+extern kern_return_t hv_io_notifier_grp_remove(hv_ion_grp_t *, const hv_ion_t *);
+extern kern_return_t hv_io_notifier_grp_fire(hv_ion_grp_t *, uint64_t, size_t, uint64_t);
+extern kern_return_t hv_io_notifier_grp_alloc(hv_ion_grp_t **);
+extern void hv_io_notifier_grp_free(hv_ion_grp_t **);
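+
+/*
+ * Illustrative sketch (addresses, port name, and variables are hypothetical):
+ * a hypervisor backend could register a doorbell notifier and later fire it
+ * when it traps a matching guest MMIO write.
+ *
+ * hv_ion_grp_t *grp = NULL;
+ * if (hv_io_notifier_grp_alloc(&grp) == KERN_SUCCESS) {
+ *     hv_ion_t ion = {
+ *         .addr      = 0xfe000000,     // hypothetical doorbell address
+ *         .size      = 4,
+ *         .value     = 0,              // ignored because of kHV_ION_ANY_VALUE
+ *         .port_name = doorbell_port,  // send right in the calling task's space
+ *         .flags     = kHV_ION_ANY_VALUE,
+ *     };
+ *     (void)hv_io_notifier_grp_add(grp, &ion);
+ *     ...
+ *     (void)hv_io_notifier_grp_fire(grp, 0xfe000000, 4, written_value);
+ * }
+ */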
+
+#endif /* KERNEL_PRIVATE */
+
+#ifdef __cplusplus
+}
+#endif
#include <libkern/OSAtomic.h>
#include <vm/vm_pageout.h>
#include <mach/sdt.h>
+#include <sys/kdebug.h>
#if defined(__x86_64__) && CONFIG_VMX
#include <i386/vmx/vmx_cpu.h>
.thread_destroy = NULL, /* thread is being destroyed */
.task_destroy = NULL, /* task is being destroyed */
.volatile_state = NULL, /* thread state is becoming volatile */
+ .resume = NULL, /* system is being resumed */
+ .memory_pressure = NULL, /* (unused) */
};
/* trap tables for hv_*_trap syscalls */
.suspend = NULL,
.thread_destroy = NULL,
.task_destroy = NULL,
- .volatile_state = NULL
+ .volatile_state = NULL,
+ .resume = NULL,
};
hv_callbacks_enabled = 0;
}
}
+/* system resume notification */
+void
+hv_resume(void)
+{
+ if (hv_callbacks_enabled && hv_callbacks.resume) {
+ hv_callbacks.resume();
+ }
+}
+
/* dispatch hv_task_trap/hv_thread_trap syscalls to trap handlers,
* fail for invalid index or absence of trap handlers, trap handler is
* responsible for validating targets */
hv_trace_guest_enter(uint32_t vcpu_id, uint64_t *vcpu_regs)
{
DTRACE_HV2(guest__enter, uint32_t, vcpu_id, uint64_t *, vcpu_regs);
+
+ KDBG(MACHDBG_CODE(DBG_MACH_HV, HV_GUEST_ENTER) | DBG_FUNC_START, vcpu_id);
}
void
-hv_trace_guest_exit(uint32_t vcpu_id, uint64_t *vcpu_regs)
+hv_trace_guest_exit(uint32_t vcpu_id, uint64_t *vcpu_regs, uint32_t reason)
{
+ KDBG(MACHDBG_CODE(DBG_MACH_HV, HV_GUEST_ENTER) | DBG_FUNC_END, vcpu_id,
+ reason);
+
DTRACE_HV2(guest__exit, uint32_t, vcpu_id, uint64_t *, vcpu_regs);
}
+
+void
+hv_trace_guest_error(uint32_t vcpu_id, uint64_t *vcpu_regs, uint32_t failure,
+ uint32_t error)
+{
+ /*
+ * An error indicates that the guest enter failed so there will be no
+ * guest exit. Close the guest enter interval.
+ */
+ KDBG(MACHDBG_CODE(DBG_MACH_HV, HV_GUEST_ENTER) | DBG_FUNC_END, vcpu_id,
+ -1, failure, error);
+ KDBG(MACHDBG_CODE(DBG_MACH_HV, HV_GUEST_ERROR), vcpu_id, failure, error);
+
+ DTRACE_HV3(guest__error, uint32_t, vcpu_id, uint64_t *, vcpu_regs, uint32_t, failure);
+}
#include <stdint.h>
#include <kern/kern_types.h>
#include <mach/kern_return.h>
+#include <kern/hv_io_notifier.h>
typedef enum {
HV_DEBUG_STATE
void (*thread_destroy)(void *vcpu);
void (*task_destroy)(void *vm);
void (*volatile_state)(void *vcpu, int state);
+#define HV_CALLBACKS_RESUME_DEFINED 1
+ void (*resume)(void);
void (*memory_pressure)(void);
} hv_callbacks_t;
extern kern_return_t hv_set_callbacks(hv_callbacks_t callbacks);
extern void hv_release_callbacks(void);
extern void hv_suspend(void);
+extern void hv_resume(void);
extern kern_return_t hv_task_trap(uint64_t index, uint64_t arg);
extern kern_return_t hv_thread_trap(uint64_t index, uint64_t arg);
extern boolean_t hv_ast_pending(void);
extern void hv_port_notify(mach_msg_header_t *msg);
extern void hv_trace_guest_enter(uint32_t vcpu_id, uint64_t *vcpu_regs);
-extern void hv_trace_guest_exit(uint32_t vcpu_id, uint64_t *vcpu_regs);
+extern void hv_trace_guest_exit(uint32_t vcpu_id, uint64_t *vcpu_regs,
+ uint32_t reason);
+extern void hv_trace_guest_error(uint32_t vcpu_id, uint64_t *vcpu_regs,
+ uint32_t failure, uint32_t error);
#if defined(__cplusplus)
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _KERN_HVG_HYPERCALL_H_
+#define _KERN_HVG_HYPERCALL_H_
+
+#include <os/base.h>
+#include <stdint.h>
+
+/* Architecture-independent definitions (exported to userland) */
+
+/*
+ * Apple Hypercall arguments
+ */
+typedef struct hvg_hcall_args {
+ uint64_t args[6];
+} hvg_hcall_args_t;
+
+
+/*
+ * Apple Hypercall return output
+ */
+typedef struct hvg_hcall_output {
+ uint64_t regs[7];
+} hvg_hcall_output_t;
+
+
+/*
+ * Apple Hypercall return code
+ */
+
+OS_CLOSED_ENUM(hvg_hcall_return, uint32_t,
+ HVG_HCALL_SUCCESS = 0x0000, /* The call succeeded */
+ HVG_HCALL_ACCESS_DENIED = 0x0001, /* Invalid access right */
+ HVG_HCALL_INVALID_CODE = 0x0002, /* Hypercall code not recognized */
+ HVG_HCALL_INVALID_PARAMETER = 0x0003, /* Specified register value not valid */
+ HVG_HCALL_IO_FAILED = 0x0004, /* Input/output error */
+ HVG_HCALL_FEAT_DISABLED = 0x0005, /* Feature not available */
+ HVG_HCALL_UNSUPPORTED = 0x0006, /* Hypercall not supported */
+ );
+
+
+/*
+ * Apple Hypercall call code
+ */
+
+OS_CLOSED_ENUM(hvg_hcall_code, uint32_t,
+ HVG_HCALL_TRIGGER_DUMP = 0x0001, /* Collect guest dump */
+ );
+
+/*
+ * Options for collecting kernel vmcore
+ */
+
+OS_CLOSED_OPTIONS(hvg_hcall_dump_option, uint32_t,
+ HVG_HCALL_DUMP_OPTION_REGULAR = 0x0001 /* Regular dump-guest-memory */
+ );
+
+typedef struct hvg_hcall_vmcore_file {
+ char tag[57]; /* 7 64-bit registers plus 1 byte for '\0' */
+} hvg_hcall_vmcore_file_t;
+
+extern hvg_hcall_return_t
+hvg_hcall_trigger_dump(hvg_hcall_vmcore_file_t *vmcore,
+ const hvg_hcall_dump_option_t dump_option);
+
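+/*
+ * Illustrative caller sketch (hypothetical, not part of this interface); only
+ * the types and constants declared above are real, the surrounding logic is
+ * assumed:
+ *
+ *     hvg_hcall_vmcore_file_t vmcore = {0};
+ *     hvg_hcall_return_t ret;
+ *
+ *     ret = hvg_hcall_trigger_dump(&vmcore, HVG_HCALL_DUMP_OPTION_REGULAR);
+ *     if (ret == HVG_HCALL_SUCCESS) {
+ *         // vmcore.tag holds the NUL-terminated tag assembled from the
+ *         // hypercall's 7 output registers
+ *     }
+ */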
+
+#ifdef XNU_KERNEL_PRIVATE
+
+/*
+ * For XNU kernel use only (omitted from userland headers)
+ */
+
+#if defined (__x86_64__)
+#include <i386/cpuid.h>
+#include <i386/x86_hypercall.h>
+#endif
+
+#endif /* XNU_KERNEL_PRIVATE */
+
+#endif /* _KERN_HVG_HYPERCALL_H_ */
{
host_t host = HOST_NULL;
+ /* reject translation if itk_host is not host_priv */
+ if (port != current_task()->itk_host) {
+ return HOST_NULL;
+ }
+
if (IP_VALID(port)) {
ip_lock(port);
if (ip_active(port) &&
(ip_kotype(port) == IKOT_HOST_PRIV)) {
- host = (host_t) ip_get_kobject(port);
+ assert(ip_get_kobject(port) == &realhost);
+ host = &realhost;
}
ip_unlock(port);
}
}
#endif
- assert(host_priv == &realhost);
-
host_lock(host_priv);
for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) {
return KERN_INVALID_ARGUMENT;
}
- assert(host_priv == &realhost);
-
host_lock(host_priv);
count = 0;
break;
}
}/* for */
- if (j == count) {
+ if (j == count && count < *CountCnt) {
masks[j] = (1 << i);
ports[j] =
ipc_port_copy_send(host_priv->exc_actions[i].port);
behaviors[j] = host_priv->exc_actions[i].behavior;
flavors[j] = host_priv->exc_actions[i].flavor;
count++;
- if (count > *CountCnt) {
- break;
- }
}
}
}/* for */
#include <device/device_types.h>
#include <device/device_server.h>
+#if CONFIG_USER_NOTIFICATION
#include <UserNotification/UNDReplyServer.h>
+#endif
#if CONFIG_ARCADE
#include <mach/arcade_register_server.h>
#include <uk_xkern/xk_uproxy_server.h>
#endif /* XK_PROXY */
+#include <kern/counter.h>
#include <kern/ipc_tt.h>
#include <kern/ipc_mig.h>
#include <kern/ipc_misc.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_voucher.h>
#include <kern/sync_sema.h>
-#include <kern/counters.h>
#include <kern/work_interval.h>
#include <kern/suid_cred.h>
+#include <kern/task_ident.h>
#if HYPERVISOR
#include <kern/hv_support.h>
mig_routine_t routine;
int size;
int kobjidx;
-#if MACH_COUNTERS
- mach_counter_t callcount;
-#endif
} mig_hash_t;
#define MAX_MIG_ENTRIES 1031
#ifdef VM32_SUPPORT
(const struct mig_subsystem *)&vm32_map_subsystem,
#endif
+#if CONFIG_USER_NOTIFICATION
(const struct mig_subsystem *)&UNDReply_subsystem,
+#endif
(const struct mig_subsystem *)&mach_voucher_subsystem,
(const struct mig_subsystem *)&mach_voucher_attr_control_subsystem,
(const struct mig_subsystem *)&memory_entry_subsystem,
if (!ptr->routine || msgh_id != ptr->num) {
ptr = (mig_hash_t *)0;
- } else {
-#if MACH_COUNTERS
- ptr->callcount++;
-#endif
}
return ptr;
if (options & IPC_KOBJECT_ALLOC_IMMOVABLE_SEND) {
port->ip_immovable_send = 1;
}
+ if (options & IPC_KOBJECT_ALLOC_PINNED) {
+ port->ip_pinned = 1;
+ }
}
/*
return port;
}
+static void
+ipc_kobject_subst_once_notify(mach_msg_header_t *msg)
+{
+ mach_no_senders_notification_t *notification = (void *)msg;
+ ipc_port_t port = notification->not_header.msgh_remote_port;
+
+ require_ip_active(port);
+ assert(IKOT_PORT_SUBST_ONCE == ip_kotype(port));
+
+ ip_release((ipc_port_t)ip_get_kobject(port));
+ ipc_port_dealloc_kernel(port);
+}
+
+/*
+ * Routine: ipc_kobject_alloc_subst_once
+ * Purpose:
+ * Make a port that will be substituted by the kolabel
+ * rules once, preventing the next substitution (of its target)
+ * from happening, if any.
+ *
+ * Returns:
+ * A port with a send right, that will substitute to its "kobject".
+ *
+ * Conditions:
+ * No locks held (memory is allocated)
+ * `target` has a refcount that this function consumes
+ */
+ipc_port_t
+ipc_kobject_alloc_subst_once(
+ ipc_port_t target)
+{
+ return ipc_kobject_alloc_labeled_port(target,
+ IKOT_PORT_SUBST_ONCE, IPC_LABEL_SUBST_ONCE,
+ IPC_KOBJECT_ALLOC_MAKE_SEND | IPC_KOBJECT_ALLOC_NSREQUEST);
+}
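+
+/*
+ * Typical use elsewhere in this change (task/thread_get_non_substituted_self()):
+ * wrap a settable-self send right so that exactly one label substitution is
+ * suppressed on copyout. Sketch only; `port` is assumed to be a valid
+ * settable-self port:
+ *
+ *     ip_reference(port);                        // reference consumed below
+ *     return ipc_kobject_alloc_subst_once(port); // consumes that reference
+ */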
+
/*
* Routine: ipc_kobject_make_send_lazy_alloc_port
* Purpose:
ipc_port_t *port_store,
ipc_kobject_t kobject,
ipc_kobject_type_t type,
+ ipc_kobject_alloc_options_t alloc_opts,
boolean_t __ptrauth_only should_ptrauth,
uint64_t __ptrauth_only ptrauth_discriminator)
{
if (!IP_VALID(port)) {
port = ipc_kobject_alloc_port(kobject, type,
- IPC_KOBJECT_ALLOC_MAKE_SEND | IPC_KOBJECT_ALLOC_NSREQUEST);
+ IPC_KOBJECT_ALLOC_MAKE_SEND | IPC_KOBJECT_ALLOC_NSREQUEST | alloc_opts);
#if __has_feature(ptrauth_calls)
if (should_ptrauth) {
}
/*
- * Routine: ipc_kobject_label_check
+ * Routine: ipc_kobject_label_substitute_task
+ * Purpose:
+ * Substitute a task control port for its immovable
+ * equivalent when the receiver is that task.
+ * Conditions:
+ * Space is write locked and active.
+ * Port is locked and active.
+ * Returns:
+ * - IP_NULL port if no substitution is to be done
+ * - a valid port if a substitution needs to happen
+ */
+static ipc_port_t
+ipc_kobject_label_substitute_task(
+ ipc_space_t space,
+ ipc_port_t port)
+{
+ ipc_port_t subst = IP_NULL;
+ task_t task = ipc_kobject_get(port);
+
+ if (task != TASK_NULL && task == space->is_task) {
+ if ((subst = port->ip_alt_port)) {
+ return subst;
+ }
+ }
+
+ return IP_NULL;
+}
+
+/*
+ * Routine: ipc_kobject_label_substitute_thread
+ * Purpose:
+ * Substitute a thread control port for its immovable
+ * equivalent when it belongs to the receiver task.
+ * Conditions:
+ * Space is write locked and active.
+ * Port is locked and active.
+ * Returns:
+ * - IP_NULL port if no substitution is to be done
+ * - a valid port if a substitution needs to happen
+ */
+static ipc_port_t
+ipc_kobject_label_substitute_thread(
+ ipc_space_t space,
+ ipc_port_t port)
+{
+ ipc_port_t subst = IP_NULL;
+ thread_t thread = ipc_kobject_get(port);
+
+ if (thread != THREAD_NULL && space->is_task == thread->task) {
+ if ((subst = port->ip_alt_port) != IP_NULL) {
+ return subst;
+ }
+ }
+
+ return IP_NULL;
+}
+
+/*
+ * Routine: ipc_kobject_label_check
* Purpose:
- * Check to see if the space is allowed to possess a
- * right for the given port. In order to qualify, the
- * space label must contain all the privileges listed
- * in the port/kobject label.
+ * Check to see if the space is allowed to possess
+ * a right for the given port. In order to qualify,
+ * the space label must contain all the privileges
+ * listed in the port/kobject label.
*
* Conditions:
* Space is write locked and active.
- * Port is locked and active.
+ * Port is locked and active.
+ *
+ * Returns:
+ * Whether the copyout is authorized.
+ *
+ * If a port substitution is requested, the space is unlocked,
+ * the port is unlocked and its "right" consumed.
+ *
+ * As of now, substituted ports only happen for send rights.
*/
-boolean_t
+bool
ipc_kobject_label_check(
- ipc_space_t space,
- ipc_port_t port,
- __unused mach_msg_type_name_t msgt_name)
+ ipc_space_t space,
+ ipc_port_t port,
+ mach_msg_type_name_t msgt_name,
+ ipc_object_copyout_flags_t *flags,
+ ipc_port_t *subst_portp)
{
ipc_kobject_label_t labelp;
+ ipc_label_t label;
assert(is_active(space));
assert(ip_active(port));
+ *subst_portp = IP_NULL;
+
/* Unlabled ports/kobjects are always allowed */
if (!ip_is_kolabeled(port)) {
- return TRUE;
+ return true;
}
/* Never OK to copyout the receive right for a labeled kobject */
if (msgt_name == MACH_MSG_TYPE_PORT_RECEIVE) {
- panic("ipc_kobject_label_check: attempted receive right copyout for labeled kobject");
+ panic("ipc_kobject_label_check: attempted receive right "
+ "copyout for labeled kobject");
}
labelp = port->ip_kolabel;
- return (labelp->ikol_label & space->is_label) == labelp->ikol_label;
+ label = labelp->ikol_label;
+
+ if ((*flags & IPC_OBJECT_COPYOUT_FLAGS_NO_LABEL_CHECK) == 0 &&
+ (label & IPC_LABEL_SUBST_MASK)) {
+ ipc_port_t subst = IP_NULL;
+
+ if (msgt_name != MACH_MSG_TYPE_PORT_SEND) {
+ return false;
+ }
+
+ switch (label & IPC_LABEL_SUBST_MASK) {
+ case IPC_LABEL_SUBST_TASK:
+ subst = ipc_kobject_label_substitute_task(space, port);
+ break;
+ case IPC_LABEL_SUBST_THREAD:
+ subst = ipc_kobject_label_substitute_thread(space, port);
+ break;
+ case IPC_LABEL_SUBST_ONCE:
+ /* the next check will _not_ substitute */
+ *flags |= IPC_OBJECT_COPYOUT_FLAGS_NO_LABEL_CHECK;
+ subst = ip_get_kobject(port);
+ break;
+ default:
+ panic("unexpected label: %llx\n", label);
+ }
+
+ if (subst != IP_NULL) {
+ ip_reference(subst);
+ is_write_unlock(space);
+ ipc_port_release_send_and_unlock(port);
+ port = ipc_port_make_send(subst);
+ ip_release(subst);
+ *subst_portp = port;
+ return true;
+ }
+ }
+
+ return (label & space->is_label & IPC_LABEL_SPACE_MASK) ==
+ (label & IPC_LABEL_SPACE_MASK);
}
boolean_t
ipc_voucher_attr_control_notify(request_header);
return TRUE;
+ case IKOT_PORT_SUBST_ONCE:
+ ipc_kobject_subst_once_notify(request_header);
+ return TRUE;
+
case IKOT_SEMAPHORE:
semaphore_notify(request_header);
return TRUE;
case IKOT_SUID_CRED:
suid_cred_notify(request_header);
return TRUE;
+ case IKOT_TASK_ID_TOKEN:
+ task_id_token_notify(request_header);
+ return TRUE;
#if HYPERVISOR
case IKOT_HYPERVISOR:
hv_port_notify(request_header);
#define IKOT_PSET 6
#define IKOT_PSET_NAME 7
#define IKOT_TIMER 8
-#define IKOT_PAGING_REQUEST 9
+#define IKOT_PORT_SUBST_ONCE 9
#define IKOT_MIG 10
#define IKOT_MEMORY_OBJECT 11
#define IKOT_XMM_PAGER 12
#define IKOT_THREAD_READ 47
#define IKOT_SUID_CRED 48
#define IKOT_HYPERVISOR 49
+#define IKOT_TASK_ID_TOKEN 50
/*
* Add new entries here and adjust IKOT_UNKNOWN.
* Please keep ipc/ipc_object.c:ikot_print_array up to date.
*/
-#define IKOT_UNKNOWN 50 /* magic catchall */
+#define IKOT_UNKNOWN 51 /* magic catchall */
#define IKOT_MAX_TYPE (IKOT_UNKNOWN+1) /* # of IKOT_ types */
/* set the bitstring index for kobject */
IPC_KOBJECT_ALLOC_IMMOVABLE_SEND = 0x00000008,
/* Add a label structure to the port */
IPC_KOBJECT_ALLOC_LABEL = 0x00000010,
+ /* Make all rights pinned (non-deallocatable) in an ipc space */
+ IPC_KOBJECT_ALLOC_PINNED = 0x00000020,
});
/* Allocates a kobject port, never fails */
ipc_label_t label,
ipc_kobject_alloc_options_t options);
+extern ipc_port_t ipc_kobject_alloc_subst_once(
+ ipc_port_t target);
+
/* Makes a send right, lazily allocating a kobject port, arming for no-senders, never fails */
extern boolean_t ipc_kobject_make_send_lazy_alloc_port(
ipc_port_t *port_store,
ipc_kobject_t kobject,
ipc_kobject_type_t type,
+ ipc_kobject_alloc_options_t alloc_opts,
boolean_t should_ptrauth,
uint64_t ptrauth_discriminator) __result_use_check;
}
/* Check if a kobject can be copied out to a given space */
-extern boolean_t ipc_kobject_label_check(
- ipc_space_t space,
- ipc_port_t port,
- mach_msg_type_name_t msgt_name);
+extern bool ipc_kobject_label_check(
+ ipc_space_t space,
+ ipc_port_t port,
+ mach_msg_type_name_t msgt_name,
+ ipc_object_copyout_flags_t *flags,
+ ipc_port_t *subst_portp) __result_use_check;
+
+__result_use_check
+static inline bool
+ip_label_check(
+ ipc_space_t space,
+ ipc_port_t port,
+ mach_msg_type_name_t msgt_name,
+ ipc_object_copyout_flags_t *flags,
+ ipc_port_t *subst_portp)
+{
+ if (!ip_is_kolabeled(port)) {
+ *subst_portp = IP_NULL;
+ return true;
+ }
+ return ipc_kobject_label_check(space, port, msgt_name, flags, subst_portp);
+}
/* Release any kernel object resources associated with a port */
extern void ipc_kobject_destroy(
extern kern_return_t
uext_server(ipc_kmsg_t request, ipc_kmsg_t * reply);
+/* These boot-args decide whether pinned and immovable control ports can be copied out to an IPC space */
+__options_decl(ipc_control_port_options_t, uint32_t, {
+ IPC_CONTROL_PORT_OPTIONS_NONE = 0x00,
+
+ IPC_CONTROL_PORT_OPTIONS_PINNED_SOFT = 0x01,
+ IPC_CONTROL_PORT_OPTIONS_PINNED_HARD = 0x02,
+
+ IPC_CONTROL_PORT_OPTIONS_IMMOVABLE_SOFT = 0x10,
+ IPC_CONTROL_PORT_OPTIONS_IMMOVABLE_HARD = 0x20,
+});
+
+extern ipc_control_port_options_t ipc_control_port_options;
+extern bool pinned_control_port_enabled;
+extern bool immovable_control_port_enabled;
+
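+/*
+ * Illustrative sketch (hypothetical; it mirrors how ipc_task_init() in this
+ * change consults the derived globals rather than ipc_control_port_options
+ * itself):
+ *
+ *     ipc_kobject_alloc_options_t opts = IPC_KOBJECT_ALLOC_IMMOVABLE_SEND;
+ *     if (pinned_control_port_enabled) {
+ *         opts |= IPC_KOBJECT_ALLOC_PINNED;
+ *     }
+ */
+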
#endif /* MACH_KERNEL_PRIVATE */
#endif /* KERNEL_PRIVATE */
mach_msg_size_t send_size,
mach_msg_size_t rcv_size)
{
- return kernel_mach_msg_rpc(msg, send_size, rcv_size, TRUE, NULL);
+ return kernel_mach_msg_rpc(msg, send_size, rcv_size, TRUE, TRUE, NULL);
}
#endif /* IKM_SUPPORT_LEGACY */
mach_msg_size_t send_size,
mach_msg_size_t rcv_size)
{
- return kernel_mach_msg_rpc(msg, send_size, rcv_size, FALSE, NULL);
+ return kernel_mach_msg_rpc(msg, send_size, rcv_size, FALSE, TRUE, NULL);
}
mach_msg_return_t
__unused
#endif
boolean_t legacy,
+ boolean_t interruptible,
boolean_t *message_moved)
{
thread_t self = current_thread();
require_ip_active(reply);
/* JMM - why this check? */
- if (!self->active && !self->inspection) {
+ if (interruptible && !self->active && !self->inspection) {
ipc_port_dealloc_reply(reply);
self->ith_rpc_reply = IP_NULL;
return MACH_RCV_INTERRUPTED;
MACH_MSG_OPTION_NONE,
MACH_MSG_SIZE_MAX,
MACH_MSG_TIMEOUT_NONE,
- THREAD_INTERRUPTIBLE);
+ interruptible ? THREAD_INTERRUPTIBLE : THREAD_UNINT);
mr = self->ith_state;
kmsg = self->ith_kmsg;
}
assert(mr == MACH_RCV_INTERRUPTED);
-
+ assert(interruptible);
assert(reply == self->ith_rpc_reply);
if (self->ast & AST_APC) {
* if this is the first send right
*/
if (!ipc_kobject_make_send_lazy_alloc_port(&mig_object->port,
- (ipc_kobject_t) mig_object, IKOT_MIG, false, 0)) {
+ (ipc_kobject_t) mig_object, IKOT_MIG, IPC_KOBJECT_ALLOC_NONE, false, 0)) {
mig_object_deallocate(mig_object);
}
mach_msg_size_t send_size,
mach_msg_size_t rcv_size,
boolean_t legacy,
+ boolean_t interruptible,
boolean_t *message_moved);
#endif /* XNU_KERNEL_PRIVATE */
kr = ipc_object_copyin(task->itk_space, name,
MACH_MSG_TYPE_COPY_SEND, (ipc_object_t *)&fileport, 0, NULL,
- IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND);
+ IPC_OBJECT_COPYIN_FLAGS_ALLOW_IMMOVABLE_SEND);
if (kr != KERN_SUCCESS) {
return kr;
}
* semaphore_notify if this is the first send right
*/
if (!ipc_kobject_make_send_lazy_alloc_port(&semaphore->port,
- (ipc_kobject_t) semaphore, IKOT_SEMAPHORE, false, 0)) {
+ (ipc_kobject_t) semaphore, IKOT_SEMAPHORE, IPC_KOBJECT_ALLOC_NONE, false, 0)) {
semaphore_dereference(semaphore);
}
return semaphore->port;
#include <kern/kalloc.h>
#include <kern/thread.h>
#include <kern/misc_protos.h>
+#include <kdp/kdp_dyld.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
extern boolean_t IOTaskHasEntitlement(task_t, const char *);
/* forward declarations */
-task_t convert_port_to_locked_task(ipc_port_t port, boolean_t eval);
-task_inspect_t convert_port_to_locked_task_inspect(ipc_port_t port);
-task_read_t convert_port_to_locked_task_read(ipc_port_t port);
-static task_read_t convert_port_to_task_read_locked(ipc_port_t port);
static kern_return_t port_allowed_with_task_flavor(int which, mach_task_flavor_t flavor);
static kern_return_t port_allowed_with_thread_flavor(int which, mach_thread_flavor_t flavor);
-static task_inspect_t convert_port_to_task_inspect_locked(ipc_port_t port);
static void ipc_port_bind_special_reply_port_locked(ipc_port_t port);
static kern_return_t ipc_port_unbind_special_reply_port(thread_t thread, boolean_t unbind_active_port);
kern_return_t task_conversion_eval(task_t caller, task_t victim);
static ipc_space_t convert_port_to_space_no_eval(ipc_port_t port);
-static task_t convert_port_to_task_no_eval(ipc_port_t port);
static thread_t convert_port_to_thread_no_eval(ipc_port_t port);
static ipc_port_t convert_task_to_port_with_flavor(task_t task, mach_task_flavor_t flavor);
static ipc_port_t convert_thread_to_port_with_flavor(thread_t thread, mach_thread_flavor_t flavor);
+static task_read_t convert_port_to_task_read_no_eval(ipc_port_t port);
+static thread_read_t convert_port_to_thread_read_no_eval(ipc_port_t port);
+static ipc_space_read_t convert_port_to_space_read_no_eval(ipc_port_t port);
/*
* Routine: ipc_task_init
ipc_space_t space;
ipc_port_t kport;
ipc_port_t nport;
-
+ ipc_port_t pport;
kern_return_t kr;
int i;
space->is_task = task;
- kport = ipc_port_alloc_kernel();
+ if (immovable_control_port_enabled) {
+ ipc_kobject_alloc_options_t options = IPC_KOBJECT_ALLOC_IMMOVABLE_SEND;
+ if (pinned_control_port_enabled) {
+ options |= IPC_KOBJECT_ALLOC_PINNED;
+ }
+ pport = ipc_kobject_alloc_port(IKO_NULL, IKOT_NONE, options);
- if (kport == IP_NULL) {
- panic("ipc_task_init");
+ kport = ipc_kobject_alloc_labeled_port(IKO_NULL, IKOT_TASK_CONTROL,
+ IPC_LABEL_SUBST_TASK, IPC_KOBJECT_ALLOC_NONE);
+ kport->ip_alt_port = pport;
+ } else {
+ kport = ipc_kobject_alloc_port(IKO_NULL, IKOT_TASK_CONTROL,
+ IPC_KOBJECT_ALLOC_NONE);
+
+ pport = kport;
}
nport = ipc_port_alloc_kernel();
panic("ipc_task_init");
}
+ if (pport == IP_NULL) {
+ panic("ipc_task_init");
+ }
+
itk_lock_init(task);
- task->itk_self[TASK_FLAVOR_CONTROL] = kport;
- task->itk_self[TASK_FLAVOR_NAME] = nport;
+ task->itk_task_ports[TASK_FLAVOR_CONTROL] = kport;
+ task->itk_task_ports[TASK_FLAVOR_NAME] = nport;
/* Lazily allocated on-demand */
- task->itk_self[TASK_FLAVOR_INSPECT] = IP_NULL;
- task->itk_self[TASK_FLAVOR_READ] = IP_NULL;
- task->itk_resume = IP_NULL;
+ task->itk_task_ports[TASK_FLAVOR_INSPECT] = IP_NULL;
+ task->itk_task_ports[TASK_FLAVOR_READ] = IP_NULL;
+ task->itk_dyld_notify = NULL;
+ task->itk_self = pport;
+ task->itk_resume = IP_NULL; /* Lazily allocated on-demand */
if (task_is_a_corpse_fork(task)) {
/*
* No sender's notification for corpse would not
}
} else {
itk_lock(parent);
- assert(parent->itk_self[TASK_FLAVOR_CONTROL] != IP_NULL);
+ assert(parent->itk_task_ports[TASK_FLAVOR_CONTROL] != IP_NULL);
/* inherit registered ports */
ipc_port_t nport;
ipc_port_t iport;
ipc_port_t rdport;
+ ipc_port_t pport;
itk_lock(task);
- kport = task->itk_self[TASK_FLAVOR_CONTROL];
+
+ assert(!task->ipc_active || task_is_a_corpse(task));
+ task->ipc_active = true;
+
+ kport = task->itk_task_ports[TASK_FLAVOR_CONTROL];
if (kport != IP_NULL) {
ipc_kobject_set(kport, (ipc_kobject_t) task, IKOT_TASK_CONTROL);
}
- nport = task->itk_self[TASK_FLAVOR_NAME];
+ nport = task->itk_task_ports[TASK_FLAVOR_NAME];
if (nport != IP_NULL) {
ipc_kobject_set(nport, (ipc_kobject_t) task, IKOT_TASK_NAME);
}
- iport = task->itk_self[TASK_FLAVOR_INSPECT];
+ iport = task->itk_task_ports[TASK_FLAVOR_INSPECT];
if (iport != IP_NULL) {
ipc_kobject_set(iport, (ipc_kobject_t) task, IKOT_TASK_INSPECT);
}
- rdport = task->itk_self[TASK_FLAVOR_READ];
+ rdport = task->itk_task_ports[TASK_FLAVOR_READ];
if (rdport != IP_NULL) {
ipc_kobject_set(rdport, (ipc_kobject_t) task, IKOT_TASK_READ);
}
+ pport = task->itk_self;
+ if (immovable_control_port_enabled && pport != IP_NULL) {
+ ipc_kobject_set(pport, (ipc_kobject_t) task, IKOT_TASK_CONTROL);
+ }
itk_unlock(task);
}
ipc_port_t iport;
ipc_port_t rdport;
ipc_port_t rport;
+ ipc_port_t pport;
itk_lock(task);
- kport = task->itk_self[TASK_FLAVOR_CONTROL];
+
+ /*
+ * This innocuous-looking line is load-bearing.
+ *
+ * It disables the creation of lazily-made ports.
+ * We must do so before we drop the last reference on the task,
+ * as task ports do not own a reference on the task, and
+ * convert_port_to_task* will crash trying to resurrect a task.
+ */
+ task->ipc_active = false;
+
+ kport = task->itk_task_ports[TASK_FLAVOR_CONTROL];
if (kport != IP_NULL) {
- ipc_kobject_set(kport, IKO_NULL, IKOT_NONE);
+ ip_lock(kport);
+ kport->ip_alt_port = IP_NULL;
+ ipc_kobject_set_atomically(kport, IKO_NULL, IKOT_NONE);
+ ip_unlock(kport);
}
- nport = task->itk_self[TASK_FLAVOR_NAME];
+ nport = task->itk_task_ports[TASK_FLAVOR_NAME];
if (nport != IP_NULL) {
ipc_kobject_set(nport, IKO_NULL, IKOT_NONE);
}
- iport = task->itk_self[TASK_FLAVOR_INSPECT];
+ iport = task->itk_task_ports[TASK_FLAVOR_INSPECT];
if (iport != IP_NULL) {
ipc_kobject_set(iport, IKO_NULL, IKOT_NONE);
}
- rdport = task->itk_self[TASK_FLAVOR_READ];
+ rdport = task->itk_task_ports[TASK_FLAVOR_READ];
if (rdport != IP_NULL) {
ipc_kobject_set(rdport, IKO_NULL, IKOT_NONE);
}
+ pport = task->itk_self;
+ if (pport != kport && pport != IP_NULL) {
+ assert(immovable_control_port_enabled);
+ assert(pport->ip_immovable_send);
+ ipc_kobject_set(pport, IKO_NULL, IKOT_NONE);
+ }
rport = task->itk_resume;
if (rport != IP_NULL) {
ipc_port_t iport;
ipc_port_t rdport;
ipc_port_t rport;
- int i;
+ ipc_port_t pport;
+ ipc_port_t sself;
+ ipc_port_t *notifiers_ptr = NULL;
itk_lock(task);
- kport = task->itk_self[TASK_FLAVOR_CONTROL];
+
+ /*
+ * If we ever failed to clear ipc_active before the last reference
+ * was dropped, lazy ports might be made and used after the last
+ * reference is dropped and cause use after free (see comment in
+ * ipc_task_disable()).
+ */
+ assert(!task->ipc_active);
+
+ kport = task->itk_task_ports[TASK_FLAVOR_CONTROL];
+ sself = task->itk_settable_self;
if (kport == IP_NULL) {
/* the task is already terminated (can this happen?) */
itk_unlock(task);
return;
}
- task->itk_self[TASK_FLAVOR_CONTROL] = IP_NULL;
+ task->itk_task_ports[TASK_FLAVOR_CONTROL] = IP_NULL;
- rdport = task->itk_self[TASK_FLAVOR_READ];
- task->itk_self[TASK_FLAVOR_READ] = IP_NULL;
+ rdport = task->itk_task_ports[TASK_FLAVOR_READ];
+ task->itk_task_ports[TASK_FLAVOR_READ] = IP_NULL;
- iport = task->itk_self[TASK_FLAVOR_INSPECT];
- task->itk_self[TASK_FLAVOR_INSPECT] = IP_NULL;
+ iport = task->itk_task_ports[TASK_FLAVOR_INSPECT];
+ task->itk_task_ports[TASK_FLAVOR_INSPECT] = IP_NULL;
- nport = task->itk_self[TASK_FLAVOR_NAME];
+ nport = task->itk_task_ports[TASK_FLAVOR_NAME];
assert(nport != IP_NULL);
- task->itk_self[TASK_FLAVOR_NAME] = IP_NULL;
+ task->itk_task_ports[TASK_FLAVOR_NAME] = IP_NULL;
+
+ if (task->itk_dyld_notify) {
+ notifiers_ptr = task->itk_dyld_notify;
+ task->itk_dyld_notify = NULL;
+ }
+
+ if (immovable_control_port_enabled) {
+ pport = task->itk_self;
+ assert(pport != IP_NULL);
+ }
+
+ task->itk_self = IP_NULL;
rport = task->itk_resume;
task->itk_resume = IP_NULL;
itk_unlock(task);
/* release the naked send rights */
+ if (IP_VALID(sself)) {
+ ipc_port_release_send(sself);
+ }
- if (IP_VALID(task->itk_settable_self)) {
- ipc_port_release_send(task->itk_settable_self);
+ if (notifiers_ptr) {
+ for (int i = 0; i < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; i++) {
+ if (IP_VALID(notifiers_ptr[i])) {
+ ipc_port_release_send(notifiers_ptr[i]);
+ }
+ }
+ kfree(notifiers_ptr, DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT * sizeof(ipc_port_t));
}
- for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) {
+ for (int i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) {
if (IP_VALID(task->exc_actions[i].port)) {
ipc_port_release_send(task->exc_actions[i].port);
}
ipc_port_release_send(task->itk_debug_control);
}
- for (i = 0; i < TASK_PORT_REGISTER_MAX; i++) {
+ for (int i = 0; i < TASK_PORT_REGISTER_MAX; i++) {
if (IP_VALID(task->itk_registered[i])) {
ipc_port_release_send(task->itk_registered[i]);
}
}
/* destroy the kernel ports */
+ if (immovable_control_port_enabled) {
+ ip_lock(kport);
+ kport->ip_alt_port = IP_NULL;
+ ipc_kobject_set_atomically(kport, IKO_NULL, IKOT_NONE);
+ ip_unlock(kport);
+
+ /* when immovability is off, pport == kport and is deallocated below */
+ ipc_port_dealloc_kernel(pport);
+ }
ipc_port_dealloc_kernel(kport);
ipc_port_dealloc_kernel(nport);
if (iport != IP_NULL) {
ipc_task_reset(
task_t task)
{
- ipc_port_t old_kport, new_kport;
+ ipc_port_t old_kport, old_pport, new_kport, new_pport;
ipc_port_t old_sself;
ipc_port_t old_rdport;
ipc_port_t old_iport;
ipc_port_t old_exc_actions[EXC_TYPES_COUNT];
- int i;
+ ipc_port_t *notifiers_ptr = NULL;
#if CONFIG_MACF
/* Fresh label to unset credentials in existing labels. */
struct label *unset_label = mac_exc_create_label();
#endif
- new_kport = ipc_kobject_alloc_port((ipc_kobject_t)task, IKOT_TASK_CONTROL,
- IPC_KOBJECT_ALLOC_MAKE_SEND);
+ if (immovable_control_port_enabled) {
+ ipc_kobject_alloc_options_t options = IPC_KOBJECT_ALLOC_IMMOVABLE_SEND;
+ if (pinned_control_port_enabled) {
+ options |= IPC_KOBJECT_ALLOC_PINNED;
+ }
+
+ new_pport = ipc_kobject_alloc_port((ipc_kobject_t)task,
+ IKOT_TASK_CONTROL, options);
+
+ new_kport = ipc_kobject_alloc_labeled_port((ipc_kobject_t)task,
+ IKOT_TASK_CONTROL, IPC_LABEL_SUBST_TASK,
+ IPC_KOBJECT_ALLOC_NONE);
+ new_kport->ip_alt_port = new_pport;
+ } else {
+ new_kport = ipc_kobject_alloc_port((ipc_kobject_t)task,
+ IKOT_TASK_CONTROL, IPC_KOBJECT_ALLOC_NONE);
+
+ new_pport = new_kport;
+ }
itk_lock(task);
- old_kport = task->itk_self[TASK_FLAVOR_CONTROL];
- old_rdport = task->itk_self[TASK_FLAVOR_READ];
- old_iport = task->itk_self[TASK_FLAVOR_INSPECT];
+ old_kport = task->itk_task_ports[TASK_FLAVOR_CONTROL];
+ old_rdport = task->itk_task_ports[TASK_FLAVOR_READ];
+ old_iport = task->itk_task_ports[TASK_FLAVOR_INSPECT];
- if (old_kport == IP_NULL) {
+ old_pport = task->itk_self;
+
+ if (old_pport == IP_NULL) {
/* the task is already terminated (can this happen?) */
itk_unlock(task);
- ipc_port_release_send(new_kport);
ipc_port_dealloc_kernel(new_kport);
+ if (immovable_control_port_enabled) {
+ ipc_port_dealloc_kernel(new_pport);
+ }
#if CONFIG_MACF
mac_exc_free_label(unset_label);
#endif
}
old_sself = task->itk_settable_self;
- task->itk_settable_self = task->itk_self[TASK_FLAVOR_CONTROL] = new_kport;
+ task->itk_task_ports[TASK_FLAVOR_CONTROL] = new_kport;
+ task->itk_self = new_pport;
+
+ task->itk_settable_self = ipc_port_make_send(new_kport);
/* Set the old kport to IKOT_NONE and update the exec token while under the port lock */
ip_lock(old_kport);
+ old_kport->ip_alt_port = IP_NULL;
ipc_kobject_set_atomically(old_kport, IKO_NULL, IKOT_NONE);
task->exec_token += 1;
ip_unlock(old_kport);
/* Reset the read and inspect flavors of task port */
- task->itk_self[TASK_FLAVOR_READ] = IP_NULL;
- task->itk_self[TASK_FLAVOR_INSPECT] = IP_NULL;
+ task->itk_task_ports[TASK_FLAVOR_READ] = IP_NULL;
+ task->itk_task_ports[TASK_FLAVOR_INSPECT] = IP_NULL;
- for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) {
+ if (immovable_control_port_enabled) {
+ ip_lock(old_pport);
+ ipc_kobject_set_atomically(old_pport, IKO_NULL, IKOT_NONE);
+ task->exec_token += 1;
+ ip_unlock(old_pport);
+ }
+
+ for (int i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) {
old_exc_actions[i] = IP_NULL;
if (i == EXC_CORPSE_NOTIFY && task_corpse_pending_report(task)) {
}
task->itk_debug_control = IP_NULL;
+ if (task->itk_dyld_notify) {
+ notifiers_ptr = task->itk_dyld_notify;
+ task->itk_dyld_notify = NULL;
+ }
+
itk_unlock(task);
#if CONFIG_MACF
ipc_port_release_send(old_sself);
}
- for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) {
+ if (notifiers_ptr) {
+ for (int i = 0; i < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; i++) {
+ if (IP_VALID(notifiers_ptr[i])) {
+ ipc_port_release_send(notifiers_ptr[i]);
+ }
+ }
+ kfree(notifiers_ptr, DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT * sizeof(ipc_port_t));
+ }
+
+ for (int i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) {
if (IP_VALID(old_exc_actions[i])) {
ipc_port_release_send(old_exc_actions[i]);
}
/* destroy all task port flavors */
ipc_port_dealloc_kernel(old_kport);
+ if (immovable_control_port_enabled) {
+ ipc_port_dealloc_kernel(old_pport);
+ }
if (old_rdport != IP_NULL) {
ipc_port_dealloc_kernel(old_rdport);
}
void
ipc_thread_init(
- thread_t thread)
+ thread_t thread,
+ ipc_thread_init_options_t options)
{
ipc_port_t kport;
+ ipc_port_t pport;
+ ipc_kobject_alloc_options_t alloc_options = IPC_KOBJECT_ALLOC_NONE;
+
+ /*
+ * Having the immovable_control_port_enabled boot-arg set does not by itself
+ * mean the thread control port should be made immovable/pinned; the options
+ * must also request it.
+ *
+ * Raw mach threads created via thread_create() have neither INIT_PINNED
+ * nor INIT_IMMOVABLE set.
+ */
+ if (immovable_control_port_enabled && (options & IPC_THREAD_INIT_IMMOVABLE)) {
+ alloc_options |= IPC_KOBJECT_ALLOC_IMMOVABLE_SEND;
+
+ if (pinned_control_port_enabled && (options & IPC_THREAD_INIT_PINNED)) {
+ alloc_options |= IPC_KOBJECT_ALLOC_PINNED;
+ }
+
+ pport = ipc_kobject_alloc_port((ipc_kobject_t)thread,
+ IKOT_THREAD_CONTROL, alloc_options);
+
+ kport = ipc_kobject_alloc_labeled_port((ipc_kobject_t)thread,
+ IKOT_THREAD_CONTROL, IPC_LABEL_SUBST_THREAD, IPC_KOBJECT_ALLOC_NONE);
+ kport->ip_alt_port = pport;
+ } else {
+ kport = ipc_kobject_alloc_port((ipc_kobject_t)thread,
+ IKOT_THREAD_CONTROL, IPC_KOBJECT_ALLOC_NONE);
+
+ pport = kport;
+ }
+
+ thread->ith_thread_ports[THREAD_FLAVOR_CONTROL] = kport;
+
+ thread->ith_settable_self = ipc_port_make_send(kport);
- kport = ipc_kobject_alloc_port((ipc_kobject_t)thread, IKOT_THREAD_CONTROL,
- IPC_KOBJECT_ALLOC_MAKE_SEND);
+ thread->ith_self = pport;
- thread->ith_settable_self = thread->ith_self[THREAD_FLAVOR_CONTROL] = kport;
- thread->ith_self[THREAD_FLAVOR_INSPECT] = IP_NULL;
- thread->ith_self[THREAD_FLAVOR_READ] = IP_NULL;
thread->ith_special_reply_port = NULL;
thread->exc_actions = NULL;
thread->ith_assertions = 0;
#endif
+ thread->ipc_active = true;
ipc_kmsg_queue_init(&thread->ith_messages);
thread->ith_rpc_reply = IP_NULL;
ipc_thread_disable(
thread_t thread)
{
- ipc_port_t kport = thread->ith_self[THREAD_FLAVOR_CONTROL];
- ipc_port_t iport = thread->ith_self[THREAD_FLAVOR_INSPECT];
- ipc_port_t rdport = thread->ith_self[THREAD_FLAVOR_READ];
+ ipc_port_t kport = thread->ith_thread_ports[THREAD_FLAVOR_CONTROL];
+ ipc_port_t iport = thread->ith_thread_ports[THREAD_FLAVOR_INSPECT];
+ ipc_port_t rdport = thread->ith_thread_ports[THREAD_FLAVOR_READ];
+ ipc_port_t pport = thread->ith_self;
+
+ /*
+ * This innocuous-looking line is load-bearing.
+ *
+ * It disables the creation of lazily-made ports.
+ * We must do so before we drop the last reference on the thread,
+ * as thread ports do not own a reference on the thread, and
+ * convert_port_to_thread* will crash trying to resurrect a thread.
+ */
+ thread->ipc_active = false;
if (kport != IP_NULL) {
- ipc_kobject_set(kport, IKO_NULL, IKOT_NONE);
+ ip_lock(kport);
+ kport->ip_alt_port = IP_NULL;
+ ipc_kobject_set_atomically(kport, IKO_NULL, IKOT_NONE);
+ ip_unlock(kport);
}
if (iport != IP_NULL) {
ipc_kobject_set(rdport, IKO_NULL, IKOT_NONE);
}
+ if (pport != kport && pport != IP_NULL) {
+ assert(immovable_control_port_enabled);
+ assert(pport->ip_immovable_send);
+ ipc_kobject_set(pport, IKO_NULL, IKOT_NONE);
+ }
+
/* unbind the thread special reply port */
if (IP_VALID(thread->ith_special_reply_port)) {
ipc_port_unbind_special_reply_port(thread, TRUE);
ipc_port_t iport = IP_NULL;
ipc_port_t rdport = IP_NULL;
ipc_port_t ith_rpc_reply = IP_NULL;
+ ipc_port_t pport = IP_NULL;
thread_mtx_lock(thread);
- kport = thread->ith_self[THREAD_FLAVOR_CONTROL];
- iport = thread->ith_self[THREAD_FLAVOR_INSPECT];
- rdport = thread->ith_self[THREAD_FLAVOR_READ];
+ /*
+ * If we ever failed to clear ipc_active before the last reference
+ * was dropped, lazy ports might be made and used after the last
+ * reference is dropped and cause use after free (see comment in
+ * ipc_thread_disable()).
+ */
+ assert(!thread->ipc_active);
+
+ kport = thread->ith_thread_ports[THREAD_FLAVOR_CONTROL];
+ iport = thread->ith_thread_ports[THREAD_FLAVOR_INSPECT];
+ rdport = thread->ith_thread_ports[THREAD_FLAVOR_READ];
+ pport = thread->ith_self;
if (kport != IP_NULL) {
if (IP_VALID(thread->ith_settable_self)) {
ipc_port_release_send(thread->ith_settable_self);
}
- thread->ith_settable_self = thread->ith_self[THREAD_FLAVOR_CONTROL] = IP_NULL;
- thread->ith_self[THREAD_FLAVOR_INSPECT] = IP_NULL;
- thread->ith_self[THREAD_FLAVOR_READ] = IP_NULL;
+ thread->ith_thread_ports[THREAD_FLAVOR_CONTROL] = IP_NULL;
+ thread->ith_thread_ports[THREAD_FLAVOR_READ] = IP_NULL;
+ thread->ith_thread_ports[THREAD_FLAVOR_INSPECT] = IP_NULL;
+ thread->ith_settable_self = IP_NULL;
+ thread->ith_self = IP_NULL;
if (thread->exc_actions != NULL) {
for (int i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) {
thread_mtx_unlock(thread);
+ if (pport != kport && pport != IP_NULL) {
+ /* this thread has an immovable control port */
+ ip_lock(kport);
+ kport->ip_alt_port = IP_NULL;
+ ipc_kobject_set_atomically(kport, IKO_NULL, IKOT_NONE);
+ ip_unlock(kport);
+ ipc_port_dealloc_kernel(pport);
+ }
if (kport != IP_NULL) {
ipc_port_dealloc_kernel(kport);
}
ipc_thread_reset(
thread_t thread)
{
- ipc_port_t old_kport, new_kport;
+ ipc_port_t old_kport, new_kport, old_pport, new_pport;
ipc_port_t old_sself;
ipc_port_t old_rdport;
ipc_port_t old_iport;
ipc_port_t old_exc_actions[EXC_TYPES_COUNT];
boolean_t has_old_exc_actions = FALSE;
+ boolean_t thread_is_immovable, thread_is_pinned;
int i;
#if CONFIG_MACF
struct label *new_label = mac_exc_create_label();
#endif
- new_kport = ipc_kobject_alloc_port((ipc_kobject_t)thread, IKOT_THREAD_CONTROL,
- IPC_KOBJECT_ALLOC_MAKE_SEND);
+ thread_is_immovable = thread->ith_self->ip_immovable_send;
+ thread_is_pinned = thread->ith_self->ip_pinned;
+
+ if (thread_is_immovable) {
+ ipc_kobject_alloc_options_t alloc_options = IPC_KOBJECT_ALLOC_NONE;
+
+ if (thread_is_pinned) {
+ assert(pinned_control_port_enabled);
+ alloc_options |= IPC_KOBJECT_ALLOC_PINNED;
+ }
+ if (thread_is_immovable) {
+ alloc_options |= IPC_KOBJECT_ALLOC_IMMOVABLE_SEND;
+ }
+ new_pport = ipc_kobject_alloc_port((ipc_kobject_t)thread,
+ IKOT_THREAD_CONTROL, alloc_options);
+
+ new_kport = ipc_kobject_alloc_labeled_port((ipc_kobject_t)thread,
+ IKOT_THREAD_CONTROL, IPC_LABEL_SUBST_THREAD,
+ IPC_KOBJECT_ALLOC_NONE);
+ new_kport->ip_alt_port = new_pport;
+ } else {
+ new_kport = ipc_kobject_alloc_port((ipc_kobject_t)thread,
+ IKOT_THREAD_CONTROL, IPC_KOBJECT_ALLOC_NONE);
+
+ new_pport = new_kport;
+ }
thread_mtx_lock(thread);
- old_kport = thread->ith_self[THREAD_FLAVOR_CONTROL];
- old_rdport = thread->ith_self[THREAD_FLAVOR_READ];
- old_iport = thread->ith_self[THREAD_FLAVOR_INSPECT];
+ old_kport = thread->ith_thread_ports[THREAD_FLAVOR_CONTROL];
+ old_rdport = thread->ith_thread_ports[THREAD_FLAVOR_READ];
+ old_iport = thread->ith_thread_ports[THREAD_FLAVOR_INSPECT];
+
old_sself = thread->ith_settable_self;
+ old_pport = thread->ith_self;
if (old_kport == IP_NULL && thread->inspection == FALSE) {
- /* the is already terminated (can this happen?) */
+ /* thread is already terminated (can this happen?) */
thread_mtx_unlock(thread);
- ipc_port_release_send(new_kport);
ipc_port_dealloc_kernel(new_kport);
+ if (thread_is_immovable) {
+ ipc_port_dealloc_kernel(new_pport);
+ }
#if CONFIG_MACF
mac_exc_free_label(new_label);
#endif
return;
}
- thread->ith_settable_self = thread->ith_self[THREAD_FLAVOR_CONTROL] = new_kport;
- thread->ith_self[THREAD_FLAVOR_READ] = IP_NULL;
- thread->ith_self[THREAD_FLAVOR_INSPECT] = IP_NULL;
+ thread->ipc_active = true;
+ thread->ith_thread_ports[THREAD_FLAVOR_CONTROL] = new_kport;
+ thread->ith_self = new_pport;
+ thread->ith_settable_self = ipc_port_make_send(new_kport);
+ thread->ith_thread_ports[THREAD_FLAVOR_INSPECT] = IP_NULL;
+ thread->ith_thread_ports[THREAD_FLAVOR_READ] = IP_NULL;
if (old_kport != IP_NULL) {
- ipc_kobject_set(old_kport, IKO_NULL, IKOT_NONE);
+ ip_lock(old_kport);
+ old_kport->ip_alt_port = IP_NULL;
+ ipc_kobject_set_atomically(old_kport, IKO_NULL, IKOT_NONE);
+ ip_unlock(old_kport);
}
if (old_rdport != IP_NULL) {
ipc_kobject_set(old_rdport, IKO_NULL, IKOT_NONE);
if (old_iport != IP_NULL) {
ipc_kobject_set(old_iport, IKO_NULL, IKOT_NONE);
}
+ if (thread_is_immovable && old_pport != IP_NULL) {
+ ipc_kobject_set(old_pport, IKO_NULL, IKOT_NONE);
+ }
/*
* Only ports that were set by root-owned processes
ipc_port_dealloc_kernel(old_iport);
}
+ if (thread_is_immovable && old_pport != IP_NULL) {
+ ipc_port_dealloc_kernel(old_pport);
+ }
+
/* unbind the thread special reply port */
if (IP_VALID(thread->ith_special_reply_port)) {
ipc_port_unbind_special_reply_port(thread, TRUE);
retrieve_task_self_fast(
task_t task)
{
- __assert_only ipc_port_t sright;
- ipc_port_t port;
+ ipc_port_t port = IP_NULL;
assert(task == current_task());
itk_lock(task);
- assert(task->itk_self[TASK_FLAVOR_CONTROL] != IP_NULL);
-
- if ((port = task->itk_settable_self) == task->itk_self[TASK_FLAVOR_CONTROL]) {
- /* no interposing */
- sright = ipc_port_copy_send(port);
- assert(sright == port);
+ assert(task->itk_self != IP_NULL);
+
+ if (task->itk_settable_self == task->itk_task_ports[TASK_FLAVOR_CONTROL]) {
+ /* no interposing, return the IMMOVABLE port */
+ port = ipc_port_make_send(task->itk_self);
+ if (immovable_control_port_enabled) {
+ assert(port->ip_immovable_send == 1);
+ if (pinned_control_port_enabled) {
+ /* pinned port is also immovable */
+ assert(port->ip_pinned == 1);
+ }
+ }
} else {
- port = ipc_port_copy_send(port);
+ port = ipc_port_copy_send(task->itk_settable_self);
}
itk_unlock(task);
return port;
}
+/*
+ * Routine: mach_task_is_self
+ * Purpose:
+ * [MIG call] Checks if the task (control/read/inspect/name/movable)
+ * port is pointing to current_task.
+ */
+kern_return_t
+mach_task_is_self(
+ task_t task,
+ boolean_t *is_self)
+{
+ if (task == TASK_NULL) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ *is_self = (task == current_task());
+
+ return KERN_SUCCESS;
+}
+
/*
* Routine: retrieve_thread_self_fast
* Purpose:
retrieve_thread_self_fast(
thread_t thread)
{
- __assert_only ipc_port_t sright;
- ipc_port_t port;
+ ipc_port_t port = IP_NULL;
assert(thread == current_thread());
thread_mtx_lock(thread);
- assert(thread->ith_self[THREAD_FLAVOR_CONTROL] != IP_NULL);
+ assert(thread->ith_self != IP_NULL);
- if ((port = thread->ith_settable_self) == thread->ith_self[THREAD_FLAVOR_CONTROL]) {
- /* no interposing */
- sright = ipc_port_copy_send(port);
- assert(sright == port);
+ if (thread->ith_settable_self == thread->ith_thread_ports[THREAD_FLAVOR_CONTROL]) {
+ /* no interposing, return IMMOVABLE_PORT */
+ port = ipc_port_make_send(thread->ith_self);
} else {
- port = ipc_port_copy_send(port);
+ port = ipc_port_copy_send(thread->ith_settable_self);
}
thread_mtx_unlock(thread);
int which,
ipc_port_t *portp);
-kern_return_t
-static
+static kern_return_t
thread_get_special_port_internal(
thread_inspect_t thread,
int which,
}
*portp = port;
-
return KERN_SUCCESS;
}
return thread_get_special_port_internal(thread, which, portp, THREAD_FLAVOR_CONTROL);
}
+static ipc_port_t
+thread_get_non_substituted_self(thread_t thread)
+{
+ ipc_port_t port = IP_NULL;
+
+ thread_mtx_lock(thread);
+ port = thread->ith_settable_self;
+ if (IP_VALID(port)) {
+ ip_reference(port);
+ }
+ thread_mtx_unlock(thread);
+
+ if (IP_VALID(port)) {
+ /* consumes the port reference */
+ return ipc_kobject_alloc_subst_once(port);
+ }
+
+ return port;
+}
+
kern_return_t
thread_get_special_port_from_user(
mach_port_t port,
ipc_port_t *portp)
{
ipc_kobject_type_t kotype;
- kern_return_t kr;
+ mach_thread_flavor_t flavor;
+ kern_return_t kr = KERN_SUCCESS;
- thread_t thread = convert_port_to_thread_check_type(port, &kotype, THREAD_FLAVOR_INSPECT, FALSE);
+ thread_t thread = convert_port_to_thread_check_type(port, &kotype,
+ THREAD_FLAVOR_INSPECT, FALSE);
if (thread == THREAD_NULL) {
return KERN_INVALID_ARGUMENT;
}
+ if (which == THREAD_KERNEL_PORT && thread->task == current_task()) {
+#if CONFIG_MACF
+ /*
+ * only check for threads belonging to current_task,
+ * because foreign thread ports are always movable
+ */
+ if (mac_task_check_get_movable_control_port()) {
+ kr = KERN_DENIED;
+ goto out;
+ }
+#endif
+ if (kotype == IKOT_THREAD_CONTROL) {
+ *portp = thread_get_non_substituted_self(thread);
+ goto out;
+ }
+ }
+
switch (kotype) {
case IKOT_THREAD_CONTROL:
- kr = thread_get_special_port_internal(thread, which, portp, THREAD_FLAVOR_CONTROL);
+ flavor = THREAD_FLAVOR_CONTROL;
break;
case IKOT_THREAD_READ:
- kr = thread_get_special_port_internal(thread, which, portp, THREAD_FLAVOR_READ);
+ flavor = THREAD_FLAVOR_READ;
break;
case IKOT_THREAD_INSPECT:
- kr = thread_get_special_port_internal(thread, which, portp, THREAD_FLAVOR_INSPECT);
+ flavor = THREAD_FLAVOR_INSPECT;
break;
default:
panic("strange kobject type");
- break;
}
+ kr = thread_get_special_port_internal(thread, which, portp, flavor);
+out:
thread_deallocate(thread);
return kr;
}
* Returns:
* KERN_SUCCESS Changed the special port.
* KERN_INVALID_ARGUMENT The thread is null.
+ * KERN_INVALID_RIGHT Port is marked as immovable.
* KERN_FAILURE The thread is dead.
* KERN_INVALID_ARGUMENT Invalid special port.
* KERN_NO_ACCESS Restricted access to set port.
thread_set_special_port(
thread_t thread,
int which,
- ipc_port_t port)
+ ipc_port_t port)
{
kern_return_t result = KERN_SUCCESS;
ipc_port_t *whichp, old = IP_NULL;
return KERN_INVALID_ARGUMENT;
}
+ if (IP_VALID(port) && (port->ip_immovable_receive || port->ip_immovable_send)) {
+ return KERN_INVALID_RIGHT;
+ }
+
switch (which) {
case THREAD_KERNEL_PORT:
#if CONFIG_CSR
* Conditions:
* Nothing locked.
* Returns:
- * KERN_SUCCESS Extracted a send right.
+ * KERN_SUCCESS Extracted a send right.
* KERN_INVALID_ARGUMENT The task is null.
- * KERN_FAILURE The task/space is dead.
+ * KERN_FAILURE The task/space is dead.
* KERN_INVALID_ARGUMENT Invalid special port.
*/
}
itk_lock(task);
- if (task->itk_self[TASK_FLAVOR_CONTROL] == IP_NULL) {
+ if (!task->ipc_active) {
itk_unlock(task);
return KERN_FAILURE;
}
switch (which) {
case TASK_KERNEL_PORT:
port = ipc_port_copy_send(task->itk_settable_self);
+ itk_unlock(task);
break;
case TASK_READ_PORT:
/* convert_task_to_port_with_flavor consumes a task reference */
task_reference(task);
port = convert_task_to_port_with_flavor(task, current_flavor);
- goto copyout;
+ break;
case TASK_NAME_PORT:
- port = ipc_port_make_send(task->itk_self[TASK_FLAVOR_NAME]);
+ port = ipc_port_make_send(task->itk_task_ports[TASK_FLAVOR_NAME]);
+ itk_unlock(task);
break;
case TASK_HOST_PORT:
port = ipc_port_copy_send(task->itk_host);
+ itk_unlock(task);
break;
case TASK_BOOTSTRAP_PORT:
port = ipc_port_copy_send(task->itk_bootstrap);
+ itk_unlock(task);
break;
case TASK_SEATBELT_PORT:
port = ipc_port_copy_send(task->itk_seatbelt);
+ itk_unlock(task);
break;
case TASK_ACCESS_PORT:
port = ipc_port_copy_send(task->itk_task_access);
+ itk_unlock(task);
break;
case TASK_DEBUG_CONTROL_PORT:
port = ipc_port_copy_send(task->itk_debug_control);
+ itk_unlock(task);
break;
default:
return KERN_INVALID_ARGUMENT;
}
- itk_unlock(task);
-
-copyout:
*portp = port;
return KERN_SUCCESS;
}
return task_get_special_port_internal(task, which, portp, TASK_FLAVOR_CONTROL);
}
+static ipc_port_t
+task_get_non_substituted_self(task_t task)
+{
+ ipc_port_t port = IP_NULL;
+
+ itk_lock(task);
+ port = task->itk_settable_self;
+ if (IP_VALID(port)) {
+ ip_reference(port);
+ }
+ itk_unlock(task);
+
+ if (IP_VALID(port)) {
+ /* consumes the port reference */
+ return ipc_kobject_alloc_subst_once(port);
+ }
+
+ return port;
+}
kern_return_t
task_get_special_port_from_user(
mach_port_t port,
ipc_port_t *portp)
{
ipc_kobject_type_t kotype;
- kern_return_t kr;
+ mach_task_flavor_t flavor;
+ kern_return_t kr = KERN_SUCCESS;
- task_t task = convert_port_to_task_check_type(port, &kotype, TASK_FLAVOR_INSPECT, FALSE);
+ task_t task = convert_port_to_task_check_type(port, &kotype,
+ TASK_FLAVOR_INSPECT, FALSE);
if (task == TASK_NULL) {
return KERN_INVALID_ARGUMENT;
}
+ if (which == TASK_KERNEL_PORT && task == current_task()) {
+#if CONFIG_MACF
+ /*
+ * only check for current_task,
+ * because foreign task ports are always movable
+ */
+ if (mac_task_check_get_movable_control_port()) {
+ kr = KERN_DENIED;
+ goto out;
+ }
+#endif
+ if (kotype == IKOT_TASK_CONTROL) {
+ *portp = task_get_non_substituted_self(task);
+ goto out;
+ }
+ }
+
switch (kotype) {
case IKOT_TASK_CONTROL:
- kr = task_get_special_port_internal(task, which, portp, TASK_FLAVOR_CONTROL);
+ flavor = TASK_FLAVOR_CONTROL;
break;
case IKOT_TASK_READ:
- kr = task_get_special_port_internal(task, which, portp, TASK_FLAVOR_READ);
+ flavor = TASK_FLAVOR_READ;
break;
case IKOT_TASK_INSPECT:
- kr = task_get_special_port_internal(task, which, portp, TASK_FLAVOR_INSPECT);
+ flavor = TASK_FLAVOR_INSPECT;
break;
default:
panic("strange kobject type");
- break;
}
+ kr = task_get_special_port_internal(task, which, portp, flavor);
+out:
task_deallocate(task);
return kr;
}
* Nothing locked. If successful, consumes
* the supplied send right.
* Returns:
- * KERN_SUCCESS Changed the special port.
+ * KERN_SUCCESS Changed the special port.
* KERN_INVALID_ARGUMENT The task is null.
- * KERN_FAILURE The task/space is dead.
+ * KERN_INVALID_RIGHT Port is marked as immovable.
+ * KERN_FAILURE The task/space is dead.
* KERN_INVALID_ARGUMENT Invalid special port.
- * KERN_NO_ACCESS Restricted access to set port.
+ * KERN_NO_ACCESS Restricted access to set port.
*/
kern_return_t
return KERN_NO_ACCESS;
}
+ if (IP_VALID(port) && (port->ip_immovable_receive || port->ip_immovable_send)) {
+ return KERN_INVALID_RIGHT;
+ }
+
switch (which) {
case TASK_KERNEL_PORT:
case TASK_HOST_PORT:
}
itk_lock(task);
- if (task->itk_self[TASK_FLAVOR_CONTROL] == IP_NULL) {
+ if (!task->ipc_active) {
rc = KERN_FAILURE;
goto out_unlock;
}
* Nothing locked. If successful, consumes
* the supplied rights and memory.
* Returns:
- * KERN_SUCCESS Stashed the port rights.
+ * KERN_SUCCESS Stashed the port rights.
+ * KERN_INVALID_RIGHT Port in array is marked immovable.
* KERN_INVALID_ARGUMENT The task is null.
* KERN_INVALID_ARGUMENT The task is dead.
* KERN_INVALID_ARGUMENT The memory param is null.
for (i = 0; i < portsCnt; i++) {
ports[i] = memory[i];
+ if (IP_VALID(ports[i]) && (ports[i]->ip_immovable_receive || ports[i]->ip_immovable_send)) {
+ return KERN_INVALID_RIGHT;
+ }
}
for (; i < TASK_PORT_REGISTER_MAX; i++) {
ports[i] = IP_NULL;
}
itk_lock(task);
- if (task->itk_self[TASK_FLAVOR_CONTROL] == IP_NULL) {
+ if (!task->ipc_active) {
itk_unlock(task);
return KERN_INVALID_ARGUMENT;
}
}
itk_lock(task);
- if (task->itk_self[TASK_FLAVOR_CONTROL] == IP_NULL) {
+ if (!task->ipc_active) {
itk_unlock(task);
kfree(memory, size);
* Conditions:
* Nothing locked, blocking OK.
*/
-task_t
+static task_t
convert_port_to_locked_task(ipc_port_t port, boolean_t eval)
{
int try_failed_count = 0;
* Conditions:
* Nothing locked, blocking OK.
*/
-task_inspect_t
+static task_inspect_t
convert_port_to_locked_task_inspect(ipc_port_t port)
{
int try_failed_count = 0;
* Conditions:
* Nothing locked, blocking OK.
*/
-task_read_t
-convert_port_to_locked_task_read(ipc_port_t port)
+static task_read_t
+convert_port_to_locked_task_read(
+ ipc_port_t port,
+ boolean_t eval)
{
int try_failed_count = 0;
while (IP_VALID(port)) {
+ task_t ct = current_task();
task_read_t task;
ip_lock(port);
ip_unlock(port);
return TASK_READ_NULL;
}
- task = (task_read_t)port->ip_kobject;
+ task = (task_read_t)ipc_kobject_get(port);
assert(task != TASK_READ_NULL);
+
+ if (eval && task_conversion_eval(ct, task)) {
+ ip_unlock(port);
+ return TASK_READ_NULL;
+ }
+
/*
* Normal lock ordering puts task_lock() before ip_lock().
* Attempt out-of-order locking here.
static task_read_t
convert_port_to_task_read_locked(
- ipc_port_t port)
+ ipc_port_t port,
+ boolean_t eval)
{
task_read_t task = TASK_READ_NULL;
if (ip_kotype(port) == IKOT_TASK_CONTROL ||
ip_kotype(port) == IKOT_TASK_READ) {
task_t ct = current_task();
- task = (task_t)port->ip_kobject;
+ task = (task_read_t)ipc_kobject_get(port);
assert(task != TASK_READ_NULL);
- if (task_conversion_eval(ct, task)) {
+ if (eval && task_conversion_eval(ct, task)) {
return TASK_READ_NULL;
}
break;
case IKOT_TASK_READ:
if (at_most >= TASK_FLAVOR_READ) {
- task = convert_port_to_task_read(port);
+ task = eval_check ? convert_port_to_task_read(port) : convert_port_to_task_read_no_eval(port);
if (task != TASK_READ_NULL) {
type = IKOT_TASK_READ;
}
break;
case IKOT_THREAD_READ:
if (at_most >= THREAD_FLAVOR_READ) {
- thread = convert_port_to_thread_read(port);
+ thread = eval_check ? convert_port_to_thread_read(port) : convert_port_to_thread_read_no_eval(port);
if (thread != THREAD_READ_NULL) {
type = IKOT_THREAD_READ;
}
break;
case IKOT_TASK_READ:
if (at_most >= TASK_FLAVOR_READ) {
- space = convert_port_to_space_read(port);
+ space = eval_check ? convert_port_to_space_read(port) : convert_port_to_space_read_no_eval(port);
if (space != IPC_SPACE_READ_NULL) {
type = IKOT_TASK_READ;
}
if (IP_VALID(port)) {
ip_lock(port);
if (ip_active(port)) {
- task = convert_port_to_task_read_locked(port);
+ task = convert_port_to_task_read_locked(port, TRUE);
+ }
+ ip_unlock(port);
+ }
+
+ return task;
+}
+
+static task_read_t
+convert_port_to_task_read_no_eval(
+ ipc_port_t port)
+{
+ task_read_t task = TASK_READ_NULL;
+
+ if (IP_VALID(port)) {
+ ip_lock(port);
+ if (ip_active(port)) {
+ task = convert_port_to_task_read_locked(port, FALSE);
}
ip_unlock(port);
}
task = convert_port_to_locked_task(port, eval);
break;
case TASK_FLAVOR_READ:
- task = convert_port_to_locked_task_read(port);
+ task = convert_port_to_locked_task_read(port, eval);
break;
case TASK_FLAVOR_INSPECT:
task = convert_port_to_locked_task_inspect(port);
return convert_port_to_space_with_flavor(port, TASK_FLAVOR_READ, TRUE);
}
+static ipc_space_read_t
+convert_port_to_space_read_no_eval(
+ ipc_port_t port)
+{
+ return convert_port_to_space_with_flavor(port, TASK_FLAVOR_READ, FALSE);
+}
+
ipc_space_inspect_t
convert_port_to_space_inspect(
ipc_port_t port)
switch (flavor) {
case TASK_FLAVOR_CONTROL:
- task = convert_port_to_locked_task(port, TRUE);
+ task = convert_port_to_locked_task(port, TRUE); /* always eval */
break;
case TASK_FLAVOR_READ:
- task = convert_port_to_locked_task_read(port);
+ task = convert_port_to_locked_task_read(port, TRUE); /* always eval */
break;
case TASK_FLAVOR_INSPECT:
- task = convert_port_to_locked_task_inspect(port);
+ task = convert_port_to_locked_task_inspect(port); /* always no eval */
break;
default:
task = TASK_NULL;
pmap_require(map->pmap);
}
- vm_map_reference_swap(map);
+ vm_map_reference(map);
task_unlock(task);
return map;
}
if (ip_kotype(port) == IKOT_THREAD_CONTROL ||
ip_kotype(port) == IKOT_THREAD_READ ||
ip_kotype(port) == IKOT_THREAD_INSPECT) {
- thread = (thread_inspect_t)port->ip_kobject;
+ thread = (thread_inspect_t)ipc_kobject_get(port);
assert(thread != THREAD_INSPECT_NULL);
thread_reference_internal((thread_t)thread);
}
*/
static thread_read_t
convert_port_to_thread_read_locked(
- ipc_port_t port)
+ ipc_port_t port,
+ boolean_t eval)
{
thread_read_t thread = THREAD_READ_NULL;
assert(thread != THREAD_READ_NULL);
/* Use task conversion rules for thread control conversions */
- if (task_conversion_eval(current_task(), thread->task) != KERN_SUCCESS) {
+ if (eval && task_conversion_eval(current_task(), thread->task) != KERN_SUCCESS) {
return THREAD_READ_NULL;
}
if (IP_VALID(port)) {
ip_lock(port);
if (ip_active(port)) {
- thread = convert_port_to_thread_read_locked(port);
+ thread = convert_port_to_thread_read_locked(port, TRUE);
+ }
+ ip_unlock(port);
+ }
+
+ return thread;
+}
+
+static thread_read_t
+convert_port_to_thread_read_no_eval(
+ ipc_port_t port)
+{
+ thread_read_t thread = THREAD_READ_NULL;
+
+ if (IP_VALID(port)) {
+ ip_lock(port);
+ if (ip_active(port)) {
+ thread = convert_port_to_thread_read_locked(port, FALSE);
}
ip_unlock(port);
}
thread_mtx_lock(thread);
- if (thread->ith_self[THREAD_FLAVOR_CONTROL] == IP_NULL) {
+ if (!thread->ipc_active) {
goto exit;
}
if (flavor == THREAD_FLAVOR_CONTROL) {
- port = ipc_port_make_send(thread->ith_self[flavor]);
+ port = ipc_port_make_send(thread->ith_thread_ports[flavor]);
} else {
- if (!thread->active) {
- goto exit;
- }
ipc_kobject_type_t kotype = (flavor == THREAD_FLAVOR_READ) ? IKOT_THREAD_READ : IKOT_THREAD_INSPECT;
/*
* Claim a send right on the thread read/inspect port, and request a no-senders
* send-once notification firing, and this is done under the thread mutex
* rather than with atomics.
*/
- (void)ipc_kobject_make_send_lazy_alloc_port(&thread->ith_self[flavor], (ipc_kobject_t)thread,
- kotype, false, 0);
- port = thread->ith_self[flavor];
+ (void)ipc_kobject_make_send_lazy_alloc_port(&thread->ith_thread_ports[flavor], (ipc_kobject_t)thread,
+ kotype, IPC_KOBJECT_ALLOC_IMMOVABLE_SEND, false, 0);
+ port = thread->ith_thread_ports[flavor];
}
exit:
if (MACH_PORT_VALID(name)) {
kr = ipc_port_translate_send(current_space(), name, &kport);
if (kr == KERN_SUCCESS) {
- tr = convert_port_to_task_read_locked(kport);
+ tr = convert_port_to_task_read_locked(kport, TRUE);
ip_unlock(kport);
}
}
* Purpose:
* Convert from a port name to a task reference
* A name of MACH_PORT_NULL is valid for the null task.
- * It doesnt run the task_conversion_eval check if the port
- * is of type IKOT_TASK_CONTROL.
+ * Skips task_conversion_eval() during conversion.
* Conditions:
* Nothing locked.
*/
if (MACH_PORT_VALID(name)) {
kr = ipc_port_translate_send(current_space(), name, &kport);
if (kr == KERN_SUCCESS) {
- switch (ip_kotype(kport)) {
- case IKOT_TASK_CONTROL:
- tr = convert_port_to_task_locked(kport, NULL, FALSE);
- break;
- case IKOT_TASK_READ:
- tr = convert_port_to_task_read_locked(kport);
- break;
- default:
- break;
- }
+ tr = convert_port_to_task_read_locked(kport, FALSE);
ip_unlock(kport);
}
}
return tr;
}
-/*
- * Routine: port_name_to_task_inspect
- * Purpose:
- * Convert from a port name to a task reference
- * A name of MACH_PORT_NULL is valid for the null task.
- * Conditions:
- * Nothing locked.
- */
-task_inspect_t
-port_name_to_task_inspect(
- mach_port_name_t name)
-{
- ipc_port_t kport;
- kern_return_t kr;
- task_inspect_t ti = TASK_INSPECT_NULL;
-
- if (MACH_PORT_VALID(name)) {
- kr = ipc_port_translate_send(current_space(), name, &kport);
- if (kr == KERN_SUCCESS) {
- ti = convert_port_to_task_inspect_locked(kport);
- ip_unlock(kport);
- }
- }
- return ti;
-}
-
/*
* Routine: port_name_to_task_name
* Purpose:
itk_lock(task);
+ if (!task->ipc_active) {
+ goto exit;
+ }
+
switch (flavor) {
case TASK_FLAVOR_CONTROL:
case TASK_FLAVOR_NAME:
- port = ipc_port_make_send(task->itk_self[flavor]);
+ port = ipc_port_make_send(task->itk_task_ports[flavor]);
break;
/*
* Claim a send right on the task read/inspect port, and request a no-senders
*/
case TASK_FLAVOR_READ:
case TASK_FLAVOR_INSPECT:
- if (task->itk_self[TASK_FLAVOR_CONTROL] == IP_NULL) {
- /* task is either disabled or terminated */
- goto exit;
- }
kotype = (flavor == TASK_FLAVOR_READ) ? IKOT_TASK_READ : IKOT_TASK_INSPECT;
- (void)ipc_kobject_make_send_lazy_alloc_port((ipc_port_t *) &task->itk_self[flavor],
- (ipc_kobject_t)task, kotype, true, OS_PTRAUTH_DISCRIMINATOR("task.itk_self"));
- port = task->itk_self[flavor];
+ (void)ipc_kobject_make_send_lazy_alloc_port((ipc_port_t *) &task->itk_task_ports[flavor],
+ (ipc_kobject_t)task, kotype, IPC_KOBJECT_ALLOC_IMMOVABLE_SEND, true,
+ OS_PTRAUTH_DISCRIMINATOR("task.itk_task_ports"));
+ port = task->itk_task_ports[flavor];
break;
}
return convert_task_to_port_with_flavor(task, TASK_FLAVOR_NAME);
}
+ipc_port_t
+convert_task_to_port_pinned(
+ task_t task)
+{
+ ipc_port_t port = IP_NULL;
+
+ itk_lock(task);
+
+ if (task->ipc_active && task->itk_self != IP_NULL) {
+ port = ipc_port_make_send(task->itk_self);
+ }
+
+ itk_unlock(task);
+ task_deallocate(task);
+ return port;
+}
/*
* Routine: convert_task_suspend_token_to_port
* Purpose:
return port;
}
+ipc_port_t
+convert_thread_to_port_pinned(
+ thread_t thread)
+{
+ ipc_port_t port = IP_NULL;
+
+ thread_mtx_lock(thread);
+
+ if (thread->ipc_active && thread->ith_self != IP_NULL) {
+ port = ipc_port_make_send(thread->ith_self);
+ }
+
+ thread_mtx_unlock(thread);
+ thread_deallocate(thread);
+ return port;
+}
/*
* Routine: space_deallocate
* Purpose:
}
}
- if (IP_VALID(new_port)) { /* consume send right */
+ if (IP_VALID(new_port)) { /* consume send right */
ipc_port_release_send(new_port);
}
itk_lock(task);
- if (task->itk_self[TASK_FLAVOR_CONTROL] == IP_NULL) {
+ if (!task->ipc_active) {
itk_unlock(task);
-
return KERN_FAILURE;
}
}
}
- if (IP_VALID(new_port)) { /* consume send right */
+ if (IP_VALID(new_port)) { /* consume send right */
ipc_port_release_send(new_port);
}
}
}
- if (IP_VALID(new_port)) { /* consume send right */
+ if (IP_VALID(new_port)) { /* consume send right */
ipc_port_release_send(new_port);
}
itk_lock(task);
- if (task->itk_self[TASK_FLAVOR_CONTROL] == IP_NULL) {
+ if (!task->ipc_active) {
itk_unlock(task);
#if CONFIG_MACF
mac_exc_free_label(new_label);
}
}
- if (IP_VALID(new_port)) { /* consume send right */
+ if (IP_VALID(new_port)) { /* consume send right */
ipc_port_release_send(new_port);
}
* Illegal mask bit set.
* KERN_FAILURE The thread is dead.
*/
-kern_return_t
-thread_get_exception_ports(
- thread_t thread,
- exception_mask_t exception_mask,
- exception_mask_array_t masks,
- mach_msg_type_number_t *CountCnt,
- exception_port_array_t ports,
- exception_behavior_array_t behaviors,
- thread_state_flavor_array_t flavors);
-
-kern_return_t
-thread_get_exception_ports(
- thread_t thread,
- exception_mask_t exception_mask,
+static kern_return_t
+thread_get_exception_ports_internal(
+ thread_t thread,
+ exception_mask_t exception_mask,
exception_mask_array_t masks,
mach_msg_type_number_t *CountCnt,
+ exception_port_info_array_t ports_info,
exception_port_array_t ports,
exception_behavior_array_t behaviors,
thread_state_flavor_array_t flavors)
{
- unsigned int i, j, count;
+ unsigned int count;
+ boolean_t info_only = (ports_info != NULL);
+ boolean_t dbg_ok = TRUE;
+ ipc_port_t port_ptrs[EXC_TYPES_COUNT]; /* raw pointers only; no send rights held */
if (thread == THREAD_NULL) {
return KERN_INVALID_ARGUMENT;
return KERN_INVALID_ARGUMENT;
}
+ if (!info_only && !ports) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+#if !(DEVELOPMENT || DEBUG) && CONFIG_MACF
+ if (info_only && mac_task_check_expose_task(kernel_task, TASK_FLAVOR_CONTROL) == 0) {
+ dbg_ok = TRUE;
+ } else {
+ dbg_ok = FALSE;
+ }
+#endif
+
thread_mtx_lock(thread);
if (!thread->active) {
goto done;
}
- for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) {
+ for (int i = FIRST_EXCEPTION, j = 0; i < EXC_TYPES_COUNT; ++i) {
if (exception_mask & (1 << i)) {
+ ipc_port_t exc_port = thread->exc_actions[i].port;
+ exception_behavior_t exc_behavior = thread->exc_actions[i].behavior;
+ thread_state_flavor_t exc_flavor = thread->exc_actions[i].flavor;
+
for (j = 0; j < count; ++j) {
/*
* search for an identical entry, if found
* set corresponding mask for this exception.
*/
- if (thread->exc_actions[i].port == ports[j] &&
- thread->exc_actions[i].behavior == behaviors[j] &&
- thread->exc_actions[i].flavor == flavors[j]) {
+ if (exc_port == port_ptrs[j] &&
+ exc_behavior == behaviors[j] &&
+ exc_flavor == flavors[j]) {
masks[j] |= (1 << i);
break;
}
}
- if (j == count) {
+ if (j == count && count < *CountCnt) {
masks[j] = (1 << i);
- ports[j] = ipc_port_copy_send(thread->exc_actions[i].port);
- behaviors[j] = thread->exc_actions[i].behavior;
- flavors[j] = thread->exc_actions[i].flavor;
- ++count;
- if (count >= *CountCnt) {
- break;
+ port_ptrs[j] = exc_port;
+
+ if (info_only) {
+ if (!dbg_ok || !IP_VALID(exc_port)) {
+ /* avoid taking port lock if !dbg_ok */
+ ports_info[j] = (ipc_info_port_t){ .iip_port_object = 0, .iip_receiver_object = 0 };
+ } else {
+ uintptr_t receiver;
+ (void)ipc_port_get_receiver_task(exc_port, &receiver);
+ ports_info[j].iip_port_object = (natural_t)VM_KERNEL_ADDRPERM(exc_port);
+ ports_info[j].iip_receiver_object = receiver ? (natural_t)VM_KERNEL_ADDRPERM(receiver) : 0;
+ }
+ } else {
+ ports[j] = ipc_port_copy_send(exc_port);
}
+ behaviors[j] = exc_behavior;
+ flavors[j] = exc_flavor;
+ ++count;
}
}
}
return KERN_SUCCESS;
}
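For reference, the coalescing loop above can be read in isolation; here is a minimal standalone sketch of the same idea with simplified types (uintptr_t standing in for the port pointer, plain int for behavior and flavor; illustrative only, not kernel code):

#include <stdint.h>

static unsigned int
coalesce_exception_actions(uint32_t exception_mask, unsigned int first_exc,
    unsigned int num_exc, const uintptr_t in_ports[], const int in_behaviors[],
    const int in_flavors[], uint32_t out_masks[], uintptr_t out_ports[],
    int out_behaviors[], int out_flavors[], unsigned int out_max)
{
	unsigned int count = 0;

	for (unsigned int i = first_exc; i < num_exc; ++i) {
		if (!(exception_mask & (1u << i))) {
			continue;
		}
		unsigned int j;
		for (j = 0; j < count; ++j) {
			if (in_ports[i] == out_ports[j] &&
			    in_behaviors[i] == out_behaviors[j] &&
			    in_flavors[i] == out_flavors[j]) {
				out_masks[j] |= (1u << i);  /* fold into the matching slot */
				break;
			}
		}
		if (j == count && count < out_max) {
			out_masks[j] = (1u << i);           /* start a new slot */
			out_ports[j] = in_ports[i];
			out_behaviors[j] = in_behaviors[i];
			out_flavors[j] = in_flavors[i];
			++count;
		}
	}
	return count;
}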
+static kern_return_t
+thread_get_exception_ports(
+ thread_t thread,
+ exception_mask_t exception_mask,
+ exception_mask_array_t masks,
+ mach_msg_type_number_t *CountCnt,
+ exception_port_array_t ports,
+ exception_behavior_array_t behaviors,
+ thread_state_flavor_array_t flavors)
+{
+ return thread_get_exception_ports_internal(thread, exception_mask, masks, CountCnt,
+ NULL, ports, behaviors, flavors);
+}
+
kern_return_t
-thread_get_exception_ports_from_user(
+thread_get_exception_ports_info(
mach_port_t port,
exception_mask_t exception_mask,
exception_mask_array_t masks,
- mach_msg_type_number_t *CountCnt,
- exception_port_array_t ports,
+ mach_msg_type_number_t *CountCnt,
+ exception_port_info_array_t ports_info,
exception_behavior_array_t behaviors,
thread_state_flavor_array_t flavors)
{
kern_return_t kr;
- thread_t thread = convert_port_to_thread_check_type(port, NULL, THREAD_FLAVOR_CONTROL, FALSE);
+ thread_t thread = convert_port_to_thread_read_no_eval(port);
if (thread == THREAD_NULL) {
return KERN_INVALID_ARGUMENT;
}
- kr = thread_get_exception_ports(thread, exception_mask, masks, CountCnt, ports, behaviors, flavors);
+ kr = thread_get_exception_ports_internal(thread, exception_mask, masks, CountCnt,
+ ports_info, NULL, behaviors, flavors);
thread_deallocate(thread);
return kr;
}
kern_return_t
-task_get_exception_ports(
- task_t task,
- exception_mask_t exception_mask,
+thread_get_exception_ports_from_user(
+ mach_port_t port,
+ exception_mask_t exception_mask,
exception_mask_array_t masks,
- mach_msg_type_number_t *CountCnt,
+ mach_msg_type_number_t *CountCnt,
exception_port_array_t ports,
exception_behavior_array_t behaviors,
- thread_state_flavor_array_t flavors);
+ thread_state_flavor_array_t flavors)
+{
+ kern_return_t kr;
-kern_return_t
-task_get_exception_ports(
- task_t task,
- exception_mask_t exception_mask,
+ thread_t thread = convert_port_to_thread_no_eval(port);
+
+ if (thread == THREAD_NULL) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ kr = thread_get_exception_ports(thread, exception_mask, masks, CountCnt, ports, behaviors, flavors);
+
+ thread_deallocate(thread);
+ return kr;
+}
+
+static kern_return_t
+task_get_exception_ports_internal(
+ task_t task,
+ exception_mask_t exception_mask,
exception_mask_array_t masks,
mach_msg_type_number_t *CountCnt,
+ exception_port_info_array_t ports_info,
exception_port_array_t ports,
exception_behavior_array_t behaviors,
thread_state_flavor_array_t flavors)
{
- unsigned int i, j, count;
+ unsigned int count;
+ boolean_t info_only = (ports_info != NULL);
+ boolean_t dbg_ok = TRUE;
+ ipc_port_t port_ptrs[EXC_TYPES_COUNT]; /* raw pointers only; no send rights held */
if (task == TASK_NULL) {
return KERN_INVALID_ARGUMENT;
return KERN_INVALID_ARGUMENT;
}
+ if (!info_only && !ports) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+#if !(DEVELOPMENT || DEBUG) && CONFIG_MACF
+ if (info_only && mac_task_check_expose_task(kernel_task, TASK_FLAVOR_CONTROL) == 0) {
+ dbg_ok = TRUE;
+ } else {
+ dbg_ok = FALSE;
+ }
+#endif
+
itk_lock(task);
- if (task->itk_self[TASK_FLAVOR_CONTROL] == IP_NULL) {
+ if (!task->ipc_active) {
itk_unlock(task);
-
return KERN_FAILURE;
}
count = 0;
- for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) {
+ for (int i = FIRST_EXCEPTION, j = 0; i < EXC_TYPES_COUNT; ++i) {
if (exception_mask & (1 << i)) {
+ ipc_port_t exc_port = task->exc_actions[i].port;
+ exception_behavior_t exc_behavior = task->exc_actions[i].behavior;
+ thread_state_flavor_t exc_flavor = task->exc_actions[i].flavor;
+
for (j = 0; j < count; ++j) {
/*
* search for an identical entry, if found
* set corresponding mask for this exception.
*/
- if (task->exc_actions[i].port == ports[j] &&
- task->exc_actions[i].behavior == behaviors[j] &&
- task->exc_actions[i].flavor == flavors[j]) {
+ if (exc_port == port_ptrs[j] &&
+ exc_behavior == behaviors[j] &&
+ exc_flavor == flavors[j]) {
masks[j] |= (1 << i);
break;
}
}
- if (j == count) {
+ if (j == count && count < *CountCnt) {
masks[j] = (1 << i);
- ports[j] = ipc_port_copy_send(task->exc_actions[i].port);
- behaviors[j] = task->exc_actions[i].behavior;
- flavors[j] = task->exc_actions[i].flavor;
- ++count;
- if (count > *CountCnt) {
- break;
+ port_ptrs[j] = exc_port;
+
+ if (info_only) {
+ if (!dbg_ok || !IP_VALID(exc_port)) {
+ /* avoid taking port lock if !dbg_ok */
+ ports_info[j] = (ipc_info_port_t){ .iip_port_object = 0, .iip_receiver_object = 0 };
+ } else {
+ uintptr_t receiver;
+ (void)ipc_port_get_receiver_task(exc_port, &receiver);
+ ports_info[j].iip_port_object = (natural_t)VM_KERNEL_ADDRPERM(exc_port);
+ ports_info[j].iip_receiver_object = receiver ? (natural_t)VM_KERNEL_ADDRPERM(receiver) : 0;
+ }
+ } else {
+ ports[j] = ipc_port_copy_send(exc_port);
}
+ behaviors[j] = exc_behavior;
+ flavors[j] = exc_flavor;
+ ++count;
}
}
}
return KERN_SUCCESS;
}
+static kern_return_t
+task_get_exception_ports(
+ task_t task,
+ exception_mask_t exception_mask,
+ exception_mask_array_t masks,
+ mach_msg_type_number_t *CountCnt,
+ exception_port_array_t ports,
+ exception_behavior_array_t behaviors,
+ thread_state_flavor_array_t flavors)
+{
+ return task_get_exception_ports_internal(task, exception_mask, masks, CountCnt,
+ NULL, ports, behaviors, flavors);
+}
+
+kern_return_t
+task_get_exception_ports_info(
+ mach_port_t port,
+ exception_mask_t exception_mask,
+ exception_mask_array_t masks,
+ mach_msg_type_number_t *CountCnt,
+ exception_port_info_array_t ports_info,
+ exception_behavior_array_t behaviors,
+ thread_state_flavor_array_t flavors)
+{
+ kern_return_t kr;
+
+ task_t task = convert_port_to_task_read_no_eval(port);
+
+ if (task == TASK_NULL) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ kr = task_get_exception_ports_internal(task, exception_mask, masks, CountCnt,
+ ports_info, NULL, behaviors, flavors);
+
+ task_deallocate(task);
+ return kr;
+}
+
kern_return_t
task_get_exception_ports_from_user(
mach_port_t port,
{
kern_return_t kr;
- task_t task = convert_port_to_task_check_type(port, NULL, TASK_FLAVOR_CONTROL, FALSE);
+ task_t task = convert_port_to_task_no_eval(port);
if (task == TASK_NULL) {
return KERN_INVALID_ARGUMENT;
task_deallocate(task);
return kr;
}
+
+/*
+ * Routine: ipc_thread_port_unpin
+ * Purpose:
+ * Called on the thread port when the thread is
+ * terminating so that the last ref can be deallocated
+ * without a guard exception.
+ * Conditions:
+ * Thread mutex lock is held.
+ * check_bit should be set to true only when port is expected
+ * to have ip_pinned bit set.
+ */
+void
+ipc_thread_port_unpin(
+ ipc_port_t port,
+ __unused bool check_bit)
+{
+ if (port == IP_NULL) {
+ return;
+ }
+ ip_lock(port);
+ imq_lock(&port->ip_messages);
+#if DEVELOPMENT || DEBUG
+ if (pinned_control_port_enabled && check_bit) {
+ assert(ip_is_control(port)); /* remove once we get rid of boot-arg */
+ assert(port->ip_pinned == 1);
+ }
+#endif
+ port->ip_pinned = 0;
+ imq_unlock(&port->ip_messages);
+ ip_unlock(port);
+}
extern void ipc_task_terminate(
task_t task);
+__options_decl(ipc_thread_init_options_t, uint32_t, {
+ IPC_THREAD_INIT_NONE = 0x00,
+ IPC_THREAD_INIT_PINNED = 0x01,
+ IPC_THREAD_INIT_IMMOVABLE = 0x02,
+});
+
/* Initialize a thread's IPC state */
extern void ipc_thread_init(
- thread_t thread);
+ thread_t thread,
+ ipc_thread_init_options_t options);
extern void ipc_thread_init_exc_actions(
thread_t thread);
extern task_t convert_port_to_task(
ipc_port_t port);
+/* Convert from a port to a pinned task */
+extern task_t convert_port_to_task_pinned(
+ ipc_port_t port);
+
extern task_t
convert_port_to_task_with_exec_token(
ipc_port_t port,
extern task_read_t port_name_to_task_read_no_eval(
mach_port_name_t name);
-extern task_inspect_t port_name_to_task_inspect(
- mach_port_name_t name);
-
extern task_t port_name_to_task_name(
mach_port_name_t name);
extern void space_inspect_deallocate(
ipc_space_inspect_t space);
+#if MACH_KERNEL_PRIVATE
+extern void ipc_thread_port_unpin(
+ ipc_port_t port,
+ bool check_bit);
+#endif
+
#endif /* _KERN_IPC_TT_H_ */
zalloc_flags_t flags,
vm_allocation_site_t *site)
{
- int kma_flags = KMA_ATOMIC | KMA_KOBJECT;
- vm_tag_t tag = VM_KERN_MEMORY_KALLOC;
+ int kma_flags = KMA_ATOMIC;
+ vm_tag_t tag;
vm_map_t alloc_map;
vm_offset_t addr;
return (struct kalloc_result){ };
}
+#ifndef __x86_64__
+ /*
+ * (73465472) on Intel we didn't use to pass this flag,
+ * which in turned allowed kalloc_large() memory to be shared
+ * with user directly.
+ *
+ * We're bound by this unfortunate ABI.
+ */
+ kma_flags |= KMA_KOBJECT;
+#endif
if (flags & Z_NOPAGEWAIT) {
kma_flags |= KMA_NOPAGEWAIT;
}
alloc_map = kalloc_map_for_size(size);
- if (site) {
- tag = vm_tag_alloc(site);
+ tag = zalloc_flags_get_tag(flags);
+ if (tag == VM_KERN_MEMORY_NONE) {
+ if (site) {
+ tag = vm_tag_alloc(site);
+ } else {
+ tag = VM_KERN_MEMORY_KALLOC;
+ }
}
if (kmem_alloc_flags(alloc_map, &addr, size, tag, kma_flags) != KERN_SUCCESS) {
zalloc_flags_t flags,
vm_allocation_site_t *site)
{
- vm_tag_t tag = VM_KERN_MEMORY_KALLOC;
vm_size_t size;
void *addr;
zone_t z;
* Kasan for kalloc heaps will put the redzones *inside*
* the allocation, and hence augment its size.
*
- * kalloc heaps do not use zone_t::kasan_redzone.
+ * kalloc heaps do not use zone_t::z_kasan_redzone.
*/
#if KASAN_KALLOC
size = kasan_alloc_resize(req_size);
assert(size <= zone_elem_size(z));
#if VM_MAX_TAG_ZONES
- if (z->tags && site) {
- tag = vm_tag_alloc(site);
- if ((flags & (Z_NOWAIT | Z_NOPAGEWAIT)) && !vm_allocation_zone_totals[tag]) {
- tag = VM_KERN_MEMORY_KALLOC;
+ if (z->tags) {
+ vm_tag_t tag = zalloc_flags_get_tag(flags);
+ if (tag == VM_KERN_MEMORY_NONE && site) {
+ tag = vm_tag_alloc(site);
+ }
+ if (tag != VM_KERN_MEMORY_NONE) {
+ tag = vm_tag_will_update_zone(tag, z->tag_zone_index,
+ flags & (Z_WAITOK | Z_NOWAIT | Z_NOPAGEWAIT));
}
+ flags |= Z_VM_TAG(tag);
}
#endif
- addr = zalloc_ext(z, kheap->kh_stats ?: z->z_stats,
- flags | Z_VM_TAG(tag), zone_elem_size(z) - size);
+ addr = zalloc_ext(z, kheap->kh_stats ?: z->z_stats, flags);
#if KASAN_KALLOC
addr = (void *)kasan_alloc((vm_offset_t)addr, zone_elem_size(z),
#define STACKSHOT_KCTYPE_TASK_SNAPSHOT 0x905u /* task_snapshot_v2 */
#define STACKSHOT_KCTYPE_THREAD_SNAPSHOT 0x906u /* thread_snapshot_v2, thread_snapshot_v3 */
#define STACKSHOT_KCTYPE_DONATING_PIDS 0x907u /* int[] */
-#define STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO 0x908u /* same as KCDATA_TYPE_LIBRARY_LOADINFO64 */
+#define STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO 0x908u /* dyld_shared_cache_loadinfo */
#define STACKSHOT_KCTYPE_THREAD_NAME 0x909u /* char[] */
#define STACKSHOT_KCTYPE_KERN_STACKFRAME 0x90Au /* struct stack_snapshot_frame32 */
#define STACKSHOT_KCTYPE_KERN_STACKFRAME64 0x90Bu /* struct stack_snapshot_frame64 */
uuid_t imageUUID;
};
+/*
+ * N.B.: Newer kernels output dyld_shared_cache_loadinfo structures
+ * instead of this, since the field names match their contents better.
+ */
struct dyld_uuid_info_64_v2 {
uint64_t imageLoadAddress; /* XXX image slide */
uuid_t imageUUID;
/* end of version 1 of dyld_uuid_info_64. sizeof v1 was 24 */
- uint64_t imageSlidBaseAddress; /* slid base address of image */
+ uint64_t imageSlidBaseAddress; /* slid base address or slid first mapping of image */
+};
+
+/*
+ * This is the renamed version of dyld_uuid_info_64 with more accurate
+ * field names, for STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO. Any users
+ * must be aware of the dyld_uuid_info_64* version history and ensure
+ * the fields they are accessing are within the actual bounds.
+ *
+ * OLD_FIELD NEW_FIELD
+ * imageLoadAddress sharedCacheSlide
+ * imageUUID sharedCacheUUID
+ * imageSlidBaseAddress sharedCacheUnreliableSlidBaseAddress
+ * - sharedCacheSlidFirstMapping
+ */
+struct dyld_shared_cache_loadinfo {
+ uint64_t sharedCacheSlide; /* image slide value */
+ uuid_t sharedCacheUUID;
+ /* end of version 1 of dyld_uuid_info_64. sizeof v1 was 24 */
+ uint64_t sharedCacheUnreliableSlidBaseAddress; /* for backwards-compatibility; use sharedCacheSlidFirstMapping if available */
+ /* end of version 2 of dyld_uuid_info_64. sizeof v2 was 32 */
+ uint64_t sharedCacheSlidFirstMapping; /* slid base address of first mapping */
};
struct dyld_aot_cache_uuid_info {
- uint64_t x86SlidBaseAddress; /* slid base address of x86 shared cache */
+ uint64_t x86SlidBaseAddress; /* slid first mapping address of x86 shared cache */
uuid_t x86UUID; /* UUID of x86 shared cache */
- uint64_t aotSlidBaseAddress; /* slide base address of aot cache */
+ uint64_t aotSlidBaseAddress; /* slid first mapping address of aot cache */
uuid_t aotUUID; /* UUID of aot shared cache */
};
kTaskIsDirtyTracked = 0x4000000,
kTaskAllowIdleExit = 0x8000000,
kTaskIsTranslated = 0x10000000,
+ kTaskSharedRegionNone = 0x20000000, /* task doesn't have a shared region */
+ kTaskSharedRegionSystem = 0x40000000, /* task is attached to system shared region */
+ kTaskSharedRegionOther = 0x80000000, /* task is attached to a different shared region */
};
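Since the three shared-region bits are mutually exclusive, a stackshot consumer can decode them with a simple chain; a hypothetical decoder sketch (the ss_flags argument is whatever snapshot flags field the consumer carries):

static const char *
task_shared_region_kind(uint64_t ss_flags)
{
	if (ss_flags & kTaskSharedRegionNone) {
		return "none";
	}
	if (ss_flags & kTaskSharedRegionSystem) {
		return "system";
	}
	if (ss_flags & kTaskSharedRegionOther) {
		return "other";
	}
	return "unknown"; /* older stackshot without these bits */
}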
enum thread_snapshot_flags {
uint64_t stackshot_duration_outer;
} __attribute__((packed));
+struct stackshot_duration_v2 {
+ uint64_t stackshot_duration;
+ uint64_t stackshot_duration_outer;
+ uint64_t stackshot_duration_prior;
+} __attribute__((packed));
+
struct stackshot_fault_stats {
uint32_t sfs_pages_faulted_in; /* number of pages faulted in using KDP fault path */
uint64_t sfs_time_spent_faulting; /* MATUs spent faulting */
#include <kern/coalition.h>
#include <kern/processor.h>
#include <kern/host_statistics.h>
+#include <kern/counter.h>
#include <kern/thread.h>
#include <kern/thread_group.h>
#include <kern/task.h>
static boolean_t stack_enable_faulting = FALSE;
static struct stackshot_fault_stats fault_stats;
+static uint32_t stackshot_initial_estimate;
+static uint64_t stackshot_duration_prior_abs; /* prior attempts, abs */
static unaligned_u64 * stackshot_duration_outer;
static uint64_t stackshot_microsecs;
goto out;
}
+ stackshot_initial_estimate = 0;
+ stackshot_duration_prior_abs = 0;
+ stackshot_duration_outer = NULL;
+ uint64_t time_start = mach_absolute_time();
+
istate = ml_set_interrupts_enabled(FALSE);
/* Preload trace parameters*/
ml_set_interrupts_enabled(istate);
+ uint64_t time_end = mach_absolute_time();
+ if (stackshot_duration_outer) {
+ *stackshot_duration_outer = time_end - time_start;
+ }
*bytes_traced = kdp_stack_snapshot_bytes_traced();
out:
goto error_exit;
}
+ stackshot_duration_prior_abs = 0;
stackshotbuf_size = get_stackshot_estsize(size_hint);
+ stackshot_initial_estimate = stackshotbuf_size;
for (; stackshotbuf_size <= max_tracebuf_size; stackshotbuf_size <<= 1) {
if (kmem_alloc_flags(kernel_map, (vm_offset_t *)&stackshotbuf, stackshotbuf_size, VM_KERN_MEMORY_DIAG, KMA_ZERO) != KERN_SUCCESS) {
/*
* If we didn't allocate a big enough buffer, deallocate and try again.
*/
+ stackshot_duration_prior_abs +=
+ (time_end - time_start);
continue;
} else {
goto error_exit;
kern_return_t error = KERN_SUCCESS;
uint64_t shared_cache_slide = 0;
- uint64_t shared_cache_base_address = 0;
+ uint64_t shared_cache_first_mapping = 0;
uint32_t kdp_fault_results = 0;
- struct dyld_uuid_info_64_v2 shared_cache_data = {0};
+ struct dyld_shared_cache_loadinfo shared_cache_data = {0};
assert(task_snap_ss_flags != NULL);
+ /* Get basic info about the shared region pointer, regardless of any failures */
+ if (task->shared_region == NULL) {
+ *task_snap_ss_flags |= kTaskSharedRegionNone;
+ } else if (task->shared_region == primary_system_shared_region) {
+ *task_snap_ss_flags |= kTaskSharedRegionSystem;
+ } else {
+ *task_snap_ss_flags |= kTaskSharedRegionOther;
+ }
+
if (task->shared_region && ml_validate_nofault((vm_offset_t)task->shared_region, sizeof(struct vm_shared_region))) {
struct vm_shared_region *sr = task->shared_region;
- shared_cache_base_address = sr->sr_base_address + sr->sr_first_mapping;
+ shared_cache_first_mapping = sr->sr_base_address + sr->sr_first_mapping;
} else {
*task_snap_ss_flags |= kTaskSharedRegionInfoUnavailable;
}
/* We haven't copied in the shared region UUID yet as part of setup */
- if (!shared_cache_base_address || !task->shared_region->sr_uuid_copied) {
+ if (!shared_cache_first_mapping || !task->shared_region->sr_uuid_copied) {
goto error_exit;
}
*/
shared_cache_slide = task->shared_region->sr_slide;
- if (task->shared_region == init_task_shared_region) {
+ if (task->shared_region == primary_system_shared_region) {
/* skip adding shared cache info -- it's the same as the system level one */
goto error_exit;
}
- shared_cache_data.imageLoadAddress = shared_cache_slide;
- stackshot_memcpy(&shared_cache_data.imageUUID, task->shared_region->sr_uuid, sizeof(task->shared_region->sr_uuid));
- shared_cache_data.imageSlidBaseAddress = shared_cache_base_address;
- kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO, sizeof(struct dyld_uuid_info_64_v2), &shared_cache_data));
+ /*
+ * Historically, this data was in a dyld_uuid_info_64 structure, but the
+ * naming of both the structure and fields for this use wasn't great. The
+ * dyld_shared_cache_loadinfo structure has better names, but the same
+ * layout and content as the original.
+ *
+ * The imageSlidBaseAddress/sharedCacheUnreliableSlidBaseAddress field
+ * has been used inconsistently for STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
+ * entries; here, it's the slid first mapping, and we leave it that way
+ * for backwards compatibility.
+ */
+ shared_cache_data.sharedCacheSlide = shared_cache_slide;
+ stackshot_memcpy(&shared_cache_data.sharedCacheUUID, task->shared_region->sr_uuid, sizeof(task->shared_region->sr_uuid));
+ shared_cache_data.sharedCacheUnreliableSlidBaseAddress = shared_cache_first_mapping;
+ shared_cache_data.sharedCacheSlidFirstMapping = shared_cache_first_mapping;
+ kcd_exit_on_error(kcdata_push_data(kcd, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO, sizeof(shared_cache_data), &shared_cache_data));
error_exit:
if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
cur_tsnap->ts_did_throttle = (uint32_t) proc_did_throttle_from_task(task);
cur_tsnap->ts_suspend_count = task->suspend_count;
- cur_tsnap->ts_faults = task->faults;
+ cur_tsnap->ts_faults = counter_load(&task->faults);
cur_tsnap->ts_pageins = task->pageins;
cur_tsnap->ts_cow_faults = task->cow_faults;
cur_tsnap->ts_latency_qos = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED) ?
cur_tsnap->tds_max_resident_size = get_task_resident_max(task);
cur_tsnap->tds_suspend_count = task->suspend_count;
- cur_tsnap->tds_faults = task->faults;
+ cur_tsnap->tds_faults = counter_load(&task->faults);
cur_tsnap->tds_pageins = task->pageins;
cur_tsnap->tds_cow_faults = task->cow_faults;
cur_tsnap->tds_was_throttled = (uint32_t)proc_was_throttled_from_task(task);
if (trace_flags & STACKSHOT_PAGE_TABLES) {
kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stack_snapshot_pagetable_mask, "stackshot_pagetable_mask"));
}
+ if (stackshot_initial_estimate != 0) {
+ kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, stackshot_initial_estimate, "stackshot_size_estimate"));
+ }
#if STACKSHOT_COLLECTS_LATENCY_INFO
latency_info.setup_latency = mach_absolute_time();
kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, KCDATA_TYPE_USECS_SINCE_EPOCH, sizeof(uint64_t), &stackshot_microsecs));
/* record system level shared cache load info (if available) */
- if (!collect_delta_stackshot && init_task_shared_region &&
- ml_validate_nofault((vm_offset_t)init_task_shared_region, sizeof(struct vm_shared_region))) {
- struct dyld_uuid_info_64_v2 sys_shared_cache_info = {0};
+ if (!collect_delta_stackshot && primary_system_shared_region &&
+ ml_validate_nofault((vm_offset_t)primary_system_shared_region, sizeof(struct vm_shared_region))) {
+ struct dyld_shared_cache_loadinfo sys_shared_cache_info = {0};
- stackshot_memcpy(sys_shared_cache_info.imageUUID, &init_task_shared_region->sr_uuid, sizeof(init_task_shared_region->sr_uuid));
- sys_shared_cache_info.imageLoadAddress =
- init_task_shared_region->sr_slide;
- sys_shared_cache_info.imageSlidBaseAddress =
- init_task_shared_region->sr_slide + init_task_shared_region->sr_base_address;
+ /*
+ * Historically, this data was in a dyld_uuid_info_64 structure, but the
+ * naming of both the structure and fields for this use isn't great. The
+ * dyld_shared_cache_loadinfo structure has better names, but the same
+ * layout and content as the original.
+ *
+ * The imageSlidBaseAddress/sharedCacheUnreliableSlidBaseAddress field
+ * has been used inconsistently for STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
+ * entries; here, it's the slid base address, and we leave it that way
+ * for backwards compatibility.
+ */
+ stackshot_memcpy(sys_shared_cache_info.sharedCacheUUID, &primary_system_shared_region->sr_uuid, sizeof(primary_system_shared_region->sr_uuid));
+ sys_shared_cache_info.sharedCacheSlide =
+ primary_system_shared_region->sr_slide;
+ sys_shared_cache_info.sharedCacheUnreliableSlidBaseAddress =
+ primary_system_shared_region->sr_slide + primary_system_shared_region->sr_base_address;
+ sys_shared_cache_info.sharedCacheSlidFirstMapping =
+ primary_system_shared_region->sr_base_address + primary_system_shared_region->sr_first_mapping;
kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO,
- sizeof(struct dyld_uuid_info_64_v2), &sys_shared_cache_info));
+ sizeof(sys_shared_cache_info), &sys_shared_cache_info));
if (trace_flags & STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT) {
/*
* Include a map of the system shared cache layout if it has been populated
* (which is only when the system is using a custom shared cache).
*/
- if (init_task_shared_region->sr_images && ml_validate_nofault((vm_offset_t)init_task_shared_region->sr_images,
- (init_task_shared_region->sr_images_count * sizeof(struct dyld_uuid_info_64)))) {
- assert(init_task_shared_region->sr_images_count != 0);
- kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_SYS_SHAREDCACHE_LAYOUT, sizeof(struct dyld_uuid_info_64), init_task_shared_region->sr_images_count, init_task_shared_region->sr_images));
+ if (primary_system_shared_region->sr_images && ml_validate_nofault((vm_offset_t)primary_system_shared_region->sr_images,
+ (primary_system_shared_region->sr_images_count * sizeof(struct dyld_uuid_info_64)))) {
+ assert(primary_system_shared_region->sr_images_count != 0);
+ kcd_exit_on_error(kcdata_push_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_SYS_SHAREDCACHE_LAYOUT, sizeof(struct dyld_uuid_info_64), primary_system_shared_region->sr_images_count, primary_system_shared_region->sr_images));
}
}
}
if (!panic_stackshot && (trace_flags & STACKSHOT_SAVE_JETSAM_COALITIONS)) {
coalition_begin_cpu_cycle_count = mt_cur_cpu_cycles();
}
-#endif
+#endif /* INTERRUPT_MASKED_DEBUG && MONOTONIC */
/* Iterate over coalitions */
if (trace_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - coalition_begin_cpu_cycle_count),
"coalitions_cpu_cycle_count"));
}
-#endif
+#endif /* INTERRUPT_MASKED_DEBUG && MONOTONIC */
}
#else
trace_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
}
}
-
#if STACKSHOT_COLLECTS_LATENCY_INFO
latency_info.total_terminated_task_iteration_latency = mach_absolute_time() - latency_info.total_terminated_task_iteration_latency;
#endif /* STACKSHOT_COLLECTS_LATENCY_INFO */
/* update timestamp of the stackshot */
abs_time_end = mach_absolute_time();
-#if DEVELOPMENT || DEBUG
- struct stackshot_duration stackshot_duration;
- stackshot_duration.stackshot_duration = (abs_time_end - abs_time);
- stackshot_duration.stackshot_duration_outer = 0;
+ struct stackshot_duration_v2 stackshot_duration = {
+ .stackshot_duration = (abs_time_end - abs_time),
+ .stackshot_duration_outer = 0,
+ .stackshot_duration_prior = stackshot_duration_prior_abs,
+ };
if ((trace_flags & STACKSHOT_DO_COMPRESS) == 0) {
kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_STACKSHOT_DURATION,
- sizeof(struct stackshot_duration), &out_addr));
- struct stackshot_duration *duration_p = (void *) out_addr;
+ sizeof(struct stackshot_duration_v2), &out_addr));
+ struct stackshot_duration_v2 *duration_p = (void *) out_addr;
stackshot_memcpy(duration_p, &stackshot_duration, sizeof(*duration_p));
stackshot_duration_outer = (unaligned_u64 *)&duration_p->stackshot_duration_outer;
} else {
kcd_exit_on_error(kcdata_push_data(stackshot_kcdata_p, STACKSHOT_KCTYPE_STACKSHOT_DURATION, sizeof(stackshot_duration), &stackshot_duration));
stackshot_duration_outer = NULL;
}
-#endif
#if INTERRUPT_MASKED_DEBUG && MONOTONIC
if (!panic_stackshot) {
*pBytesTraced = (uint32_t) kcdata_memory_get_used_bytes(stackshot_kcdata_p);
*pBytesUncompressed = (uint32_t) kcdata_memory_get_uncompressed_bytes(stackshot_kcdata_p);
-error_exit:
+error_exit:;
#if INTERRUPT_MASKED_DEBUG
- if (trace_flags & STACKSHOT_DO_COMPRESS) {
+ bool disable_interrupts_masked_check = kern_feature_override(
+ KF_INTERRUPT_MASKED_DEBUG_STACKSHOT_OVRD) ||
+ (trace_flags & STACKSHOT_DO_COMPRESS) != 0;
+
+#if STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED
+ disable_interrupts_masked_check = true;
+#endif /* STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED */
+
+ if (disable_interrupts_masked_check) {
ml_spin_debug_clear_self();
}
-#if defined(STACKSHOT_INTERRUPTS_MASKED_CHECK_DISABLED)
- ml_spin_debug_clear_self();
-#endif
if (!panic_stackshot && interrupt_masked_debug) {
/*
*/
ml_check_stackshot_interrupt_disabled_duration(current_thread());
}
-#endif
+#endif /* INTERRUPT_MASKED_DEBUG */
stack_enable_faulting = FALSE;
uint64_t compressions = 0;
uint64_t decompressions = 0;
- percpu_foreach(stat, vm_stat) {
- compressions += stat->compressions;
- decompressions += stat->decompressions;
- }
+ compressions = counter_load(&vm_statistics_compressions);
+ decompressions = counter_load(&vm_statistics_decompressions);
memio_snap->snapshot_magic = STACKSHOT_MEM_AND_IO_SNAPSHOT_MAGIC;
memio_snap->free_pages = vm_page_free_count;
{
struct thread_group_snapshot_v2 *thread_groups = (struct thread_group_snapshot_v2 *)arg;
struct thread_group_snapshot_v2 *tgs = &thread_groups[i];
- uint64_t flags = kdp_thread_group_get_flags(tg);
+ uint32_t flags = thread_group_get_flags(tg);
tgs->tgs_id = thread_group_get_id(tg);
stackshot_memcpy(tgs->tgs_name, thread_group_get_name(tg), THREAD_GROUP_MAXNAME);
tgs->tgs_flags = ((flags & THREAD_GROUP_FLAGS_EFFICIENT) ? kThreadGroupEfficient : 0) |
#include <mach-o/loader.h>
#include <libkern/kernel_mach_header.h>
#include <libkern/prelink.h>
+#include <libkern/OSKextLibPrivate.h>
#include <san/kasan.h>
#define KASLR_IOREG_DEBUG 0
kern_return_t
kext_receipt(void **addrp, size_t *sizep)
{
+ kern_return_t ret = KERN_FAILURE;
if (addrp == NULL || sizep == NULL) {
- return KERN_FAILURE;
+ goto finish;
}
kernel_mach_header_t *kc = PE_get_kc_header(KCKindAuxiliary);
if (kc == NULL) {
- return KERN_FAILURE;
+ ret = KERN_MISSING_KC;
+ goto finish;
+ }
+
+ /*
+ * This will be set in early boot once we've successfully checked that
+ * the AuxKC is properly linked against the BootKC. If this isn't set,
+ * and we have a valid AuxKC mach header, then the booter gave us a
+ * bad KC.
+ */
+ if (auxkc_uuid_valid == FALSE) {
+ ret = KERN_INVALID_KC;
+ goto finish;
}
size_t size;
void *addr = getsectdatafromheader(kc,
kReceiptInfoSegment, kAuxKCReceiptSection, &size);
if (addr == NULL) {
- return KERN_FAILURE;
+ ret = KERN_INVALID_KC;
+ goto finish;
}
*addrp = addr;
*sizep = size;
- return KERN_SUCCESS;
+ ret = KERN_SUCCESS;
+
+finish:
+ /*
+ * If we return success, we want the other side to call
+ * kext_receipt_set_queried() itself, so we can confirm that the receipt
+ * made the roundtrip before allowing third-party kexts to load. On any
+ * failure, mark the receipt as queried here.
+ */
+ if (ret != KERN_SUCCESS) {
+ kext_receipt_set_queried();
+ }
+ return ret;
+}
+
+/*
+ * Returns KERN_FAILURE if the variable was already set.
+ */
+kern_return_t
+kext_receipt_set_queried()
+{
+ return OSKextSetReceiptQueried();
}
kern_return_t kext_receipt(void **addrp, size_t *sizep);
+kern_return_t kext_receipt_set_queried(void);
+
__END_DECLS
#endif /* _KEXT_ALLOC_H_ */
#if CONFIG_DTRACE
extern uint32_t lockstat_probemap[LS_NPROBES];
-extern void (*lockstat_probe)(uint32_t, uint64_t, uint64_t,
+extern void dtrace_probe(uint32_t, uint64_t, uint64_t,
uint64_t, uint64_t, uint64_t);
/*
* Macros to record lockstat probes.
*/
#define LOCKSTAT_RECORD4(probe, lp, arg0, arg1, arg2, arg3) \
- { \
- uint32_t id; \
- if (__improbable(id = lockstat_probemap[(probe)])) { \
- (*lockstat_probe)(id, (uintptr_t)(lp), (arg0), \
- (arg1), (arg2), (arg3)); \
- } \
+ { \
+ uint32_t id; \
+ if (__improbable(id = lockstat_probemap[(probe)])) { \
+ dtrace_probe(id, (uintptr_t)(lp), (arg0), \
+ (arg1), (arg2), (arg3)); \
+ } \
}
#define LOCKSTAT_RECORD_(probe, lp, arg0, arg1, arg2, arg3, ...) LOCKSTAT_RECORD4(probe, lp, arg0, arg1, arg2, arg3)
#define LOCKSTAT_RECORD__(probe, lp, arg0, arg1, arg2, arg3, ...) LOCKSTAT_RECORD_(probe, lp, arg0, arg1, arg2, arg3)
*
* Initialize a hardware lock.
*/
-void
+MARK_AS_HIBERNATE_TEXT void
hw_lock_init(hw_lock_t lock)
{
ordered_store_hw(lock, 0);
hw_lock_lock_internal(lock, thread LCK_GRP_ARG(grp));
}
-/*
- * Routine: hw_lock_to
- *
- * Acquire lock, spinning until it becomes available or timeout.
- * Timeout is in mach_absolute_time ticks, return with
- * preemption disabled.
- */
-unsigned
-int
-(hw_lock_to)(hw_lock_t lock, uint64_t timeout LCK_GRP_ARG(lck_grp_t *grp))
+static inline unsigned int
+hw_lock_to_internal(hw_lock_t lock, uint64_t timeout, thread_t thread
+ LCK_GRP_ARG(lck_grp_t *grp))
{
- thread_t thread;
- uintptr_t state;
+ uintptr_t state;
unsigned int success = 0;
- thread = current_thread();
- disable_preemption_for_thread(thread);
state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
#if LOCK_PRETEST
if (ordered_load_hw(lock)) {
return success;
}
+/*
+ * Routine: hw_lock_to
+ *
+ * Acquire lock, spinning until it becomes available or timeout.
+ * Timeout is in mach_absolute_time ticks; returns with
+ * preemption disabled.
+ */
+unsigned
+int
+(hw_lock_to)(hw_lock_t lock, uint64_t timeout LCK_GRP_ARG(lck_grp_t *grp))
+{
+ thread_t thread = current_thread();
+ disable_preemption_for_thread(thread);
+ return hw_lock_to_internal(lock, timeout, thread LCK_GRP_ARG(grp));
+}
+
+/*
+ * Routine: hw_lock_to_nopreempt
+ *
+ * Acquire lock, spinning until it becomes available or timeout.
+ * Timeout is in mach_absolute_time ticks; must be called, and returns,
+ * with preemption disabled.
+ */
+unsigned
+int
+(hw_lock_to_nopreempt)(hw_lock_t lock, uint64_t timeout LCK_GRP_ARG(lck_grp_t *grp))
+{
+ thread_t thread = current_thread();
+ if (__improbable(!preemption_disabled_for_thread(thread))) {
+ panic("Attempt to test no-preempt spinlock %p in preemptible context", lock);
+ }
+ return hw_lock_to_internal(lock, timeout, thread LCK_GRP_ARG(grp));
+}
+
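For reference, a minimal caller sketch (hypothetical helper, written against the three-argument macro form used when lock statistics are compiled out): hw_lock_to() expects its timeout in mach_absolute_time ticks, so convert from nanoseconds first.

#include <stdbool.h>
#include <kern/clock.h> /* nanoseconds_to_absolutetime() */

static bool
hw_lock_acquire_timeout_ns(hw_lock_t lock, uint64_t timeout_ns, lck_grp_t *grp)
{
	uint64_t timeout_ticks;

	nanoseconds_to_absolutetime(timeout_ns, &timeout_ticks);
	/* nonzero means the lock was acquired; preemption stays disabled */
	return hw_lock_to(lock, timeout_ticks, grp) != 0;
}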
/*
* Routine: hw_lock_try
*
#include <mach/processor_server.h>
#include <kern/kern_types.h>
-#include <kern/counters.h>
#include <kern/cpu_data.h>
#include <kern/cpu_quiesce.h>
#include <kern/ipc_host.h>
return KERN_INVALID_HOST;
}
- assert(host_priv == &realhost);
-
#if DEVELOPMENT || DEBUG
if (options & HOST_REBOOT_DEBUGGER) {
Debugger("Debugger");
return KERN_INVALID_HOST;
}
- assert(host_priv == &realhost);
-
/*
* Copy first operator string terminated by '\0' followed by
* standardized strings generated from boot string.
#if CONFIG_IOSCHED
#define IOSCHED_METADATA_TIER THROTTLE_LEVEL_TIER1
+#define IOSCHED_METADATA_EXPEDITED_TIER THROTTLE_LEVEL_TIER0
+_Static_assert(IOSCHED_METADATA_EXPEDITED_TIER < IOSCHED_METADATA_TIER,
+ "expedited metadata tier must be less than metadata tier");
#endif /* CONFIG_IOSCHED */
extern int proc_get_darwinbgstate(task_t task, uint32_t *flagsp);
if (c == 'z' || c == 'Z') {
c = *++fmt;
- if (sizeof(size_t) == sizeof(unsigned long)) {
+ if (sizeof(size_t) == sizeof(unsigned long long)) {
long_long = 1;
}
}
int tasks_count;
int terminated_tasks_count;
queue_head_t threads;
+queue_head_t terminated_threads;
int threads_count;
+int terminated_threads_count;
LCK_GRP_DECLARE(task_lck_grp, "task");
LCK_ATTR_DECLARE(task_lck_attr, 0, 0);
LCK_MTX_DECLARE_ATTR(tasks_threads_lock, &task_lck_grp, &task_lck_attr);
queue_init(&tasks);
queue_init(&terminated_tasks);
queue_init(&threads);
+ queue_init(&terminated_threads);
queue_init(&corpse_tasks);
processor_init(master_processor, master_cpu, &pset0);
processor_set_t pset,
void **thing_list,
mach_msg_type_number_t *count,
- int type)
+ int type,
+ mach_task_flavor_t flavor)
{
unsigned int i;
task_t task;
/* for each task, make sure we are allowed to examine it */
for (i = used = 0; i < actual_tasks; i++) {
- if (mac_task_check_expose_task(task_list[i])) {
+ if (mac_task_check_expose_task(task_list[i], flavor)) {
task_deallocate(task_list[i]);
continue;
}
processor_set_t pset,
task_array_t *task_list,
mach_msg_type_number_t *count,
- int flavor)
+ mach_task_flavor_t flavor)
{
kern_return_t ret;
mach_msg_type_number_t i;
- ret = processor_set_things(pset, (void **)task_list, count, PSET_THING_TASK);
+ ret = processor_set_things(pset, (void **)task_list, count, PSET_THING_TASK, flavor);
if (ret != KERN_SUCCESS) {
return ret;
}
switch (flavor) {
case TASK_FLAVOR_CONTROL:
for (i = 0; i < *count; i++) {
- (*task_list)[i] = (task_t)convert_task_to_port((*task_list)[i]);
+ if ((*task_list)[i] == current_task()) {
+ /* if current_task(), return pinned port */
+ (*task_list)[i] = (task_t)convert_task_to_port_pinned((*task_list)[i]);
+ } else {
+ (*task_list)[i] = (task_t)convert_task_to_port((*task_list)[i]);
+ }
}
break;
case TASK_FLAVOR_READ:
kern_return_t ret;
mach_msg_type_number_t i;
- ret = processor_set_things(pset, (void **)thread_list, count, PSET_THING_THREAD);
+ ret = processor_set_things(pset, (void **)thread_list, count, PSET_THING_THREAD, TASK_FLAVOR_CONTROL);
if (ret != KERN_SUCCESS) {
return ret;
}
extern struct pset_node pset_node0;
extern queue_head_t tasks, threads, corpse_tasks;
-extern int tasks_count, terminated_tasks_count, threads_count;
+extern int tasks_count, terminated_tasks_count, threads_count, terminated_threads_count;
decl_lck_mtx_data(extern, tasks_threads_lock);
decl_lck_mtx_data(extern, tasks_corpse_lock);
*/
extern queue_head_t terminated_tasks;
+extern queue_head_t terminated_threads;
+
struct processor {
processor_state_t state; /* See above */
bool is_SMT;
}
#if DEVELOPMENT || DEBUG
-extern int32_t sysctl_get_bound_cpuid(void);
-int32_t
-sysctl_get_bound_cpuid(void)
-{
- int32_t cpuid = -1;
- thread_t self = current_thread();
-
- processor_t processor = self->bound_processor;
- if (processor == NULL) {
- cpuid = -1;
- } else {
- cpuid = processor->cpu_id;
- }
-
- return cpuid;
-}
-
-extern void sysctl_thread_bind_cpuid(int32_t cpuid);
-void
-sysctl_thread_bind_cpuid(int32_t cpuid)
-{
- if (cpuid < 0 || cpuid >= MAX_SCHED_CPUS) {
- return;
- }
-
- processor_t processor = processor_array[cpuid];
- if (processor == PROCESSOR_NULL) {
- return;
- }
-
- thread_bind(processor);
-
- thread_block(THREAD_CONTINUE_NULL);
-}
extern char sysctl_get_bound_cluster_type(void);
char
thread_block(THREAD_CONTINUE_NULL);
}
-#endif
+#endif /* DEVELOPMENT || DEBUG */
-#endif
+#endif /* __AMP__ */
#if CONFIG_TELEMETRY
#include <kern/telemetry.h>
#endif
+#include <kern/zalloc_internal.h>
#include <sys/kdebug.h>
{ compute_stack_target, NULL, 5, 1 },
{ compute_pageout_gc_throttle, NULL, 1, 0 },
{ compute_pmap_gc_throttle, NULL, 60, 0 },
+ { compute_zone_working_set_size, NULL, ZONE_WSS_UPDATE_PERIOD, 0 },
#if CONFIG_TELEMETRY
{ compute_telemetry, NULL, 1, 0 },
#endif
#include <kern/kern_types.h>
#include <kern/backtrace.h>
#include <kern/clock.h>
-#include <kern/counters.h>
#include <kern/cpu_number.h>
#include <kern/cpu_data.h>
#include <kern/smp.h>
ctime = mach_absolute_time();
thread->realtime.deadline = thread->realtime.constraint + ctime;
+ KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SET_RT_DEADLINE) | DBG_FUNC_NONE,
+ (uintptr_t)thread_tid(thread), thread->realtime.deadline, thread->realtime.computation, 0);
}
/*
}
}
+ bool secondary_forced_idle = ((processor->processor_secondary != PROCESSOR_NULL) &&
+ (thread_no_smt(thread) || (thread->sched_pri >= BASEPRI_RTQUEUES)) &&
+ (processor->processor_secondary->state == PROCESSOR_IDLE));
+
/* OK, so we're not going to run the current thread. Look at the RT queue. */
bool ok_to_run_realtime_thread = sched_ok_to_run_realtime_thread(pset, processor);
if ((rt_runq_count(pset) > 0) && ok_to_run_realtime_thread) {
ipi_type = sched_ipi_action(sprocessor, NULL, false, SCHED_IPI_EVENT_SMT_REBAL);
ast_processor = sprocessor;
}
+ } else if (secondary_forced_idle && !thread_no_smt(new_thread) && pset_has_stealable_threads(pset)) {
+ pset_update_processor_state(pset, sprocessor, PROCESSOR_DISPATCHING);
+ ipi_type = sched_ipi_action(sprocessor, NULL, true, SCHED_IPI_EVENT_PREEMPT);
+ ast_processor = sprocessor;
}
pset_unlock(pset);
thread->continuation = thread->parameter = NULL;
- counter(c_thread_invoke_hits++);
-
boolean_t enable_interrupts = TRUE;
/* idle thread needs to stay interrupts-disabled */
} else if (thread == self) {
/* same thread but with continuation */
ast_context(self);
- counter(++c_thread_invoke_same);
thread_unlock(self);
if (!thread->kernel_stack) {
need_stack:
if (!stack_alloc_try(thread)) {
- counter(c_thread_invoke_misses++);
thread_unlock(thread);
thread_stack_enqueue(thread);
return FALSE;
}
} else if (thread == self) {
ast_context(self);
- counter(++c_thread_invoke_same);
thread_unlock(self);
KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
thread_unlock(thread);
- counter(c_thread_invoke_csw++);
-
self->reason = reason;
processor->last_dispatch = ctime;
* consumed the entire quantum.
*/
if (thread->quantum_remaining == 0) {
+ KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_CANCEL_RT_DEADLINE) | DBG_FUNC_NONE,
+ (uintptr_t)thread_tid(thread), thread->realtime.deadline, thread->realtime.computation, 0);
thread->realtime.deadline = UINT64_MAX;
}
} else {
* thread resumes, it will execute the continuation function
* on a new kernel stack.
*/
-counter(mach_counter_t c_thread_block_calls = 0; )
-
wait_result_t
thread_block_reason(
thread_continue_t continuation,
thread_t new_thread;
spl_t s;
- counter(++c_thread_block_calls);
-
s = splsched();
processor = current_processor();
(void)soft_bound;
#endif /* __AMP__ */
}
+
+#if DEVELOPMENT || DEBUG
+extern int32_t sysctl_get_bound_cpuid(void);
+int32_t
+sysctl_get_bound_cpuid(void)
+{
+ int32_t cpuid = -1;
+ thread_t self = current_thread();
+
+ processor_t processor = self->bound_processor;
+ if (processor == NULL) {
+ cpuid = -1;
+ } else {
+ cpuid = processor->cpu_id;
+ }
+
+ return cpuid;
+}
+
+extern kern_return_t sysctl_thread_bind_cpuid(int32_t cpuid);
+kern_return_t
+sysctl_thread_bind_cpuid(int32_t cpuid)
+{
+ processor_t processor = PROCESSOR_NULL;
+
+ if (cpuid == -1) {
+ goto unbind;
+ }
+
+ if (cpuid < 0 || cpuid >= MAX_SCHED_CPUS) {
+ return KERN_INVALID_VALUE;
+ }
+
+ processor = processor_array[cpuid];
+ if (processor == PROCESSOR_NULL) {
+ return KERN_INVALID_VALUE;
+ }
+
+#if __AMP__
+
+ thread_t thread = current_thread();
+
+ if (thread->sched_flags & (TH_SFLAG_ECORE_ONLY | TH_SFLAG_PCORE_ONLY)) {
+ if ((thread->sched_flags & TH_SFLAG_BOUND_SOFT) == 0) {
+ /* Cannot hard-bind an already hard-cluster-bound thread */
+ return KERN_NOT_SUPPORTED;
+ }
+ }
+
+#endif /* __AMP__ */
+
+unbind:
+ thread_bind(processor);
+
+ thread_block(THREAD_CONTINUE_NULL);
+ return KERN_SUCCESS;
+}
+#endif /* DEVELOPMENT || DEBUG */
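A hypothetical development-only usage of the reworked bind helper, showing the unbind convention (a cpuid of -1 unbinds):

static void
bind_measurement_example(void)
{
	if (sysctl_thread_bind_cpuid(2) == KERN_SUCCESS) {
		/* ... run work pinned to CPU 2 ... */
		(void)sysctl_thread_bind_cpuid(-1); /* -1 unbinds the thread */
	}
}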
#include <kern/lock_group.h>
#include <machine/simple_lock.h>
-#ifdef MACH_KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
+
+#if MACH_KERNEL_PRIVATE
#include <machine/atomic.h>
#include <mach_ldebug.h>
+#endif
+
+__BEGIN_DECLS
+
+#pragma GCC visibility push(hidden)
+#ifdef MACH_KERNEL_PRIVATE
extern void hw_lock_init(
hw_lock_t);
uint64_t,
lck_grp_t*);
+extern unsigned int hw_lock_to_nopreempt(
+ hw_lock_t,
+ uint64_t,
+ lck_grp_t*);
+
extern unsigned int hw_lock_try(
hw_lock_t,
lck_grp_t*);
extern void hw_lock_lock(
hw_lock_t);
-
-#define hw_lock_lock(lck, grp) hw_lock_lock(lck)
+#define hw_lock_lock(lck, grp) \
+ hw_lock_lock(lck)
extern void hw_lock_lock_nopreempt(
hw_lock_t);
-#define hw_lock_lock_nopreempt(lck, grp) hw_lock_lock_nopreempt(lck)
+#define hw_lock_lock_nopreempt(lck, grp) \
+ hw_lock_lock_nopreempt(lck)
extern unsigned int hw_lock_to(
hw_lock_t,
uint64_t);
-#define hw_lock_to(lck, timeout, grp) hw_lock_to(lck, timeout)
+#define hw_lock_to(lck, timeout, grp) \
+ hw_lock_to(lck, timeout)
+
+extern unsigned int hw_lock_to_nopreempt(
+ hw_lock_t,
+ uint64_t);
+#define hw_lock_to_nopreempt(lck, timeout, grp) \
+ hw_lock_to_nopreempt(lck, timeout)
extern unsigned int hw_lock_try(
hw_lock_t);
-#define hw_lock_try(lck, grp) hw_lock_try(lck)
+#define hw_lock_try(lck, grp) \
+ hw_lock_try(lck)
extern unsigned int hw_lock_try_nopreempt(
hw_lock_t);
-#define hw_lock_try_nopreempt(lck, grp) hw_lock_try_nopreempt(lck)
-
+#define hw_lock_try_nopreempt(lck, grp) \
+ hw_lock_try_nopreempt(lck)
#endif /* LOCK_STATS */
enum memory_order ord,
boolean_t wait);
+extern void usimple_unlock_nopreempt(
+ usimple_lock_t);
+
#endif /* MACH_KERNEL_PRIVATE */
-#if XNU_KERNEL_PRIVATE
struct usimple_lock_startup_spec {
usimple_lock_t lck;
STARTUP_ARG(LOCKS_EARLY, STARTUP_RANK_FOURTH, usimple_lock_startup_init, \
&__startup_usimple_lock_spec_ ## var)
-#endif /* XNU_KERNEL_PRIVATE */
-
-__BEGIN_DECLS
-
extern void * hw_wait_while_equals(
void **address,
void *current);
uint64_t,
lck_grp_t*);
#endif
-
#else
extern void usimple_lock(
usimple_lock_t);
-#define usimple_lock(lck, grp) usimple_lock(lck)
+#define usimple_lock(lck, grp) \
+ usimple_lock(lck)
extern unsigned int usimple_lock_try(
usimple_lock_t);
-
-#define usimple_lock_try(lck, grp) usimple_lock_try(lck)
+#define usimple_lock_try(lck, grp) \
+ usimple_lock_try(lck)
extern void usimple_lock_try_lock_loop(
usimple_lock_t);
-#define usimple_lock_try_lock_loop(lck, grp) usimple_lock_try_lock_loop(lck)
+#define usimple_lock_try_lock_loop(lck, grp) \
+ usimple_lock_try_lock_loop(lck)
#if defined(__x86_64__)
extern unsigned int usimple_lock_try_lock_mp_signal_safe_loop_deadline(
usimple_lock_t,
uint64_t);
-#define usimple_lock_try_lock_mp_signal_safe_loop_deadline(lck, ddl, grp) usimple_lock_try_lock_mp_signal_safe_loop_deadline(lck, ddl)
+#define usimple_lock_try_lock_mp_signal_safe_loop_deadline(lck, ddl, grp) \
+ usimple_lock_try_lock_mp_signal_safe_loop_deadline(lck, ddl)
extern unsigned int usimple_lock_try_lock_mp_signal_safe_loop_duration(
usimple_lock_t,
uint64_t);
-#define usimple_lock_try_lock_mp_signal_safe_loop_duration(lck, dur, grp) usimple_lock_try_lock_mp_signal_safe_loop_duration(lck, dur)
+#define usimple_lock_try_lock_mp_signal_safe_loop_duration(lck, dur, grp) \
+ usimple_lock_try_lock_mp_signal_safe_loop_duration(lck, dur)
#endif
#endif /* LOCK_STATS */
usimple_lock_t);
-__END_DECLS
-
-#define ETAP_NO_TRACE 0
-#define ETAP_IO_AHA 0
-
/*
* If we got to here and we still don't have simple_lock_init
* defined, then we must either be outside the osfmk component,
* running on a true SMP, or need debug.
*/
#if !defined(simple_lock_init)
-#define simple_lock_init(l, t) usimple_lock_init(l,t)
-#define simple_lock(l, grp) usimple_lock(l, grp)
-#define simple_unlock(l) usimple_unlock(l)
-#define simple_lock_try(l, grp) usimple_lock_try(l, grp)
+#define simple_lock_init(l, t) usimple_lock_init(l,t)
+#define simple_lock(l, grp) usimple_lock(l, grp)
+#define simple_unlock(l) usimple_unlock(l)
+#define simple_lock_try(l, grp) usimple_lock_try(l, grp)
#define simple_lock_try_lock_loop(l, grp) usimple_lock_try_lock_loop(l, grp)
-#define simple_lock_try_lock_mp_signal_safe_loop_deadline(l, ddl, grp) usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, ddl, grp)
-#define simple_lock_try_lock_mp_signal_safe_loop_duration(l, dur, grp) usimple_lock_try_lock_mp_signal_safe_loop_duration(l, dur, grp)
+#define simple_lock_try_lock_mp_signal_safe_loop_deadline(l, ddl, grp) \
+ usimple_lock_try_lock_mp_signal_safe_loop_deadline(l, ddl, grp)
+#define simple_lock_try_lock_mp_signal_safe_loop_duration(l, dur, grp) \
+ usimple_lock_try_lock_mp_signal_safe_loop_duration(l, dur, grp)
#define simple_lock_addr(l) (&(l))
#endif /* !defined(simple_lock_init) */
extern void hw_lock_bit(
hw_lock_bit_t *,
unsigned int);
-#define hw_lock_bit(lck, bit, grp) hw_lock_bit(lck, bit)
+#define hw_lock_bit(lck, bit, grp) \
+ hw_lock_bit(lck, bit)
extern void hw_lock_bit_nopreempt(
hw_lock_bit_t *,
unsigned int);
-#define hw_lock_bit_nopreempt(lck, bit, grp) hw_lock_bit_nopreempt(lck, bit)
+#define hw_lock_bit_nopreempt(lck, bit, grp) \
+ hw_lock_bit_nopreempt(lck, bit)
extern unsigned int hw_lock_bit_try(
hw_lock_bit_t *,
unsigned int);
-#define hw_lock_bit_try(lck, bit, grp) hw_lock_bit_try(lck, bit)
+#define hw_lock_bit_try(lck, bit, grp) \
+ hw_lock_bit_try(lck, bit)
extern unsigned int hw_lock_bit_to(
hw_lock_bit_t *,
unsigned int,
uint32_t);
-#define hw_lock_bit_to(lck, bit, timeout, grp) hw_lock_bit_to(lck, bit, timeout)
+#define hw_lock_bit_to(lck, bit, timeout, grp) \
+ hw_lock_bit_to(lck, bit, timeout)
#endif /* LOCK_STATS */
hw_lock_bit_t *,
unsigned int);
-#define hw_lock_bit_held(l, b) (((*(l))&(1<<b))!=0)
+#define hw_lock_bit_held(l, b) \
+ (((*(l)) & (1 << (b))) != 0)
#endif /* MACH_KERNEL_PRIVATE */
+__END_DECLS
+
+#pragma GCC visibility pop
+
+#endif /* XNU_KERNEL_PRIVATE */
#endif /*!_KERN_SIMPLE_LOCK_H_*/
#endif /* KERNEL_PRIVATE */
extern void bsd_scale_setup(int);
extern unsigned int semaphore_max;
extern void stackshot_init(void);
-extern void ktrace_init(void);
/*
* Running in virtual memory, on the interrupt stack.
[STARTUP_SUB_CODESIGNING] = "codesigning",
[STARTUP_SUB_OSLOG] = "oslog",
[STARTUP_SUB_MACH_IPC] = "mach_ipc",
+ [STARTUP_SUB_SYSCTL] = "sysctl",
[STARTUP_SUB_EARLY_BOOT] = "early_boot",
/* LOCKDOWN is special and its value won't fit here. */
bootprofile_init();
#endif
-#if (defined(__i386__) || defined(__x86_64__)) && CONFIG_VMX
- vmx_init();
-#endif
-
- kernel_bootstrap_thread_log("ktrace_init");
- ktrace_init();
-
char trace_typefilter[256] = {};
PE_parse_boot_arg_str("trace_typefilter", trace_typefilter,
sizeof(trace_typefilter));
kdebug_init(new_nkdbufs, trace_typefilter,
(trace_wrap ? KDOPT_WRAPPING : 0) | KDOPT_ATBOOT);
-#ifdef MACH_BSD
- kernel_bootstrap_log("bsd_early_init");
- bsd_early_init();
-#endif
+ kernel_startup_initialize_upto(STARTUP_SUB_SYSCTL);
#ifdef IOKIT
kernel_bootstrap_log("PE_init_iokit");
#elif defined(__x86_64__)
/* Intel doesn't have a __BOOTDATA but doesn't protect __KLD */
#define STARTUP_CODE_SEGSECT "__TEXT,__text"
-#define STARTUP_DATA_SEGSECT "__KLD,__init"
-#define STARTUP_HOOK_SEGMENT "__KLD"
+#define STARTUP_DATA_SEGSECT "__KLDDATA,__init"
+#define STARTUP_HOOK_SEGMENT "__KLDDATA"
#define STARTUP_HOOK_SECTION "__init_entry_set"
#else
/* arm protects __KLD early, so use __BOOTDATA for data */
*/
#define __startup_func \
__PLACE_IN_SECTION(STARTUP_CODE_SEGSECT) \
- __attribute__((noinline, visibility("hidden")))
+ __attribute__((cold, visibility("hidden")))
/*!
* @macro __startup_data
static __startup_data struct startup_tunable_spec \
__startup_TUNABLES_spec_ ## var = { \
.name = __startup_TUNABLES_name_ ## var, \
- .var_addr = &var, \
+ .var_addr = (void *)&var, \
.var_len = sizeof(type_t), \
.var_is_bool = __builtin_types_compatible_p(bool, type_t), \
}; \
__STARTUP_ARG(var, __LINE__, TUNABLES, STARTUP_RANK_FIRST, \
kernel_startup_tunable_init, &__startup_TUNABLES_spec_ ## var)
+#ifdef __cplusplus
+#define __STARTUP_FUNC_CAST(func, a) \
+ (void(*)(const void *))func
+#else
+#define __STARTUP_FUNC_CAST(func, a) \
+ (typeof(func(a))(*)(const void *))func
+#endif
+
#define __STARTUP1(name, line, subsystem, rank, func, a, b) \
__PLACE_IN_SECTION(STARTUP_HOOK_SEGMENT "," STARTUP_HOOK_SECTION) \
static const struct startup_entry \
__startup_ ## subsystem ## _entry_ ## name ## _ ## line = { \
STARTUP_SUB_ ## subsystem, \
- rank, (typeof(func(a))(*)(const void *))func, b, \
+ rank, __STARTUP_FUNC_CAST(func, a), b, \
}
#define __STARTUP(name, line, subsystem, rank, func) \
/* BSD subsystem initialization */
extern void bsd_init(void);
-extern void bsd_early_init(void);
#endif /* MACH_BSD */
ip_lock(port);
assert(ip_kotype(port) == IKOT_SUID_CRED);
- sc = (suid_cred_t)port->ip_kobject;
+ sc = (suid_cred_t)ipc_kobject_get(port);
ipc_kobject_set_atomically(port, IKO_NULL, IKOT_NONE);
ip_unlock(port);
}
if (!ipc_kobject_make_send_lazy_alloc_port(&sc->port,
- (ipc_kobject_t) sc, IKOT_SUID_CRED, false, 0)) {
+ (ipc_kobject_t) sc, IKOT_SUID_CRED, IPC_KOBJECT_ALLOC_NONE, false, 0)) {
suid_cred_free(sc);
return IP_NULL;
}
return -1;
}
- sc = (suid_cred_t)port->ip_kobject;
+ sc = (suid_cred_t)ipc_kobject_get(port);
if (vnode != sc->vnode) {
ip_unlock(port);
* the new semaphore to the task's semaphore list.
*/
task_lock(task);
+ /* Check for race with task_terminate */
+ if (!task->active) {
+ task_unlock(task);
+ zfree(semaphore_zone, s);
+ return KERN_INVALID_TASK;
+ }
enqueue_head(&task->semaphore_list, (queue_entry_t) s);
task->semaphores_owned++;
task_unlock(task);
#include <mach/thread_switch.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
-#include <kern/counters.h>
+#include <kern/counter.h>
#include <kern/ipc_kobject.h>
#include <kern/processor.h>
#include <kern/sched.h>
}
enable_preemption();
- counter(c_swtch_block++);
-
thread_yield_with_continuation((thread_continue_t)swtch_continue, NULL);
}
}
enable_preemption();
- counter(c_swtch_pri_block++);
-
thread_depress_abstime(thread_depress_time);
thread_yield_with_continuation((thread_continue_t)swtch_pri_continue, NULL);
/* 10 */ MACH_TRAP(_kernelrpc_mach_vm_allocate_trap, 4, 5, munge_wwlw),
/* 11 */ MACH_TRAP(_kernelrpc_mach_vm_purgable_control_trap, 4, 5, munge_wlww),
/* 12 */ MACH_TRAP(_kernelrpc_mach_vm_deallocate_trap, 3, 5, munge_wll),
-/* 13 */ MACH_TRAP(kern_invalid, 0, 0, NULL),
+/* 13 */ MACH_TRAP(task_dyld_process_info_notify_get_trap, 2, 4, munge_ll),
/* 14 */ MACH_TRAP(_kernelrpc_mach_vm_protect_trap, 5, 7, munge_wllww),
/* 15 */ MACH_TRAP(_kernelrpc_mach_vm_map_trap, 6, 8, munge_wwllww),
/* 16 */ MACH_TRAP(_kernelrpc_mach_port_allocate_trap, 3, 3, munge_www),
/* 127 */ MACH_TRAP(kern_invalid, 0, 0, NULL),
};
-const char * mach_syscall_name_table[MACH_TRAP_TABLE_COUNT] = {
+const char * const mach_syscall_name_table[MACH_TRAP_TABLE_COUNT] = {
/* 0 */ "kern_invalid",
/* 1 */ "kern_invalid",
/* 2 */ "kern_invalid",
/* 10 */ "_kernelrpc_mach_vm_allocate_trap",
/* 11 */ "kern_invalid",
/* 12 */ "_kernelrpc_mach_vm_deallocate_trap",
-/* 13 */ "kern_invalid",
+/* 13 */ "task_dyld_process_info_notify_get_trap",
/* 14 */ "_kernelrpc_mach_vm_protect_trap",
/* 15 */ "_kernelrpc_mach_vm_map_trap",
/* 16 */ "_kernelrpc_mach_port_allocate_trap",
/* 127 */ "kern_invalid",
};
-int mach_trap_count = (sizeof(mach_trap_table) / sizeof(mach_trap_table[0]));
+const int mach_trap_count = (sizeof(mach_trap_table) / sizeof(mach_trap_table[0]));
kern_return_t
kern_invalid(
extern const mach_trap_t mach_trap_table[];
-extern int mach_trap_count;
+extern const int mach_trap_count;
#if CONFIG_REQUIRES_U32_MUNGING || (__arm__ && (__BIGGEST_ALIGNMENT__ > 4))
#include <libkern/section_keywords.h>
#include <mach-o/loader.h>
+#include <kdp/kdp_dyld.h>
#include <kern/sfi.h> /* picks up ledger.h */
ledger_template_t task_ledger_template = NULL;
+/* global lock for task_dyld_process_info_notify_{register, deregister, get_trap} */
+LCK_GRP_DECLARE(g_dyldinfo_mtx_grp, "g_dyldinfo");
+LCK_MTX_DECLARE(g_dyldinfo_mtx, &g_dyldinfo_mtx_grp);
+
SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) =
{.cpu_time = -1,
.tkm_private = -1,
return KERN_RESOURCE_SHORTAGE;
}
+ counter_alloc(&(new_task->faults));
+
#if defined(HAS_APPLE_PAC)
ml_task_set_rop_pid(new_task, parent_task, inherit_memory);
ml_task_set_jop_pid(new_task, parent_task, inherit_memory);
new_task->requested_policy = default_task_requested_policy;
new_task->effective_policy = default_task_effective_policy;
+ new_task->task_shared_region_slide = -1;
+
task_importance_init_from_parent(new_task, parent_task);
if (parent_task != TASK_NULL) {
new_task->total_system_time = 0;
new_task->total_ptime = 0;
new_task->total_runnable_time = 0;
- new_task->faults = 0;
new_task->pageins = 0;
new_task->cow_faults = 0;
new_task->messages_sent = 0;
to_task->total_system_time = from_task->total_system_time;
to_task->total_ptime = from_task->total_ptime;
to_task->total_runnable_time = from_task->total_runnable_time;
- to_task->faults = from_task->faults;
+ counter_add(&to_task->faults, counter_load(&from_task->faults));
to_task->pageins = from_task->pageins;
to_task->cow_faults = from_task->cow_faults;
to_task->decompressions = from_task->decompressions;
btlog_remove_entries_for_element(task_ref_btlog, task);
#endif
+ counter_free(&task->faults);
+
#if CONFIG_COALITIONS
task_release_coalitions(task);
#endif /* CONFIG_COALITIONS */
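
The hunks above migrate task->faults from a plain integer_t to the per-CPU counter_t type, pairing counter_alloc()/counter_free() at task creation and destruction with counter_add()/counter_load() at the write and read sites. A minimal sketch of that lifecycle, using only the calls visible in this diff (the struct and function names are illustrative, not from the source):

#include <kern/counter.h>

/* Illustrative sketch only -- not part of the diff. */
struct fault_stats {
	counter_t	faults;			/* scalable per-CPU counter */
};

static void
fault_stats_init(struct fault_stats *fs)
{
	counter_alloc(&fs->faults);		/* allocate per-CPU storage */
}

static void
fault_stats_record(struct fault_stats *fs)
{
	counter_add(&fs->faults, 1);		/* hot path: cheap per-CPU add */
}

static uint64_t
fault_stats_read(struct fault_stats *fs)
{
	/* folds the per-CPU values; callers clamp/cast as task_info() does */
	return counter_load(&fs->faults);
}

static void
fault_stats_destroy(struct fault_stats *fs)
{
	counter_free(&fs->faults);		/* must pair with counter_alloc() */
}
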
task_add_to_corpse_task_list(task);
task_start_halt(task);
- thread_terminate_internal(self_thread);
+ thread_terminate_internal(self_thread, TH_TERMINATE_OPTION_NONE);
(void) thread_interrupt_level(wsave);
assert(task->halting == TRUE);
{
thread_mtx_lock(th_iter);
th_iter->inspection = FALSE;
+ ipc_thread_disable(th_iter);
thread_mtx_unlock(th_iter);
}
ip_unlock(port);
return;
}
- task = (task_t)port->ip_kobject;
+ task = (task_t)ipc_kobject_get(port);
kotype = ip_kotype(port);
if (task != TASK_NULL) {
assert((IKOT_TASK_READ == kotype) || (IKOT_TASK_INSPECT == kotype));
return;
}
+ if (kotype == IKOT_TASK_READ) {
+ flavor = TASK_FLAVOR_READ;
+ } else {
+ flavor = TASK_FLAVOR_INSPECT;
+ }
+
itk_lock(task);
ip_lock(port);
- require_ip_active(port);
/*
+ * If the port is no longer active, then ipc_task_terminate() ran
+ * and destroyed the kobject already. Just deallocate the task
+ * ref we took and go away.
+ *
+ * It is also possible that several nsrequests are in flight,
+ * only one shall NULL-out the port entry, and this is the one
+ * that gets to dealloc the port.
+ *
* Check for a stale no-senders notification. A call to any function
* that vends out send rights to this port could resurrect it between
* this notification being generated and actually being handled here.
*/
- if (port->ip_srights > 0) {
+ if (!ip_active(port) ||
+ task->itk_task_ports[flavor] != port ||
+ port->ip_srights > 0) {
ip_unlock(port);
itk_unlock(task);
task_deallocate(task);
return;
}
- if (kotype == IKOT_TASK_READ) {
- flavor = TASK_FLAVOR_READ;
- } else {
- flavor = TASK_FLAVOR_INSPECT;
- }
- assert(task->itk_self[flavor] == port);
- task->itk_self[flavor] = IP_NULL;
- port->ip_kobject = IKOT_NONE;
+ assert(task->itk_task_ports[flavor] == port);
+ task->itk_task_ports[flavor] = IP_NULL;
+
+ ipc_kobject_set_atomically(port, IKO_NULL, IKOT_NONE);
ip_unlock(port);
itk_unlock(task);
task_deallocate(task);
* Terminate each thread in the task.
*/
queue_iterate(&task->threads, thread, thread_t, task_threads) {
- thread_terminate_internal(thread);
+ thread_terminate_internal(thread, TH_TERMINATE_OPTION_NONE);
}
#ifdef MACH_BSD
thread_mtx_unlock(thread);
}
if (thread != self) {
- thread_terminate_internal(thread);
+ thread_terminate_internal(thread, TH_TERMINATE_OPTION_NONE);
}
}
task->dispatchqueue_offset = dispatchqueue_offset;
return KERN_INVALID_ARGUMENT;
}
+ assert(flavor <= THREAD_FLAVOR_INSPECT);
+
for (;;) {
task_lock(task);
if (!task->active) {
switch (flavor) {
case THREAD_FLAVOR_CONTROL:
- for (i = 0; i < actual; ++i) {
- ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
+ if (task == current_task()) {
+ for (i = 0; i < actual; ++i) {
+ ((ipc_port_t *) thread_list)[i] = convert_thread_to_port_pinned(thread_list[i]);
+ }
+ } else {
+ for (i = 0; i < actual; ++i) {
+ ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
+ }
}
break;
case THREAD_FLAVOR_READ:
((ipc_port_t *) thread_list)[i] = convert_thread_inspect_to_port(thread_list[i]);
}
break;
- default:
- return KERN_INVALID_ARGUMENT;
}
}
* notification on that port (if none outstanding).
*/
(void)ipc_kobject_make_send_lazy_alloc_port((ipc_port_t *) &task->itk_resume,
- (ipc_kobject_t)task, IKOT_TASK_RESUME, true, OS_PTRAUTH_DISCRIMINATOR("task.itk_resume"));
+ (ipc_kobject_t)task, IKOT_TASK_RESUME, IPC_KOBJECT_ALLOC_NONE, true,
+ OS_PTRAUTH_DISCRIMINATOR("task.itk_resume"));
port = task->itk_resume;
task_unlock(task);
* but we'll look it up when calling a traditional resume. Any IPC operations that
* deallocate the send right will auto-release the suspension.
*/
- if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, ip_to_object(port),
- MACH_MSG_TYPE_MOVE_SEND, NULL, NULL, &name)) != KERN_SUCCESS) {
- printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
- proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
+ if (IP_VALID(port)) {
+ kr = ipc_object_copyout(current_space(), ip_to_object(port),
+ MACH_MSG_TYPE_MOVE_SEND, IPC_OBJECT_COPYOUT_FLAGS_NONE,
+ NULL, NULL, &name);
+ } else {
+ kr = KERN_SUCCESS;
+ }
+ if (kr != KERN_SUCCESS) {
+ printf("warning: %s(%d) failed to copyout suspension "
+ "token for pid %d with error: %d\n",
+ proc_name_address(current_task()->bsd_info),
+ proc_pid(current_task()->bsd_info),
task_pid(task), kr);
- return kr;
}
return kr;
{
kern_return_t error = KERN_SUCCESS;
mach_msg_type_number_t original_task_info_count;
+ bool is_kernel_task = (task == kernel_task);
if (task == TASK_NULL) {
return KERN_INVALID_ARGUMENT;
events_info = (task_events_info_t) task_info_out;
- events_info->faults = task->faults;
+ events_info->faults = (int32_t) MIN(counter_load(&task->faults), INT32_MAX);
events_info->pageins = task->pageins;
events_info->cow_faults = task->cow_faults;
events_info->messages_sent = task->messages_sent;
vm_info = (task_vm_info_t)task_info_out;
- if (task == kernel_task) {
+ /*
+ * Do not hold both the task and map locks,
+ * so convert the task lock into a map reference,
+ * drop the task lock, then lock the map.
+ */
+ if (is_kernel_task) {
map = kernel_map;
- /* no lock */
+ task_unlock(task);
+ /* no lock, no reference */
} else {
map = task->map;
+ vm_map_reference(map);
+ task_unlock(task);
vm_map_lock_read(map);
}
vm_info->purgeable_volatile_pmap = 0;
vm_info->purgeable_volatile_resident = 0;
vm_info->purgeable_volatile_virtual = 0;
- if (task == kernel_task) {
+ if (is_kernel_task) {
/*
* We do not maintain the detailed stats for the
* kernel_pmap, so just count everything as
}
*task_info_count = TASK_VM_INFO_REV0_COUNT;
+ if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
+ /* must be captured while we still have the map lock */
+ vm_info->min_address = map->min_offset;
+ vm_info->max_address = map->max_offset;
+ }
+
+ /*
+ * Done with vm map things, can drop the map lock and reference,
+ * and take the task lock back.
+ *
+ * Re-validate that the task didn't die on us.
+ */
+ if (!is_kernel_task) {
+ vm_map_unlock_read(map);
+ vm_map_deallocate(map);
+ }
+ map = VM_MAP_NULL;
+
+ task_lock(task);
+
+ if ((task != current_task()) && (!task->active)) {
+ error = KERN_INVALID_ARGUMENT;
+ break;
+ }
+
if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
vm_info->phys_footprint =
(mach_vm_size_t) get_task_phys_footprint(task);
*task_info_count = TASK_VM_INFO_REV1_COUNT;
}
if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
- vm_info->min_address = map->min_offset;
- vm_info->max_address = map->max_offset;
+ /* data was captured above */
*task_info_count = TASK_VM_INFO_REV2_COUNT;
}
+
if (original_task_info_count >= TASK_VM_INFO_REV3_COUNT) {
ledger_get_lifetime_max(task->ledger,
task_ledgers.phys_footprint,
*task_info_count = TASK_VM_INFO_REV5_COUNT;
}
- if (task != kernel_task) {
- vm_map_unlock_read(map);
- }
-
break;
}
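
The TASK_VM_INFO changes above apply a lock-handoff pattern: take a reference on the map while holding the task lock, drop the task lock before locking the map, and after the map work re-take the task lock and re-validate that the task is still active. A stripped-down user-space sketch of the same pattern, with pthread mutexes standing in for the kernel locks (all names here are illustrative):

#include <pthread.h>
#include <stdbool.h>
#include <stdatomic.h>

struct map  { pthread_mutex_t lock; /* ... map state ... */ };
struct task {
	pthread_mutex_t lock;
	bool            active;
	struct map     *map;
	atomic_int      map_refs;	/* stands in for vm_map_reference() counting */
};

/* Returns false if the task died while we were working on the map. */
static bool
inspect_task_map(struct task *t)
{
	pthread_mutex_lock(&t->lock);
	struct map *m = t->map;
	atomic_fetch_add(&t->map_refs, 1);	/* "vm_map_reference(map)" */
	pthread_mutex_unlock(&t->lock);		/* drop task lock before map lock */

	pthread_mutex_lock(&m->lock);
	/* ... read map-only fields (min/max offsets, etc.) ... */
	pthread_mutex_unlock(&m->lock);
	atomic_fetch_sub(&t->map_refs, 1);	/* "vm_map_deallocate(map)" */

	pthread_mutex_lock(&t->lock);		/* take the task lock back */
	bool still_active = t->active;		/* re-validate, as the diff does */
	/* ... read task-only fields ... */
	pthread_mutex_unlock(&t->lock);
	return still_active;
}
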
* checks on task_port.
*
* In the case of TASK_DYLD_INFO, we require the more
- * privileged task_port not the less-privileged task_name_port.
+ * privileged task_read_port not the less-privileged task_name_port.
*
*/
kern_return_t
kern_return_t ret;
if (flavor == TASK_DYLD_INFO) {
- task = convert_port_to_task(task_port);
+ task = convert_port_to_task_read(task_port);
} else {
task = convert_port_to_task_name(task_port);
}
return ret;
}
+/*
+ * Routine: task_dyld_process_info_update_helper
+ *
+ * Release send rights in release_ports.
+ *
+ * If no active ports are found in the task's dyld notifier array, unset the
+ * magic value in user space to indicate so.
+ *
+ * Condition:
+ * task's itk_lock is locked, and is unlocked upon return.
+ * Global g_dyldinfo_mtx is locked, and is unlocked upon return.
+ */
+void
+task_dyld_process_info_update_helper(
+ task_t task,
+ size_t active_count,
+ vm_map_address_t magic_addr, /* a userspace address */
+ ipc_port_t *release_ports,
+ size_t release_count)
+{
+ void *notifiers_ptr = NULL;
+
+ assert(release_count <= DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT);
+
+ if (active_count == 0) {
+ assert(task->itk_dyld_notify != NULL);
+ notifiers_ptr = task->itk_dyld_notify;
+ task->itk_dyld_notify = NULL;
+ itk_unlock(task);
+
+ kfree(notifiers_ptr, (vm_size_t)sizeof(ipc_port_t) * DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT);
+ (void)copyoutmap_atomic32(task->map, MACH_PORT_NULL, magic_addr); /* unset magic */
+ } else {
+ itk_unlock(task);
+ (void)copyoutmap_atomic32(task->map, (mach_port_name_t)DYLD_PROCESS_INFO_NOTIFY_MAGIC,
+ magic_addr); /* reset magic */
+ }
+
+ lck_mtx_unlock(&g_dyldinfo_mtx);
+
+ for (size_t i = 0; i < release_count; i++) {
+ ipc_port_release_send(release_ports[i]);
+ }
+}
+
+/*
+ * Routine: task_dyld_process_info_notify_register
+ *
+ * Insert a send right into the target task's itk_dyld_notify array. Allocate kernel
+ * memory for the array if it's the first port to be registered. Also clean up
+ * any dead rights found in the array.
+ *
+ * Consumes sright if returns KERN_SUCCESS, otherwise MIG will destroy it.
+ *
+ * Args:
+ * task: Target task for the registration.
+ * sright: A send right.
+ *
+ * Returns:
+ * KERN_SUCCESS: Registration succeeded.
+ * KERN_INVALID_TASK: task is invalid.
+ * KERN_INVALID_RIGHT: sright is invalid.
+ * KERN_DENIED: Security policy denied this call.
+ * KERN_RESOURCE_SHORTAGE: Kernel memory allocation failed.
+ * KERN_NO_SPACE: No available notifier port slot left for this task.
+ * KERN_RIGHT_EXISTS: The notifier port is already registered and active.
+ *
+ * For other error codes, see task_info().
+ *
+ * See Also:
+ * task_dyld_process_info_notify_get_trap() in mach_kernelrpc.c
+ */
+kern_return_t
+task_dyld_process_info_notify_register(
+ task_t task,
+ ipc_port_t sright)
+{
+ struct task_dyld_info dyld_info;
+ mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
+ ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
+ uint32_t release_count = 0, active_count = 0;
+ mach_vm_address_t ports_addr; /* a user space address */
+ kern_return_t kr;
+ boolean_t right_exists = false;
+ ipc_port_t *notifiers_ptr = NULL;
+ ipc_port_t *portp;
+
+ if (task == TASK_NULL || task == kernel_task) {
+ return KERN_INVALID_TASK;
+ }
+
+ if (!IP_VALID(sright)) {
+ return KERN_INVALID_RIGHT;
+ }
+
+#if CONFIG_MACF
+ if (mac_task_check_dyld_process_info_notify_register()) {
+ return KERN_DENIED;
+ }
+#endif
+
+ kr = task_info(task, TASK_DYLD_INFO, (task_info_t)&dyld_info, &info_count);
+ if (kr) {
+ return kr;
+ }
+
+ if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
+ ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
+ offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
+ } else {
+ ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
+ offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
+ }
+
+ if (task->itk_dyld_notify == NULL) {
+ notifiers_ptr = (ipc_port_t *)
+ kalloc_flags(sizeof(ipc_port_t) * DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT, Z_ZERO);
+ if (!notifiers_ptr) {
+ return KERN_RESOURCE_SHORTAGE;
+ }
+ }
+
+ lck_mtx_lock(&g_dyldinfo_mtx);
+ itk_lock(task);
+
+ if (task->itk_dyld_notify == NULL) {
+ task->itk_dyld_notify = notifiers_ptr;
+ notifiers_ptr = NULL;
+ }
+
+ assert(task->itk_dyld_notify != NULL);
+ /* First pass: clear dead names and check for duplicate registration */
+ for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
+ portp = &task->itk_dyld_notify[slot];
+ if (*portp != IPC_PORT_NULL && !ip_active(*portp)) {
+ release_ports[release_count++] = *portp;
+ *portp = IPC_PORT_NULL;
+ } else if (*portp == sright) {
+ /* the port is already registered and is active */
+ right_exists = true;
+ }
+
+ if (*portp != IPC_PORT_NULL) {
+ active_count++;
+ }
+ }
+
+ if (right_exists) {
+ /* skip second pass */
+ kr = KERN_RIGHT_EXISTS;
+ goto out;
+ }
+
+ /* Second pass: register the port */
+ kr = KERN_NO_SPACE;
+ for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
+ portp = &task->itk_dyld_notify[slot];
+ if (*portp == IPC_PORT_NULL) {
+ *portp = sright;
+ active_count++;
+ kr = KERN_SUCCESS;
+ break;
+ }
+ }
+
+out:
+ assert(active_count > 0);
+
+ task_dyld_process_info_update_helper(task, active_count,
+ (vm_map_address_t)ports_addr, release_ports, release_count);
+ /* itk_lock, g_dyldinfo_mtx are unlocked upon return */
+
+ if (notifiers_ptr) {
+ kfree(notifiers_ptr, sizeof(ipc_port_t) * DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT);
+ }
+
+ return kr;
+}
+
+/*
+ * Routine: task_dyld_process_info_notify_deregister
+ *
+ * Remove the send right in the target task's itk_dyld_notify array that matches the
+ * receive right name passed in. Deallocate kernel memory for the array if it's the
+ * last port to be deregistered, or if all ports have died. Also clean up any dead
+ * rights found in the array.
+ *
+ * Does not consume any reference.
+ *
+ * Args:
+ * task: Target task for the deregistration.
+ * rcv_name: The name denoting the receive right in caller's space.
+ *
+ * Returns:
+ * KERN_SUCCESS: A matching entry was found and deregistration succeeded.
+ * KERN_INVALID_TASK: task is invalid.
+ * KERN_INVALID_NAME: name is invalid.
+ * KERN_DENIED: Security policy denied this call.
+ * KERN_FAILURE: A matching entry is not found.
+ * KERN_INVALID_RIGHT: The name passed in does not represent a valid rcv right.
+ *
+ * For other error codes, see task_info().
+ *
+ * See Also:
+ * task_dyld_process_info_notify_get_trap() in mach_kernelrpc.c
+ */
+kern_return_t
+task_dyld_process_info_notify_deregister(
+ task_t task,
+ mach_port_name_t rcv_name)
+{
+ struct task_dyld_info dyld_info;
+ mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
+ ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
+ uint32_t release_count = 0, active_count = 0;
+ boolean_t port_found = false;
+ mach_vm_address_t ports_addr; /* a user space address */
+ ipc_port_t sright;
+ kern_return_t kr;
+ ipc_port_t *portp;
+
+ if (task == TASK_NULL || task == kernel_task) {
+ return KERN_INVALID_TASK;
+ }
+
+ if (!MACH_PORT_VALID(rcv_name)) {
+ return KERN_INVALID_NAME;
+ }
+
+#if CONFIG_MACF
+ if (mac_task_check_dyld_process_info_notify_register()) {
+ return KERN_DENIED;
+ }
+#endif
+
+ kr = task_info(task, TASK_DYLD_INFO, (task_info_t)&dyld_info, &info_count);
+ if (kr) {
+ return kr;
+ }
+
+ if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
+ ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
+ offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
+ } else {
+ ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
+ offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
+ }
+
+ kr = ipc_port_translate_receive(current_space(), rcv_name, &sright); /* does not produce port ref */
+ if (kr) {
+ return KERN_INVALID_RIGHT;
+ }
+
+ ip_reference(sright);
+ ip_unlock(sright);
+
+ assert(sright != IPC_PORT_NULL);
+
+ lck_mtx_lock(&g_dyldinfo_mtx);
+ itk_lock(task);
+
+ if (task->itk_dyld_notify == NULL) {
+ itk_unlock(task);
+ lck_mtx_unlock(&g_dyldinfo_mtx);
+ ip_release(sright);
+ return KERN_FAILURE;
+ }
+
+ for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
+ portp = &task->itk_dyld_notify[slot];
+ if (*portp == sright) {
+ release_ports[release_count++] = *portp;
+ *portp = IPC_PORT_NULL;
+ port_found = true;
+ } else if ((*portp != IPC_PORT_NULL && !ip_active(*portp))) {
+ release_ports[release_count++] = *portp;
+ *portp = IPC_PORT_NULL;
+ }
+
+ if (*portp != IPC_PORT_NULL) {
+ active_count++;
+ }
+ }
+
+ task_dyld_process_info_update_helper(task, active_count,
+ (vm_map_address_t)ports_addr, release_ports, release_count);
+ /* itk_lock, g_dyldinfo_mtx are unlocked upon return */
+
+ ip_release(sright);
+
+ return port_found ? KERN_SUCCESS : KERN_FAILURE;
+}
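+
For orientation, both routines above are reached from user space through MIG. The sketch below shows how a client might register and later deregister a notification port; it assumes the MIG-generated prototypes mirror the kernel routines (target task plus one port argument), which this diff does not show, so treat the signatures as assumptions:

#include <mach/mach.h>

/* Sketch only: register a notification port with the current task, then
 * deregister it. Assumes MIG stubs named after the kernel routines above. */
static kern_return_t
dyld_notify_example(void)
{
	mach_port_t port = MACH_PORT_NULL;
	kern_return_t kr;

	kr = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* The kernel side stores a send right in itk_dyld_notify; the MIG
	 * make-send disposition creates that right from our receive right. */
	kr = task_dyld_process_info_notify_register(mach_task_self(), port);
	if (kr != KERN_SUCCESS) {
		mach_port_mod_refs(mach_task_self(), port, MACH_PORT_RIGHT_RECEIVE, -1);
		return kr;
	}

	/* ... receive dyld notifications on `port` ... */

	/* Deregistration matches on the receive right name in our space. */
	kr = task_dyld_process_info_notify_deregister(mach_task_self(), port);
	mach_port_mod_refs(mach_task_self(), port, MACH_PORT_RIGHT_RECEIVE, -1);
	return kr;
}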
+
/*
* task_power_info
*
#ifdef XNU_KERNEL_PRIVATE
#include <kern/kern_cdata.h>
#include <mach/sfi_class.h>
+#include <kern/counter.h>
#include <kern/queue.h>
#include <sys/kern_sysctl.h>
#endif /* XNU_KERNEL_PRIVATE */
struct task {
/* Synchronization/destruction information */
- decl_lck_mtx_data(, lock); /* Task's lock */
+ decl_lck_mtx_data(, lock); /* Task's lock */
os_refcnt_t ref_count; /* Number of references to me */
- boolean_t active; /* Task has not been terminated */
- boolean_t halting; /* Task is being halted */
- boolean_t message_app_suspended; /* Let iokit know when pidsuspended */
+ bool active; /* Task has not been terminated */
+ bool ipc_active; /* IPC with the task ports is allowed */
+ bool halting; /* Task is being halted */
+ bool message_app_suspended; /* Let iokit know when pidsuspended */
/* Virtual timers */
uint32_t vtimers;
* Different flavors of task port.
* These flavors TASK_FLAVOR_* are defined in mach_types.h
*/
- struct ipc_port * XNU_PTRAUTH_SIGNED_PTR("task.itk_self") itk_self[TASK_SELF_PORT_COUNT]; /* does not hold right */
- struct ipc_port * XNU_PTRAUTH_SIGNED_PTR("task.itk_settable_self") itk_settable_self; /* a send right */
+ struct ipc_port * XNU_PTRAUTH_SIGNED_PTR("task.itk_task_ports") itk_task_ports[TASK_SELF_PORT_COUNT];
+ struct ipc_port * XNU_PTRAUTH_SIGNED_PTR("task.itk_settable_self") itk_settable_self; /* a send right */
+ struct ipc_port * XNU_PTRAUTH_SIGNED_PTR("task.itk_self") itk_self; /* immovable/pinned task port, does not hold right */
struct exception_action exc_actions[EXC_TYPES_COUNT];
/* a send right each valid element */
- struct ipc_port * XNU_PTRAUTH_SIGNED_PTR("task.itk_host") itk_host; /* a send right */
- struct ipc_port * XNU_PTRAUTH_SIGNED_PTR("task.itk_bootstrap") itk_bootstrap; /* a send right */
- struct ipc_port * XNU_PTRAUTH_SIGNED_PTR("task.itk_seatbelt") itk_seatbelt; /* a send right */
- struct ipc_port * XNU_PTRAUTH_SIGNED_PTR("task.itk_gssd") itk_gssd; /* yet another send right */
- struct ipc_port * XNU_PTRAUTH_SIGNED_PTR("task.itk_debug_control") itk_debug_control; /* send right for debugmode communications */
- struct ipc_port * XNU_PTRAUTH_SIGNED_PTR("task.itk_task_access") itk_task_access; /* and another send right */
- struct ipc_port * XNU_PTRAUTH_SIGNED_PTR("task.itk_resume") itk_resume; /* a receive right to resume this task */
+ struct ipc_port * XNU_PTRAUTH_SIGNED_PTR("task.itk_host") itk_host; /* a send right */
+ struct ipc_port * XNU_PTRAUTH_SIGNED_PTR("task.itk_bootstrap") itk_bootstrap; /* a send right */
+ struct ipc_port * XNU_PTRAUTH_SIGNED_PTR("task.itk_seatbelt") itk_seatbelt; /* a send right */
+ struct ipc_port * XNU_PTRAUTH_SIGNED_PTR("task.itk_gssd") itk_gssd; /* yet another send right */
+ struct ipc_port * XNU_PTRAUTH_SIGNED_PTR("task.itk_debug_control") itk_debug_control; /* send right for debugmode communications */
+ struct ipc_port * XNU_PTRAUTH_SIGNED_PTR("task.itk_task_access") itk_task_access; /* and another send right */
+ struct ipc_port * XNU_PTRAUTH_SIGNED_PTR("task.itk_resume") itk_resume; /* a receive right to resume this task */
struct ipc_port * XNU_PTRAUTH_SIGNED_PTR("task.itk_registered") itk_registered[TASK_PORT_REGISTER_MAX];
/* all send rights */
+ ipc_port_t * XNU_PTRAUTH_SIGNED_PTR("task.itk_dyld_notify") itk_dyld_notify; /* lazy send rights array of size DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT */
struct ipc_space * XNU_PTRAUTH_SIGNED_PTR("task.itk_space") itk_space;
MACHINE_TASK
- integer_t faults; /* faults counter */
+ counter_t faults; /* faults counter */
integer_t decompressions; /* decompression counter */
integer_t pageins; /* pageins counter */
integer_t cow_faults; /* copy on write fault counter */
#if CONFIG_PHYS_WRITE_ACCT
uint64_t task_fs_metadata_writes;
#endif /* CONFIG_PHYS_WRITE_ACCT */
+ uint32_t task_shared_region_slide; /* cached here to avoid locking during telemetry */
+ uuid_t task_shared_region_uuid;
};
/*
extern boolean_t
task_has_watchports(task_t task);
+void
+task_dyld_process_info_update_helper(
+ task_t task,
+ size_t active_count,
+ vm_map_address_t magic_addr,
+ ipc_port_t *release_ports,
+ size_t release_count);
+
#else /* MACH_KERNEL_PRIVATE */
__BEGIN_DECLS
/* Convert from a task to a port */
extern ipc_port_t convert_task_to_port(task_t);
+extern ipc_port_t convert_task_to_port_pinned(task_t);
extern ipc_port_t convert_task_name_to_port(task_name_t);
extern ipc_port_t convert_task_inspect_to_port(task_inspect_t);
extern ipc_port_t convert_task_read_to_port(task_read_t);
--- /dev/null
+/*
+ * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <os/refcnt.h>
+#include <kern/ipc_kobject.h>
+#include <mach/mach_types.h>
+#include <mach/task.h>
+#include <mach/notify.h>
+#include <mach/kern_return.h>
+#include <security/mac_mach_internal.h>
+#include <kern/task_ident.h>
+
+struct proc_ident {
+ uint64_t p_uniqueid;
+ pid_t p_pid;
+ int p_idversion;
+};
+
+extern void* proc_find_ident(struct proc_ident const *i);
+extern int proc_rele(void* p);
+extern task_t proc_task(void* p);
+extern struct proc_ident proc_ident(void* p);
+extern kern_return_t task_conversion_eval(task_t caller, task_t victim);
+
+struct task_id_token {
+ struct proc_ident ident;
+ ipc_port_t port;
+ os_refcnt_t tidt_refs;
+};
+
+static ZONE_DECLARE(task_id_token_zone, "task_id_token",
+ sizeof(struct task_id_token), ZC_ZFREE_CLEARMEM);
+
+static void
+tidt_reference(task_id_token_t token)
+{
+ if (token == TASK_ID_TOKEN_NULL) {
+ return;
+ }
+ os_ref_retain(&token->tidt_refs);
+}
+
+static void
+tidt_release(task_id_token_t token)
+{
+ ipc_port_t port;
+
+ if (token == TASK_ID_TOKEN_NULL) {
+ return;
+ }
+
+ if (os_ref_release(&token->tidt_refs) > 0) {
+ return;
+ }
+
+ /* last ref */
+ port = token->port;
+
+ require_ip_active(port);
+ assert(!port->ip_srights);
+ ipc_port_dealloc_kernel(port);
+
+ zfree(task_id_token_zone, token);
+}
+
+void
+task_id_token_release(task_id_token_t token)
+{
+ tidt_release(token);
+}
+
+void
+task_id_token_notify(mach_msg_header_t *msg)
+{
+ assert(msg->msgh_id == MACH_NOTIFY_NO_SENDERS);
+
+ mach_no_senders_notification_t *not = (mach_no_senders_notification_t *)msg;
+ ipc_port_t port = not->not_header.msgh_remote_port;
+ task_id_token_t token = ip_get_kobject(port);
+
+ require_ip_active(port);
+ assert(IKOT_TASK_ID_TOKEN == ip_kotype(port));
+ assert(port->ip_srights == 0);
+
+ tidt_release(token); /* consumes ref given by notification */
+}
+
+kern_return_t
+task_create_identity_token(
+ task_t task,
+ task_id_token_t *tokenp)
+{
+ task_id_token_t token;
+
+ if (task == TASK_NULL || task == kernel_task) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ token = zalloc_flags(task_id_token_zone, Z_ZERO | Z_WAITOK | Z_NOFAIL);
+
+ task_lock(task);
+ if (task->bsd_info) {
+ token->port = IP_NULL;
+ token->ident = proc_ident(task->bsd_info);
+ /* this reference will be donated to no-senders notification */
+ os_ref_init_count(&token->tidt_refs, NULL, 1);
+ } else {
+ task_unlock(task);
+ zfree(task_id_token_zone, token);
+ return KERN_INVALID_ARGUMENT;
+ }
+ task_unlock(task);
+
+ *tokenp = token;
+
+ return KERN_SUCCESS;
+}
+
+kern_return_t
+task_identity_token_get_task_port(
+ task_id_token_t token,
+ task_flavor_t flavor,
+ ipc_port_t *portp)
+{
+ int which;
+ task_t task;
+ kern_return_t kr;
+
+ if (token == TASK_ID_TOKEN_NULL) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ switch (flavor) {
+ case TASK_FLAVOR_NAME:
+ which = TASK_NAME_PORT;
+ break;
+ case TASK_FLAVOR_INSPECT:
+ which = TASK_INSPECT_PORT;
+ break;
+ case TASK_FLAVOR_READ:
+ which = TASK_READ_PORT;
+ break;
+ case TASK_FLAVOR_CONTROL:
+ which = TASK_KERNEL_PORT;
+ break;
+ default:
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ void* p = proc_find_ident(&token->ident);
+ if (p == NULL) {
+ return KERN_INVALID_ARGUMENT;
+ }
+ task = proc_task(p);
+ task_reference(task);
+ proc_rele(p);
+
+ if (task == TASK_NULL) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ if (flavor == TASK_FLAVOR_CONTROL && task == current_task()) {
+ *portp = convert_task_to_port_pinned(task); /* consumes task ref */
+ return KERN_SUCCESS;
+ }
+ if (flavor <= TASK_FLAVOR_INSPECT && task_conversion_eval(current_task(), task)) {
+ task_deallocate(task);
+ return KERN_INVALID_ARGUMENT;
+ }
+
+#if CONFIG_MACF
+ if (task != current_task()) {
+ if (mac_task_check_task_id_token_get_task(task, flavor)) {
+ task_deallocate(task);
+ return KERN_DENIED;
+ }
+ }
+#endif
+
+ kr = task_get_special_port(task, which, portp);
+ task_deallocate(task);
+ return kr;
+}
+
+/* Produces token ref */
+task_id_token_t
+convert_port_to_task_id_token(
+ ipc_port_t port)
+{
+ task_id_token_t token = TASK_ID_TOKEN_NULL;
+
+ if (IP_VALID(port)) {
+ ip_lock(port);
+ if (ip_active(port)) {
+ if (ip_kotype(port) == IKOT_TASK_ID_TOKEN) {
+ token = (task_id_token_t)ip_get_kobject(port);
+
+ zone_require(task_id_token_zone, token);
+ tidt_reference(token);
+ }
+ }
+ ip_unlock(port);
+ }
+ return token;
+}
+
+/* Consumes token ref */
+ipc_port_t
+convert_task_id_token_to_port(
+ task_id_token_t token)
+{
+ boolean_t kr;
+
+ if (token == TASK_ID_TOKEN_NULL) {
+ return IP_NULL;
+ }
+
+ zone_require(task_id_token_zone, token);
+
+ kr = ipc_kobject_make_send_lazy_alloc_port(&token->port,
+ (ipc_kobject_t) token, IKOT_TASK_ID_TOKEN, IPC_KOBJECT_ALLOC_NONE, false, 0);
+ assert(kr == TRUE); /* no-senders notification is armed, consumes token ref */
+
+ return token->port;
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ *
+ * A task identity token represents the identity of a mach task without carrying task
+ * access capabilities. In applicable scenarios, a task identity token can be moved between
+ * tasks and upgraded to the desired task port flavor (namely, task name port,
+ * inspect port, read port or control port) upon use.
+ *
+ */
+
+#ifndef _KERN_TASK_IDENT_H
+#define _KERN_TASK_IDENT_H
+
+#if XNU_KERNEL_PRIVATE
+
+#include <kern/kern_types.h>
+#include <mach/mach_types.h>
+
+void task_id_token_notify(mach_msg_header_t *msg);
+void task_id_token_release(task_id_token_t token);
+
+ipc_port_t convert_task_id_token_to_port(task_id_token_t token);
+
+task_id_token_t convert_port_to_task_id_token(ipc_port_t port);
+
+#endif /* XNU_KERNEL_PRIVATE */
+
+#endif /* _KERN_TASK_IDENT_H */
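
A short user-space sketch of the intended flow, under stated assumptions: that MIG exposes task_create_identity_token() and task_identity_token_get_task_port() with the port-based prototypes used below (in user space a task_id_token_t is just a Mach send right), and that the TASK_FLAVOR_* constants come from mach_types.h as noted elsewhere in this diff:

#include <mach/mach.h>

/* Sketch only: mint an identity token for our own task, later redeem it
 * for a read-flavored task port. Prototypes are assumed to match the
 * kernel routines in task_ident.c above. */
static kern_return_t
identity_token_example(void)
{
	mach_port_t token = MACH_PORT_NULL;	/* user-space view of task_id_token_t */
	mach_port_t read_port = MACH_PORT_NULL;
	kern_return_t kr;

	kr = task_create_identity_token(mach_task_self(), &token);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* The token carries identity only; upgrading it re-runs the usual
	 * task_conversion_eval()/MAC checks in the kernel. */
	kr = task_identity_token_get_task_port(token, TASK_FLAVOR_READ, &read_port);

	if (read_port != MACH_PORT_NULL) {
		mach_port_deallocate(mach_task_self(), read_port);
	}
	mach_port_deallocate(mach_task_self(), token);
	return kr;
}
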
queue_head_t queue;
task_watch_t *twp;
- queue_init(&queue);
-
task_watch_lock();
- movqueue(&queue, &task->task_watchers);
+ queue_new_head(&task->task_watchers, &queue, task_watch_t *, tw_links);
+ queue_init(&task->task_watchers);
queue_iterate(&queue, twp, task_watch_t *, tw_links) {
/*
task->num_taskwatchers = 0;
task_watch_unlock();
- while ((twp = qe_dequeue_head(&task->task_watchers, task_watch_t, tw_links)) != NULL) {
+ while (!queue_empty(&queue)) {
+ queue_remove_first(&queue, twp, task_watch_t *, tw_links);
/* remove thread and network bg */
set_thread_appbg(twp->tw_thread, 0, twp->tw_importance);
thread_deallocate(twp->tw_thread);
+++ /dev/null
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * File: kern/task_swap.c
- *
- * Task residency management primitives implementation.
- */
-#include <mach_assert.h>
-#include <task_swapper.h>
-
-#include <kern/spl.h>
-#include <kern/queue.h>
-#include <kern/host.h>
-#include <kern/task.h>
-#include <kern/task_swap.h>
-#include <kern/thread.h>
-#include <kern/host_statistics.h>
-#include <kern/misc_protos.h>
-#include <kern/assert.h>
-#include <mach/policy.h>
-
-#include <ipc/ipc_port.h> /* We use something from in here */
-
-/*
- * task_swappable: [exported]
- *
- * Make a task swappable or non-swappable. If made non-swappable,
- * it will be swapped in.
- */
-kern_return_t
-task_swappable(
- host_priv_t host_priv,
- task_t task,
- __unused boolean_t make_swappable)
-{
- if (host_priv == HOST_PRIV_NULL) {
- return KERN_INVALID_ARGUMENT;
- }
-
- if (task == TASK_NULL) {
- return KERN_INVALID_ARGUMENT;
- }
-
- /*
- * We don't support swapping, this call is purely advisory.
- */
- return KERN_SUCCESS;
-}
+++ /dev/null
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- *
- * Revision 1.1.1.1 1998/09/22 21:05:32 wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1 1998/03/07 02:25:56 wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.4.1 1995/04/07 19:02:38 barbou
- * Merged into mainline.
- * [95/03/09 barbou]
- *
- * Revision 1.1.2.2 1995/02/13 15:35:45 barbou
- * Merged/ported to MK6.
- *
- * Revision 1.1.1.3 94/08/12 15:44:39 barbou
- * VM Merge - Task Swapper.
- *
- * Changed host_priv_t into host_t.
- * [94/07/28 barbou]
- *
- * Revision 1.1.1.2 1994/07/28 15:33:46 barbou
- * Copied from IK.
- *
- * Revision 3.0.3.2 1994/01/20 19:53:01 chasb
- * Remove excessively restrictive copyright notice
- * [1994/01/20 17:50:40 chasb]
- *
- * Revision 3.0.3.1 1993/12/20 21:06:49 gupta
- * Expanded C O P Y R I G H T
- * [1993/12/17 22:19:22 gupta]
- *
- * Revision 3.0 1992/12/31 22:08:24 ede
- * Initial revision for OSF/1 R1.3
- *
- * Revision 1.1.4.5 1992/03/16 18:02:52 gmf
- * Add TASK_SW_ELIGIBLE flag to swap_flags; prototype
- * task_swapout_eligible, task_swapout_ineligible.
- * [1992/02/12 22:01:48 gmf]
- *
- * Revision 1.1.4.4 1992/01/22 22:14:13 gmf
- * Change prototype for task_swappable() to use host_priv_t
- * instead of host_t.
- * [1992/01/17 17:48:13 gmf]
- *
- * Revision 1.1.4.3 1991/12/10 17:20:55 gmf
- * Add extern declaration for new thread.
- * Changed TASK_SW_WAIT flag to TASK_SW_WANT_IN.
- * [1991/12/10 16:19:10 gmf]
- *
- * Revision 1.1.4.2 1991/11/21 21:48:35 mmp
- * initial task swapping code
- * [1991/11/21 21:01:37 mmp]
- *
- * $EndLog$
- */
-
-/*
- * File: kern/task_swap.h
- *
- * Task residency management primitives declarations.
- */
-
-#ifndef _KERN_TASK_SWAP_H_
-#define _KERN_TASK_SWAP_H_
-
-#include <kern/host.h>
-
-/*
- * swap states
- */
-#define TASK_SW_UNSWAPPABLE 1 /* not swappable */
-#define TASK_SW_IN 2 /* swapped in (resident) */
-#define TASK_SW_OUT 3 /* swapped out (non-resident) */
-#define TASK_SW_COMING_IN 4 /* about to be swapped in */
-#define TASK_SW_GOING_OUT 5 /* being swapped out */
-
-/*
- * swap flags
- */
-#define TASK_SW_MAKE_UNSWAPPABLE 0x01 /* make it unswappable */
-#define TASK_SW_WANT_IN 0x02 /* sleeping on state */
-#define TASK_SW_ELIGIBLE 0x04 /* eligible for swapping */
-
-/*
- * exported routines
- */
-extern void task_swapper_init(void);
-extern kern_return_t task_swapin(
- task_t, /* task */
- boolean_t); /* make_unswappable */
-extern kern_return_t task_swapout(task_t /* task */);
-extern void task_swapper(void);
-extern void task_swap_swapout_thread(void);
-extern void compute_vm_averages(void);
-extern kern_return_t task_swappable(
- host_priv_t, /* host */
- task_t, /* task */
- boolean_t); /* swappable */
-extern void task_swapout_eligible(task_t /* task */);
-extern void task_swapout_ineligible(task_t /* task */);
-extern void swapout_ast(void);
-
-#endif /* _KERN_TASK_SWAP_H_ */
}
bool user64_va = task_has_64Bit_addr(task);
- /*
- * Find the actual [slid] address of the shared cache's UUID, and copy it in from userland.
- */
- int shared_cache_uuid_valid = 0;
- uint64_t shared_cache_base_address = 0;
- struct _dyld_cache_header shared_cache_header = {};
- uint64_t shared_cache_slide = 0;
-
- /*
- * Don't copy in the entire shared cache header; we only need the UUID. Calculate the
- * offset of that one field.
- */
- int sc_header_uuid_offset = (char *)&shared_cache_header.uuid - (char *)&shared_cache_header;
- vm_shared_region_t sr = vm_shared_region_get(task);
- if (sr != NULL) {
- if ((vm_shared_region_start_address(sr, &shared_cache_base_address) == KERN_SUCCESS) &&
- (copyin(shared_cache_base_address + sc_header_uuid_offset, (char *)&shared_cache_header.uuid,
- sizeof(shared_cache_header.uuid)) == 0)) {
- shared_cache_uuid_valid = 1;
- shared_cache_slide = sr->sr_slide;
- }
- // vm_shared_region_get() gave us a reference on the shared region.
- vm_shared_region_deallocate(sr);
- }
-
/*
* Retrieve the array of UUID's for binaries used by this task.
* We reach down into DYLD's data structures to find the array.
tsnap->system_time_in_terminated_threads = task->total_system_time;
tsnap->suspend_count = task->suspend_count;
tsnap->task_size = (typeof(tsnap->task_size))(get_task_phys_footprint(task) / PAGE_SIZE);
- tsnap->faults = task->faults;
+ tsnap->faults = counter_load(&task->faults);
tsnap->pageins = task->pageins;
tsnap->cow_faults = task->cow_faults;
/*
tsnap->ss_flags |= kUser64_p;
}
- if (shared_cache_uuid_valid) {
- tsnap->shared_cache_slide = shared_cache_slide;
- bcopy(shared_cache_header.uuid, tsnap->shared_cache_identifier, sizeof(shared_cache_header.uuid));
+
+ if (task->task_shared_region_slide != -1) {
+ tsnap->shared_cache_slide = task->task_shared_region_slide;
+ bcopy(task->task_shared_region_uuid, tsnap->shared_cache_identifier,
+ sizeof(task->task_shared_region_uuid));
}
current_buffer->current_position += sizeof(struct task_snapshot);
#include <kern/kern_types.h>
#include <kern/kalloc.h>
#include <kern/cpu_data.h>
-#include <kern/counters.h>
#include <kern/extmod_statistics.h>
#include <kern/ipc_mig.h>
#include <kern/ipc_tt.h>
{
thread_t thread = current_thread();
- thread_terminate_internal(thread);
+ thread_terminate_internal(thread, TH_TERMINATE_OPTION_NONE);
/*
* Handle the thread termination directly
thread->thread_magic = 0;
#endif /* MACH_ASSERT */
+ lck_mtx_lock(&tasks_threads_lock);
+ assert(terminated_threads_count > 0);
+ queue_remove(&terminated_threads, thread, thread_t, threads);
+ terminated_threads_count--;
+ lck_mtx_unlock(&tasks_threads_lock);
+
zfree(thread_zone, thread);
}
lck_mtx_lock(&tasks_threads_lock);
queue_remove(&threads, thread, thread_t, threads);
threads_count--;
+ queue_enter(&terminated_threads, thread, thread_t, threads);
+ terminated_threads_count++;
lck_mtx_unlock(&tasks_threads_lock);
thread_deallocate(thread);
}
}
-#define TH_OPTION_NONE 0x00
-#define TH_OPTION_NOCRED 0x01
-#define TH_OPTION_NOSUSP 0x02
-#define TH_OPTION_WORKQ 0x04
+__options_decl(thread_create_internal_options_t, uint32_t, {
+ TH_OPTION_NONE = 0x00,
+ TH_OPTION_NOCRED = 0x01,
+ TH_OPTION_NOSUSP = 0x02,
+ TH_OPTION_WORKQ = 0x04,
+ TH_OPTION_IMMOVABLE = 0x08,
+ TH_OPTION_PINNED = 0x10,
+});
/*
* Create a new thread.
thread_create_internal(
task_t parent_task,
integer_t priority,
- thread_continue_t continuation,
+ thread_continue_t continuation,
void *parameter,
- int options,
+ thread_create_internal_options_t options,
thread_t *out_thread)
{
thread_t new_thread;
- static thread_t first_thread;
+ static thread_t first_thread;
+ ipc_thread_init_options_t init_options = IPC_THREAD_INIT_NONE;
/*
* Allocate a thread and initialize static fields
init_thread_from_template(new_thread);
}
+ if (options & TH_OPTION_PINNED) {
+ init_options |= IPC_THREAD_INIT_PINNED;
+ }
+
+ if (options & TH_OPTION_IMMOVABLE) {
+ init_options |= IPC_THREAD_INIT_IMMOVABLE;
+ }
+
os_ref_init_count(&new_thread->ref_count, &thread_refgrp, 2);
#if DEBUG || DEVELOPMENT
queue_init(&new_thread->t_temp_alloc_list);
lck_mtx_init(&new_thread->mutex, &thread_lck_grp, LCK_ATTR_NULL);
- ipc_thread_init(new_thread);
+ ipc_thread_init(new_thread, init_options);
new_thread->continuation = continuation;
new_thread->parameter = parameter;
}
static kern_return_t
-thread_create_internal2(
- task_t task,
- thread_t *new_thread,
- boolean_t from_user,
- thread_continue_t continuation)
+thread_create_with_options_internal(
+ task_t task,
+ thread_t *new_thread,
+ boolean_t from_user,
+ thread_create_internal_options_t options,
+ thread_continue_t continuation)
{
kern_return_t result;
- thread_t thread;
+ thread_t thread;
if (task == TASK_NULL || task == kernel_task) {
return KERN_INVALID_ARGUMENT;
}
#endif
- result = thread_create_internal(task, -1, continuation, NULL, TH_OPTION_NONE, &thread);
+ result = thread_create_internal(task, -1, continuation, NULL, options, &thread);
if (result != KERN_SUCCESS) {
return result;
}
task_t task,
thread_t *new_thread)
{
- return thread_create_internal2(task, new_thread, FALSE, (thread_continue_t)thread_bootstrap_return);
+ return thread_create_with_options_internal(task, new_thread, FALSE, TH_OPTION_NONE,
+ (thread_continue_t)thread_bootstrap_return);
+}
+
+/*
+ * Create a thread that has its itk_self pinned
+ * Deprecated, should be cleaned up once rdar://70892168 lands
+ */
+kern_return_t
+thread_create_pinned(
+ task_t task,
+ thread_t *new_thread)
+{
+ return thread_create_with_options_internal(task, new_thread, FALSE,
+ TH_OPTION_PINNED | TH_OPTION_IMMOVABLE, (thread_continue_t)thread_bootstrap_return);
+}
+
+kern_return_t
+thread_create_immovable(
+ task_t task,
+ thread_t *new_thread)
+{
+ return thread_create_with_options_internal(task, new_thread, FALSE,
+ TH_OPTION_IMMOVABLE, (thread_continue_t)thread_bootstrap_return);
}
kern_return_t
task_t task,
thread_t *new_thread)
{
- return thread_create_internal2(task, new_thread, TRUE, (thread_continue_t)thread_bootstrap_return);
+ return thread_create_with_options_internal(task, new_thread, TRUE, TH_OPTION_NONE,
+ (thread_continue_t)thread_bootstrap_return);
}
kern_return_t
thread_t *new_thread,
thread_continue_t continuation)
{
- return thread_create_internal2(task, new_thread, FALSE, continuation);
+ return thread_create_with_options_internal(task, new_thread, FALSE, TH_OPTION_NONE, continuation);
}
/*
kern_return_t
thread_create_waiting(
- task_t task,
- thread_continue_t continuation,
- event_t event,
- thread_t *new_thread)
+ task_t task,
+ thread_continue_t continuation,
+ event_t event,
+ th_create_waiting_options_t options,
+ thread_t *new_thread)
{
+ thread_create_internal_options_t ci_options = TH_OPTION_NONE;
+
+ assert((options & ~TH_CREATE_WAITING_OPTION_MASK) == 0);
+ if (options & TH_CREATE_WAITING_OPTION_PINNED) {
+ ci_options |= TH_OPTION_PINNED;
+ }
+ if (options & TH_CREATE_WAITING_OPTION_IMMOVABLE) {
+ ci_options |= TH_OPTION_IMMOVABLE;
+ }
+
return thread_create_waiting_internal(task, continuation, event,
- kThreadWaitNone, TH_OPTION_NONE, new_thread);
+ kThreadWaitNone, ci_options, new_thread);
}
thread_continue_t continuation,
thread_t *new_thread)
{
- int options = TH_OPTION_NOCRED | TH_OPTION_NOSUSP | TH_OPTION_WORKQ;
+ /*
+ * Create the thread, but don't pin its control port just yet, in case someone calls
+ * task_threads() and deallocates the pinned port before the kernel copyout happens,
+ * which would result in a pinned port guard exception. Instead, pin the port and make
+ * it immovable atomically at copyout time, during workq_setup_and_run().
+ */
+ int options = TH_OPTION_NOCRED | TH_OPTION_NOSUSP | TH_OPTION_WORKQ | TH_OPTION_IMMOVABLE;
return thread_create_waiting_internal(task, continuation, NULL,
kThreadWaitParkedWorkQueue, options, new_thread);
}
return KERN_INVALID_ARGUMENT;
}
- assert(host_priv == &realhost);
-
if (prev_state) {
*prev_state = (thread->options & TH_OPT_VMPRIV) != 0;
}
ip_unlock(port);
return;
}
- thread = (thread_t)port->ip_kobject;
+ thread = (thread_t)ipc_kobject_get(port);
kotype = ip_kotype(port);
if (thread != THREAD_NULL) {
assert((IKOT_THREAD_READ == kotype) || (IKOT_THREAD_INSPECT == kotype));
return;
}
+ if (kotype == IKOT_THREAD_READ) {
+ flavor = THREAD_FLAVOR_READ;
+ } else {
+ flavor = THREAD_FLAVOR_INSPECT;
+ }
+
thread_mtx_lock(thread);
ip_lock(port);
- require_ip_active(port);
/*
+ * If the port is no longer active, then ipc_thread_terminate() ran
+ * and destroyed the kobject already. Just deallocate the thread
+ * ref we took and go away.
+ *
+ * It is also possible that several nsrequests are in flight,
+ * only one shall NULL-out the port entry, and this is the one
+ * that gets to dealloc the port.
+ *
* Check for a stale no-senders notification. A call to any function
* that vends out send rights to this port could resurrect it between
* this notification being generated and actually being handled here.
*/
- if (port->ip_srights > 0) {
+ if (!ip_active(port) ||
+ thread->ith_thread_ports[flavor] != port ||
+ port->ip_srights > 0) {
ip_unlock(port);
thread_mtx_unlock(thread);
thread_deallocate(thread);
return;
}
- if (kotype == IKOT_THREAD_READ) {
- flavor = THREAD_FLAVOR_READ;
- } else {
- flavor = THREAD_FLAVOR_INSPECT;
- }
- assert(thread->ith_self[flavor] == port);
- thread->ith_self[flavor] = IP_NULL;
- port->ip_kobject = IKOT_NONE;
+
+ assert(thread->ith_thread_ports[flavor] == port);
+ thread->ith_thread_ports[flavor] = IP_NULL;
+ ipc_kobject_set_atomically(port, IKO_NULL, IKOT_NONE);
ip_unlock(port);
thread_mtx_unlock(thread);
thread_deallocate(thread);
vm_offset_t kernel_stack; /* current kernel stack */
vm_offset_t reserved_stack; /* reserved kernel stack */
+ /*** Machine-dependent state ***/
+ struct machine_thread machine;
+
#if KASAN
struct kasan_thread_data kasan_data;
#endif
/* Miscellaneous bits guarded by mutex */
uint32_t
- active:1, /* Thread is active and has not been terminated */
- started:1, /* Thread has been started after creation */
- static_param:1, /* Disallow policy parameter changes */
- inspection:1, /* TRUE when task is being inspected by crash reporter */
- policy_reset:1, /* Disallow policy parameter changes on terminating threads */
- suspend_parked:1, /* thread parked in thread_suspended */
- corpse_dup:1, /* TRUE when thread is an inactive duplicate in a corpse */
+ active:1, /* Thread is active and has not been terminated */
+ ipc_active:1, /* IPC with the thread ports is allowed */
+ started:1, /* Thread has been started after creation */
+ static_param:1, /* Disallow policy parameter changes */
+ inspection:1, /* TRUE when task is being inspected by crash reporter */
+ policy_reset:1, /* Disallow policy parameter changes on terminating threads */
+ suspend_parked:1, /* thread parked in thread_suspended */
+ corpse_dup:1, /* TRUE when thread is an inactive duplicate in a corpse */
:0;
decl_lck_mtx_data(, mutex);
* Different flavors of thread port.
* These flavors THREAD_FLAVOR_* are defined in mach_types.h
*/
- struct ipc_port *ith_self[THREAD_SELF_PORT_COUNT]; /* does not hold right */
+ struct ipc_port *ith_thread_ports[THREAD_SELF_PORT_COUNT]; /* does not hold right */
struct ipc_port *ith_settable_self; /* a send right */
+ struct ipc_port *ith_self; /* immovable/pinned thread port */
struct ipc_port *ith_special_reply_port; /* ref to special reply port */
struct exception_action *exc_actions;
void *hv_thread_target;
#endif /* HYPERVISOR */
- /*** Machine-dependent state ***/
- struct machine_thread machine;
-
/* Statistics accumulated per-thread and aggregated per-task */
uint32_t syscalls_unix;
uint32_t syscalls_mach;
#if SCHED_TRACE_THREAD_WAKEUPS
uintptr_t thread_wakeup_bt[64];
#endif
- turnstile_update_flags_t inheritor_flags; /* inheritor flags for inheritor field */
- block_hint_t pending_block_hint;
- block_hint_t block_hint; /* What type of primitive last caused us to block. */
- integer_t decompressions; /* Per-thread decompressions counter to be added to per-task decompressions counter */
- int thread_region_page_shift; /* Page shift that this thread would like to use when */
- /* introspecting a task. This is currently being used */
- /* by footprint which uses a thread for each task being inspected. */
+ turnstile_update_flags_t inheritor_flags; /* inheritor flags for inheritor field */
+ block_hint_t pending_block_hint;
+ block_hint_t block_hint; /* What type of primitive last caused us to block. */
+ integer_t decompressions; /* Per-thread decompressions counter to be added to per-task decompressions counter */
+ int thread_region_page_shift; /* Page shift that this thread would like to use when */
+ /* introspecting a task. This is currently being used */
+ /* by footprint which uses a thread for each task being inspected. */
};
#define ith_state saved.receive.state
extern void thread_terminate_self(void);
+__options_decl(thread_terminate_options_t, uint32_t, {
+ TH_TERMINATE_OPTION_NONE,
+ TH_TERMINATE_OPTION_UNPIN
+});
+
extern kern_return_t thread_terminate_internal(
- thread_t thread);
+ thread_t thread,
+ thread_terminate_options_t options);
extern void thread_start(
thread_t thread) __attribute__ ((noinline));
thread_t *new_thread,
thread_continue_t continuation);
-extern kern_return_t thread_create_waiting(task_t task,
- thread_continue_t continuation,
- event_t event,
- thread_t *new_thread);
+/* thread_create_waiting options */
+__options_decl(th_create_waiting_options_t, uint32_t, {
+ TH_CREATE_WAITING_OPTION_PINNED = 0x10,
+ TH_CREATE_WAITING_OPTION_IMMOVABLE = 0x20,
+});
+#define TH_CREATE_WAITING_OPTION_MASK 0x30
+
+extern kern_return_t thread_create_waiting(task_t task,
+ thread_continue_t continuation,
+ event_t event,
+ th_create_waiting_options_t options,
+ thread_t *new_thread);
extern kern_return_t thread_create_workq_waiting(
task_t task,
void thread_set_honor_qlimit(thread_t thread);
void thread_clear_honor_qlimit(thread_t thread);
extern ipc_port_t convert_thread_to_port(thread_t);
+extern ipc_port_t convert_thread_to_port_pinned(thread_t);
extern ipc_port_t convert_thread_inspect_to_port(thread_inspect_t);
extern ipc_port_t convert_thread_read_to_port(thread_read_t);
extern boolean_t is_vm_privileged(void);
extern void thread_port_with_flavor_notify(mach_msg_header_t *msg);
extern int thread_self_region_page_shift(void);
extern void thread_self_region_page_shift_set(int pgshift);
+extern kern_return_t thread_create_pinned(task_t task, thread_t *new_thread);
+extern kern_return_t thread_create_immovable(task_t task, thread_t *new_thread);
+extern kern_return_t thread_terminate_pinned(thread_t thread);
#endif /* KERNEL_PRIVATE */
__END_DECLS
*/
kern_return_t
thread_terminate_internal(
- thread_t thread)
+ thread_t thread,
+ thread_terminate_options_t options)
{
kern_return_t result = KERN_SUCCESS;
+ boolean_t test_pin_bit = false;
thread_mtx_lock(thread);
} else {
thread_start(thread);
}
+ /* This bit can be reliably tested only if the thread is still active */
+ test_pin_bit = (options == TH_TERMINATE_OPTION_UNPIN) ? true : false;
} else {
result = KERN_TERMINATED;
}
thread_affinity_terminate(thread);
}
+ /*
+ * <rdar://problem/53562036> thread_terminate shouldn't be allowed on pthread
+ * Until thread_terminate is disallowed for pthreads, always unpin the pinned port
+ * when the thread is being terminated.
+ */
+ ipc_thread_port_unpin(thread->ith_self, test_pin_bit);
+
thread_mtx_unlock(thread);
if (thread != current_thread() && result == KERN_SUCCESS) {
return KERN_FAILURE;
}
- kern_return_t result = thread_terminate_internal(thread);
+ kern_return_t result = thread_terminate_internal(thread, TH_TERMINATE_OPTION_NONE);
/*
* If a kernel thread is terminating itself, force handle the APC_AST here.
return result;
}
+kern_return_t
+thread_terminate_pinned(
+ thread_t thread)
+{
+ if (thread == THREAD_NULL) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ assert(thread->task != kernel_task);
+
+ kern_return_t result = thread_terminate_internal(thread, TH_TERMINATE_OPTION_UNPIN);
+ return result;
+}
+
/*
* Suspend execution of the specified thread.
* This is a recursive-style suspension of the thread, a count of
static ZONE_DECLARE(thread_call_zone, "thread_call",
sizeof(thread_call_data_t), ZC_NOENCRYPT);
-static struct waitq daemon_waitq;
-
typedef enum {
TCF_ABSOLUTE = 0,
TCF_CONTINUOUS = 1,
uint32_t target_thread_count;
thread_call_group_flags_t tcg_flags;
+
+ struct waitq waiters_waitq;
} thread_call_groups[THREAD_CALL_INDEX_MAX] = {
[THREAD_CALL_INDEX_HIGH] = {
.tcg_name = "high",
timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);
+ waitq_init(&group->waiters_waitq, SYNC_POLICY_DISABLE_IRQ);
+
/* Reverse the wait order so we re-use the most recently parked thread from the pool */
waitq_init(&group->idle_waitq, SYNC_POLICY_REVERSED | SYNC_POLICY_DISABLE_IRQ);
}
}
void
-thread_call_setup(
+thread_call_setup_with_options(
thread_call_t call,
thread_call_func_t func,
- thread_call_param_t param0)
+ thread_call_param_t param0,
+ thread_call_priority_t pri,
+ thread_call_options_t options)
{
bzero(call, sizeof(*call));
*call = (struct thread_call) {
.tc_func = func,
.tc_param0 = param0,
-
- /*
- * Thread calls default to the HIGH group
- * unless otherwise specified.
- */
- .tc_index = THREAD_CALL_INDEX_HIGH,
};
+
+ switch (pri) {
+ case THREAD_CALL_PRIORITY_HIGH:
+ call->tc_index = THREAD_CALL_INDEX_HIGH;
+ break;
+ case THREAD_CALL_PRIORITY_KERNEL:
+ call->tc_index = THREAD_CALL_INDEX_KERNEL;
+ break;
+ case THREAD_CALL_PRIORITY_USER:
+ call->tc_index = THREAD_CALL_INDEX_USER;
+ break;
+ case THREAD_CALL_PRIORITY_LOW:
+ call->tc_index = THREAD_CALL_INDEX_LOW;
+ break;
+ case THREAD_CALL_PRIORITY_KERNEL_HIGH:
+ call->tc_index = THREAD_CALL_INDEX_KERNEL_HIGH;
+ break;
+ default:
+ panic("Invalid thread call pri value: %d", pri);
+ break;
+ }
+
+ if (options & THREAD_CALL_OPTIONS_ONCE) {
+ call->tc_flags |= THREAD_CALL_ONCE;
+ }
+ if (options & THREAD_CALL_OPTIONS_SIGNAL) {
+ call->tc_flags |= THREAD_CALL_SIGNAL | THREAD_CALL_ONCE;
+ }
+}
+
+void
+thread_call_setup(
+ thread_call_t call,
+ thread_call_func_t func,
+ thread_call_param_t param0)
+{
+ thread_call_setup_with_options(call, func, param0,
+ THREAD_CALL_PRIORITY_HIGH, 0);
}
static void
thread_call_internal_queue_count--;
thread_call_setup(call, func, param0);
- call->tc_refs = 0;
- call->tc_flags = 0; /* THREAD_CALL_ALLOC not set, do not free back to zone */
+ /* THREAD_CALL_ALLOC not set, do not free back to zone */
+ assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
enable_ints_and_unlock(group, s);
return call;
thread_call_priority_t pri,
thread_call_options_t options)
{
- thread_call_t call = thread_call_allocate(func, param0);
-
- switch (pri) {
- case THREAD_CALL_PRIORITY_HIGH:
- call->tc_index = THREAD_CALL_INDEX_HIGH;
- break;
- case THREAD_CALL_PRIORITY_KERNEL:
- call->tc_index = THREAD_CALL_INDEX_KERNEL;
- break;
- case THREAD_CALL_PRIORITY_USER:
- call->tc_index = THREAD_CALL_INDEX_USER;
- break;
- case THREAD_CALL_PRIORITY_LOW:
- call->tc_index = THREAD_CALL_INDEX_LOW;
- break;
- case THREAD_CALL_PRIORITY_KERNEL_HIGH:
- call->tc_index = THREAD_CALL_INDEX_KERNEL_HIGH;
- break;
- default:
- panic("Invalid thread call pri value: %d", pri);
- break;
- }
+ thread_call_t call = zalloc(thread_call_zone);
- if (options & THREAD_CALL_OPTIONS_ONCE) {
- call->tc_flags |= THREAD_CALL_ONCE;
- }
- if (options & THREAD_CALL_OPTIONS_SIGNAL) {
- call->tc_flags |= THREAD_CALL_SIGNAL | THREAD_CALL_ONCE;
- }
+ thread_call_setup_with_options(call, func, param0, pri, options);
+ call->tc_refs = 1;
+ call->tc_flags |= THREAD_CALL_ALLOC;
return call;
}
thread_call_func_t func,
thread_call_param_t param0)
{
- thread_call_t call = zalloc(thread_call_zone);
-
- thread_call_setup(call, func, param0);
- call->tc_refs = 1;
- call->tc_flags = THREAD_CALL_ALLOC;
-
- return call;
+ return thread_call_allocate_with_options(func, param0,
+ THREAD_CALL_PRIORITY_HIGH, 0);
}
/*
if (group->idle_count) {
__assert_only kern_return_t kr;
- kr = waitq_wakeup64_one(&group->idle_waitq, NO_EVENT64,
+ kr = waitq_wakeup64_one(&group->idle_waitq, CAST_EVENT64_T(group),
THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
assert(kr == KERN_SUCCESS);
if (thread_call_group_should_add_thread(group) &&
os_atomic_cmpxchg(&thread_call_daemon_awake,
false, true, relaxed)) {
- waitq_wakeup64_all(&daemon_waitq, NO_EVENT64,
+ waitq_wakeup64_all(&daemon_waitq, CAST_EVENT64_T(&thread_call_daemon_awake),
THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
}
}
bool repend = false;
bool signal = call->tc_flags & THREAD_CALL_SIGNAL;
+ bool alloc = call->tc_flags & THREAD_CALL_ALLOC;
call->tc_finish_count++;
- if (!signal) {
+ if (!signal && alloc) {
/* The thread call thread owns a ref until the call is finished */
if (call->tc_refs <= 0) {
panic("thread_call_finish: detected over-released thread call: %p", call);
thread_call_flags_t old_flags = call->tc_flags;
call->tc_flags &= ~(THREAD_CALL_RESCHEDULE | THREAD_CALL_RUNNING | THREAD_CALL_WAIT);
- if (call->tc_refs != 0 && (old_flags & THREAD_CALL_RESCHEDULE) != 0) {
+ if ((!alloc || call->tc_refs != 0) &&
+ (old_flags & THREAD_CALL_RESCHEDULE) != 0) {
assert(old_flags & THREAD_CALL_ONCE);
thread_call_flavor_t flavor = thread_call_get_flavor(call);
}
}
- if (!signal && (call->tc_refs == 0)) {
+ if (!signal && alloc && call->tc_refs == 0) {
if ((old_flags & THREAD_CALL_WAIT) != 0) {
panic("Someone waiting on a thread call that is scheduled for free: %p\n", call->tc_func);
}
if ((old_flags & THREAD_CALL_WAIT) != 0) {
/*
- * Dropping lock here because the sched call for the
- * high-pri group can take the big lock from under
- * a thread lock.
+ * This may wake up a thread with a registered sched_call.
+ * That call might need the group lock, so we drop the lock
+ * to avoid deadlocking.
+ *
+ * We also must use a separate waitq from the idle waitq, as
+ * this path goes waitq lock->thread lock->group lock, but
+ * the idle wait goes group lock->waitq_lock->thread_lock.
*/
thread_call_unlock(group);
- thread_wakeup((event_t)call);
+
+ waitq_wakeup64_all(&group->waiters_waitq, CAST_EVENT64_T(call),
+ THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
+
thread_call_lock_spin(group);
/* THREAD_CALL_SIGNAL call may have been freed */
}
*/
bool needs_finish = false;
if (call->tc_flags & THREAD_CALL_ALLOC) {
+ call->tc_refs++; /* Delay free until we're done */
+ }
+ if (call->tc_flags & (THREAD_CALL_ALLOC | THREAD_CALL_ONCE)) {
+ /*
+ * If THREAD_CALL_ONCE is used, and the timer wasn't
+ * THREAD_CALL_ALLOC, then clients swear they will use
+ * thread_call_cancel_wait() before destroying
+ * the thread call.
+ *
+ * Else, the storage for the thread call might have
+ * disappeared when thread_call_invoke() ran.
+ */
needs_finish = true;
call->tc_flags |= THREAD_CALL_RUNNING;
- call->tc_refs++; /* Delay free until we're done */
}
thc_state.thc_call = call;
s = disable_ints_and_lock(group);
if (needs_finish) {
- /* Release refcount, may free */
+ /* Release refcount, may free, may temporarily drop lock */
thread_call_finish(call, group, &s);
}
}
}
/* Wait for more work (or termination) */
- wres = waitq_assert_wait64(&group->idle_waitq, NO_EVENT64, THREAD_INTERRUPTIBLE, 0);
+ wres = waitq_assert_wait64(&group->idle_waitq, CAST_EVENT64_T(group), THREAD_INTERRUPTIBLE, 0);
if (wres != THREAD_WAITING) {
panic("kcall worker unable to assert wait?");
}
if (group->idle_count < group->target_thread_count) {
group->idle_count++;
- waitq_assert_wait64(&group->idle_waitq, NO_EVENT64, THREAD_UNINT, 0); /* Interrupted means to exit */
+ waitq_assert_wait64(&group->idle_waitq, CAST_EVENT64_T(group), THREAD_UNINT, 0); /* Interrupted means to exit */
enable_ints_and_unlock(group, s);
}
} while (os_atomic_load(&thread_call_daemon_awake, relaxed));
- waitq_assert_wait64(&daemon_waitq, NO_EVENT64, THREAD_UNINT, 0);
+ waitq_assert_wait64(&daemon_waitq, CAST_EVENT64_T(&thread_call_daemon_awake), THREAD_UNINT, 0);
if (os_atomic_load(&thread_call_daemon_awake, relaxed)) {
clear_wait(current_thread(), THREAD_AWAKENED);
if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) {
terminated = true;
group->idle_count--;
- res = waitq_wakeup64_one(&group->idle_waitq, NO_EVENT64,
+ res = waitq_wakeup64_one(&group->idle_waitq, CAST_EVENT64_T(group),
THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES);
if (res != KERN_SUCCESS) {
panic("Unable to wake up idle thread for termination?");
*
* Takes the thread call lock locked, returns unlocked
* This lets us avoid a spurious take/drop after waking up from thread_block
+ *
+ * This thread could be a thread call thread itself, blocking and therefore making a
+ * sched_call upcall into the thread call subsystem, needing the group lock.
+ * However, we're saved from deadlock because the 'block' upcall is made in
+ * thread_block, not in assert_wait.
*/
static bool
thread_call_wait_once_locked(thread_call_t call, spl_t s)
/* call is running, so we have to wait for it */
call->tc_flags |= THREAD_CALL_WAIT;
- wait_result_t res = assert_wait(call, THREAD_UNINT);
+ wait_result_t res = waitq_assert_wait64(&group->waiters_waitq, CAST_EVENT64_T(call), THREAD_UNINT, 0);
if (res != THREAD_WAITING) {
panic("Unable to assert wait: %d", res);
}
while (call->tc_finish_count < submit_count) {
call->tc_flags |= THREAD_CALL_WAIT;
- wait_result_t res = assert_wait(call, THREAD_UNINT);
+ wait_result_t res = waitq_assert_wait64(&group->waiters_waitq,
+ CAST_EVENT64_T(call), THREAD_UNINT, 0);
+
if (res != THREAD_WAITING) {
panic("Unable to assert wait: %d", res);
}
thread_call_func_t func,
thread_call_param_t param0);
+extern void thread_call_setup_with_options(
+ thread_call_t call,
+ thread_call_func_t func,
+ thread_call_param_t param0,
+ thread_call_priority_t pri,
+ thread_call_options_t options);
+
extern void thread_call_delayed_timer_rescan_all(void);
extern uint64_t thread_call_get_armed_deadline(thread_call_t call);
thread_set_thread_group(current_thread(), thread_group_find_by_id_and_retain(THREAD_GROUP_VM), false);
}
-uint64_t
-kdp_thread_group_get_flags(struct thread_group *tg)
+uint32_t
+thread_group_get_flags(struct thread_group *tg)
{
return tg->tg_flags;
}
typedef void (*thread_group_iterate_fn_t)(void*, int, struct thread_group *);
kern_return_t thread_group_iterate_stackshot(thread_group_iterate_fn_t callout, void *arg);
-uint64_t kdp_thread_group_get_flags(struct thread_group *);
+uint32_t thread_group_get_flags(struct thread_group *);
boolean_t thread_group_smp_restricted(struct thread_group *tg);
void thread_group_update_recommendation(struct thread_group *tg, cluster_type_t new_recommendation);
static uint64_t
kdp_turnstile_traverse_inheritor_chain(struct turnstile *ts, uint64_t *flags, uint8_t *hops)
{
+ uint8_t unknown_hops;
+
if (waitq_held(&ts->ts_waitq)) {
*flags |= STACKSHOT_TURNSTILE_STATUS_LOCKED_WAITQ;
return 0;
}
*hops = *hops + 1;
+ unknown_hops = *hops;
+
+ /*
+ * If a turnstile is inheriting our priority, recurse. If we get back *exactly* UNKNOWN,
+ * continue on, since we may be able to give a more specific answer. To
+ * give an accurate hops count, we reset *hops, saving the recursive value in
+ * unknown_hops to use if we can't give a better answer.
+ */
+ if (ts->ts_inheritor_flags & TURNSTILE_INHERITOR_TURNSTILE) {
+ uint8_t pre_hops = *hops;
+ uint64_t ret = kdp_turnstile_traverse_inheritor_chain(ts->ts_inheritor, flags, hops);
+ /*
+ * Note that while flags is usually |=ed, we're checking with != here to
+ * make sure we only replace *exactly* UNKNOWN
+ */
+ if (ret != 0 || *flags != STACKSHOT_TURNSTILE_STATUS_UNKNOWN) {
+ return ret;
+ }
+ /* restore original hops value, saving the new one if we fall through to unknown */
+ unknown_hops = *hops;
+ *hops = pre_hops;
+ *flags = 0;
+ }
+
+ if (ts->ts_inheritor_flags & TURNSTILE_INHERITOR_THREAD) {
+ *flags |= STACKSHOT_TURNSTILE_STATUS_THREAD;
+ return (uint64_t) thread_tid(ts->ts_inheritor);
+ }
+
+ if (ts->ts_inheritor_flags & TURNSTILE_INHERITOR_WORKQ) {
+ *flags |= STACKSHOT_TURNSTILE_STATUS_WORKQUEUE;
+ return VM_KERNEL_UNSLIDE_OR_PERM(ts->ts_inheritor);
+ }
/*
* If we found a send turnstile, try to get the task that the turnstile's
* port is in the ipc space of
*/
if (turnstile_is_send_turnstile(ts)) {
- task_t dest_task = TASK_NULL;
ipc_port_t port = (ipc_port_t)ts->ts_proprietor;
if (port && ip_active(port)) {
if (ip_lock_held_kdp(port)) {
*flags |= STACKSHOT_TURNSTILE_STATUS_HELD_IPLOCK;
-
return 0;
- } else {
- if (port->ip_receiver_name != 0) {
- if (port->ip_receiver) {
- ipc_space_t space = (ipc_space_t) port->ip_receiver;
-
- dest_task = space->is_task;
- } else {
- return 0;
- }
- }
}
- }
+ if (port->ip_receiver_name != 0 && port->ip_receiver) {
+ ipc_space_t space = (ipc_space_t) port->ip_receiver;
+ task_t dest_task = space->is_task;
- if (dest_task != TASK_NULL) {
- *flags |= STACKSHOT_TURNSTILE_STATUS_BLOCKED_ON_TASK;
- return pid_from_task(dest_task);
+ if (dest_task != TASK_NULL) {
+ *flags |= STACKSHOT_TURNSTILE_STATUS_BLOCKED_ON_TASK;
+ return pid_from_task(dest_task);
+ }
+ }
}
}
- if (ts->ts_inheritor_flags & TURNSTILE_INHERITOR_TURNSTILE) {
- return kdp_turnstile_traverse_inheritor_chain(ts->ts_inheritor, flags, hops);
- }
-
- if (ts->ts_inheritor_flags & TURNSTILE_INHERITOR_THREAD) {
- *flags |= STACKSHOT_TURNSTILE_STATUS_THREAD;
- return (uint64_t) thread_tid(ts->ts_inheritor);
- }
-
- if (ts->ts_inheritor_flags & TURNSTILE_INHERITOR_WORKQ) {
- *flags |= STACKSHOT_TURNSTILE_STATUS_WORKQUEUE;
- return VM_KERNEL_UNSLIDE_OR_PERM(ts->ts_inheritor);
- }
-
if (turnstile_is_receive_turnstile(ts)) {
ipc_port_t port = (ipc_port_t)ts->ts_proprietor;
if (port && ip_active(port)) {
}
}
+ *hops = unknown_hops;
*flags |= STACKSHOT_TURNSTILE_STATUS_UNKNOWN;
return 0;
}
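The reordering above makes the traversal recurse into an inheritor turnstile first and only fall back to the other inheritor kinds when the recursion comes back with exactly UNKNOWN. A reduced sketch of that control-flow pattern, with hypothetical names (node_t, example_resolve, STATUS_UNKNOWN) standing in for the kernel types:

/* Sketch only: simplified node with the same "recurse, keep the more
 * specific answer" shape as kdp_turnstile_traverse_inheritor_chain(). */
static uint64_t
example_resolve(node_t *n, uint64_t *flags, uint8_t *hops)
{
	*hops += 1;
	uint8_t unknown_hops = *hops;

	if (n->kind == INHERITOR_NODE) {
		uint8_t saved = *hops;
		uint64_t ret = example_resolve(n->inheritor, flags, hops);
		if (ret != 0 || *flags != STATUS_UNKNOWN) {
			return ret;          /* recursion found something specific */
		}
		unknown_hops = *hops;        /* remember depth of the UNKNOWN answer */
		*hops = saved;               /* retry locally from the saved depth */
		*flags = 0;
	}

	/* ... try thread / workqueue / port owners here ... */

	*hops = unknown_hops;
	*flags |= STATUS_UNKNOWN;
	return 0;
}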
* most Mach exceptions.
*/
-static const void *ux_handler_kobject = NULL;
-SECURITY_READ_ONLY_LATE(ipc_port_t) ux_handler_port = IP_NULL;
+static SECURITY_READ_ONLY_LATE(const void *) ux_handler_kobject = NULL;
+SECURITY_READ_ONLY_LATE(ipc_port_t) ux_handler_port = IP_NULL;
/*
* init is called early in Mach initialization
*/
#define ZALLOC_ALLOW_DEPRECATED 1
+#if !ZALLOC_TEST
#include <mach/mach_types.h>
#include <mach/vm_param.h>
#include <mach/kern_return.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
#include <vm/vm_compressor.h> /* C_SLOT_PACKED_PTR* */
#include <pexpert/pexpert.h>
#include <san/kasan.h>
#if KASAN_ZALLOC
+/*
+ * Set to 0 to debug poisoning and ZC_ZFREE_CLEARMEM validation under kasan.
+ * Otherwise they are double-duty with what kasan already does.
+ */
+#define ZALLOC_ENABLE_POISONING 0
#define ZONE_ENABLE_LOGGING 0
#elif DEBUG || DEVELOPMENT
+#define ZALLOC_ENABLE_POISONING 1
#define ZONE_ENABLE_LOGGING 1
#else
+#define ZALLOC_ENABLE_POISONING 1
#define ZONE_ENABLE_LOGGING 0
#endif
+#if __LP64__
+#define ZALLOC_EARLY_GAPS 1
+#else
+#define ZALLOC_EARLY_GAPS 0
+#endif
+
+#if DEBUG
+#define z_debug_assert(expr) assert(expr)
+#else
+#define z_debug_assert(expr) (void)(expr)
+#endif
+
extern void vm_pageout_garbage_collect(int collect);
/* Returns pid of the task with the largest number of VM map entries. */
extern zone_t vm_map_entry_zone;
extern zone_t vm_object_zone;
-extern vm_offset_t kmapoff_kaddr;
-extern unsigned int kmapoff_pgcnt;
-extern unsigned int stack_total;
-extern unsigned long long stack_allocs;
-
-/*
- * The max # of elements in a chunk should fit into
- * zone_page_metadata.free_count (uint16_t).
- *
- * Update this if the type of free_count changes.
- */
-#define ZONE_CHUNK_MAXELEMENTS (UINT16_MAX)
-
-#define ZONE_PAGECOUNT_BITS 14
-/* Zone elements must fit both a next pointer and a backup pointer */
-#define ZONE_MIN_ELEM_SIZE (2 * sizeof(vm_offset_t))
+#define ZONE_MIN_ELEM_SIZE sizeof(uint64_t)
#define ZONE_MAX_ALLOC_SIZE (32 * 1024)
-/* per-cpu zones are special because of counters */
-#define ZONE_MIN_PCPU_ELEM_SIZE (1 * sizeof(vm_offset_t))
-
-struct zone_map_range {
- vm_offset_t min_address;
- vm_offset_t max_address;
-};
-
struct zone_page_metadata {
/* The index of the zone this metadata page belongs to */
- zone_id_t zm_index;
-
- /*
- * zm_secondary_page == 0: number of pages in this run
- * zm_secondary_page == 1: offset to the chunk start
- */
- uint16_t zm_page_count : ZONE_PAGECOUNT_BITS;
+ zone_id_t zm_index : 11;
- /* Whether this page is part of a chunk run */
- uint16_t zm_percpu : 1;
- uint16_t zm_secondary_page : 1;
+ /* Whether `zm_bitmap` is an inline bitmap or a packed bitmap reference */
+ uint16_t zm_inline_bitmap : 1;
/*
- * The start of the freelist can be maintained as a 16-bit
- * offset instead of a pointer because the free elements would
- * be at max ZONE_MAX_ALLOC_SIZE bytes away from the start
- * of the allocation chunk.
+ * Zones allocate in "chunks" of zone_t::z_chunk_pages consecutive
+ * pages, or zpercpu_count() pages if the zone is percpu.
*
- * Offset from start of the allocation chunk to free element
- * list head.
- */
- uint16_t zm_freelist_offs;
-
- /*
- * zm_secondary_page == 0: number of allocated elements in the chunk
- * zm_secondary_page == 1: unused
+ * The first page of a chunk has its zm_chunk_len set to:
+ * - 0 if none of the pages are currently wired
+ * - the number of wired pages in the chunk (not scaled for percpu).
*
- * PAGE_METADATA_EMPTY_FREELIST indicates an empty freelist
+ * Other pages in the chunk have their zm_chunk_len set to
+ * ZM_SECONDARY_PAGE or ZM_SECONDARY_PCPU_PAGE depending on whether
+ * the zone is percpu or not. For those, zm_page_index holds the
+ * index of that page in the run.
*/
- uint16_t zm_alloc_count;
-#define PAGE_METADATA_EMPTY_FREELIST UINT16_MAX
+ uint16_t zm_chunk_len : 4;
+#define ZM_CHUNK_LEN_MAX 0x8
+#define ZM_SECONDARY_PAGE 0xe
+#define ZM_SECONDARY_PCPU_PAGE 0xf
+
+ union {
+#define ZM_ALLOC_SIZE_LOCK 1u
+ uint16_t zm_alloc_size; /* first page only */
+ uint16_t zm_page_index; /* secondary pages only */
+ };
+ union {
+ uint32_t zm_bitmap; /* most zones */
+ uint32_t zm_bump; /* permanent zones */
+ };
zone_pva_t zm_page_next;
zone_pva_t zm_page_prev;
-
- /*
- * This is only for the sake of debuggers
- */
-#define ZONE_FOREIGN_COOKIE 0x123456789abcdef
- uint64_t zm_foreign_cookie[];
};
+static_assert(sizeof(struct zone_page_metadata) == 16, "validate packing");
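Under this layout, any page's metadata can be walked back to the head of its chunk using zm_chunk_len and zm_page_index. A minimal sketch (the helper name is hypothetical; the arithmetic mirrors what zone_element_resolve() does later in this hunk):

static struct zone_page_metadata *
example_chunk_head(struct zone_page_metadata *m)
{
	/*
	 * Secondary pages record their index within the run; the head page
	 * keeps the wired length (<= ZM_CHUNK_LEN_MAX) and the alloc size.
	 */
	if (m->zm_chunk_len == ZM_SECONDARY_PAGE ||
	    m->zm_chunk_len == ZM_SECONDARY_PCPU_PAGE) {
		return m - m->zm_page_index;
	}
	return m;
}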
+__enum_closed_decl(zone_addr_kind_t, bool, {
+ ZONE_ADDR_FOREIGN,
+ ZONE_ADDR_NATIVE,
+});
+#define ZONE_ADDR_KIND_COUNT 2
-/* Align elements that use the zone page list to 32 byte boundaries. */
-#define ZONE_PAGE_FIRST_OFFSET(kind) ((kind) == ZONE_ADDR_NATIVE ? 0 : 32)
+/*!
+ * @typedef zone_element_t
+ *
+ * @brief
+ * Type that represents a "resolved" zone element.
+ *
+ * @description
+ * This type encodes an element pointer as a tuple of:
+ * { chunk base, element index, element protection }.
+ *
+ * The chunk base is extracted with @c trunc_page()
+ * as it is always page aligned, and occupies the bits above @c PAGE_SHIFT.
+ *
+ * The low two bits encode the protection mode (see @c zprot_mode_t).
+ *
+ * The other bits encode the element index in the chunk rather than its address.
+ */
+typedef struct zone_element {
+ vm_offset_t ze_value;
+} zone_element_t;
-static_assert(sizeof(struct zone_page_metadata) == 16, "validate packing");
+/*!
+ * @typedef zone_magazine_t
+ *
+ * @brief
+ * Magazine of cached allocations.
+ *
+ * @field zm_cur how many elements this magazine holds (unused while loaded).
+ * @field zm_link linkage used by magazine depots.
+ * @field zm_elems an array of @c zc_mag_size() elements.
+ */
+typedef struct zone_magazine {
+ uint16_t zm_cur;
+ STAILQ_ENTRY(zone_magazine) zm_link;
+ zone_element_t zm_elems[0];
+} *zone_magazine_t;
+
+/*!
+ * @typedef zone_cache_t
+ *
+ * @brief
+ * Per-CPU cache of elements, backed by magazines.
+ *
+ * @discussion
+ * Below is a diagram of the caching system. This design is inspired by the
+ * paper "Magazines and Vmem: Extending the Slab Allocator to Many CPUs and
+ * Arbitrary Resources" by Jeff Bonwick and Jonathan Adams and the FreeBSD UMA
+ * zone allocator (itself derived from this seminal work).
+ *
+ * It is divided into 3 layers:
+ * - the per-cpu layer,
+ * - the recirculation depot layer,
+ * - the Zone Allocator.
+ *
+ * The per-cpu and recirculation depot layer use magazines (@c zone_magazine_t),
+ * which are stacks of up to @c zc_mag_size() elements.
+ *
+ * <h2>CPU layer</h2>
+ *
+ * The CPU layer (@c zone_cache_t) looks like this:
+ *
+ * ╭─ a ─ f ─┬───────── zm_depot ──────────╮
+ * │ ╭─╮ ╭─╮ │ ╭─╮ ╭─╮ ╭─╮ ╭─╮ ╭─╮ │
+ * │ │#│ │#│ │ │#│ │#│ │#│ │#│ │#│ │
+ * │ │#│ │ │ │ │#│ │#│ │#│ │#│ │#│ │
+ * │ │ │ │ │ │ │#│ │#│ │#│ │#│ │#│ │
+ * │ ╰─╯ ╰─╯ │ ╰─╯ ╰─╯ ╰─╯ ╰─╯ ╰─╯ │
+ * ╰─────────┴─────────────────────────────╯
+ *
+ * It has two pre-loaded magazines (a)lloc and (f)ree which we allocate from,
+ * or free to. Serialization is achieved through disabling preemption, and only
+ * the current CPU can access those allocations. This is represented on the left
+ * hand side of the diagram above.
+ *
+ * The right hand side is the per-cpu depot. It consists of @c zc_depot_cur
+ * full magazines, and is protected by the @c zc_depot_lock for access.
+ * The lock is expected to absolutely never be contended, as only the local CPU
+ * tends to access the local per-cpu depot in regular operation mode.
+ *
+ * However unlike UMA, our implementation allows for the zone GC to reclaim
+ * per-CPU magazines aggressively, which is serialized with the @c zc_depot_lock.
+ *
+ *
+ * <h2>Recirculation Depot</h2>
+ *
+ * The recirculation depot layer is a list similar to the per-cpu depot,
+ * however it is different in two fundamental ways:
+ *
+ * - it is protected by the regular zone lock,
+ * - elements referenced by the magazines in that layer appear free
+ * to the zone layer.
+ *
+ *
+ * <h2>Magazine circulation and sizing</h2>
+ *
+ * The caching system sizes itself dynamically. Operations that allocate/free
+ * a single element call @c zone_lock_nopreempt_check_contention() which records
+ * contention on the lock by doing a trylock and recording its success.
+ *
+ * This information is stored in the @c z_contention_cur field of the zone,
+ * and a windowed moving average is maintained in @c z_contention_wma.
+ * Each time a CPU registers any contention, it will also allow its own per-cpu
+ * cache to grow, incrementing @c zc_depot_max, which is how the per-cpu layer
+ * might grow into using its local depot.
+ *
+ * Note that @c zc_depot_max assumes that the (a) and (f) pre-loaded magazines
+ * on average contain @c zc_mag_size() elements.
+ *
+ * When a per-cpu layer cannot hold more full magazines in its depot,
+ * it overflows about 1/3 of its depot into the recirculation depot
+ * (see @c zfree_cached_slow()). Conversely, when its depot is empty, it
+ * refills the per-cpu depot to about 1/3 of its size from the recirculation
+ * depot (see @c zalloc_cached_slow()).
+ *
+ * Lastly, the zone layer tracks the high and low watermarks of how many
+ * elements are free per period of time (including those sitting in the
+ * recirculation depot) in the @c z_elems_free_min and @c z_elems_free_max
+ * fields. A weighted moving average of the amplitude of this swing is kept in
+ * @c z_elems_free_wss, which informs the zone GC on how to gently trim
+ * zones without hurting performance.
+ *
+ *
+ * <h2>Security considerations</h2>
+ *
+ * The zone caching layer has been designed to avoid returning elements in
+ * a strict LIFO order: @c zalloc() allocates from the (a) magazine,
+ * @c zfree() frees to the (f) magazine, and they are only swapped when the
+ * requested operation cannot be fulfilled.
+ *
+ * The per-cpu overflow depot or the recirculation depots are similarly used
+ * in FIFO order.
+ *
+ * More importantly, when magazines flow through the recirculation depot,
+ * the elements they contain are marked as "free" in the zone layer bitmaps.
+ * Because allocations out of per-cpu caches verify the bitmaps at allocation
+ * time, this acts as a poor man's double-free quarantine. The magazines
+ * also avoid the cost of the bit-scanning involved in the zone-level
+ * @c zalloc_item() codepath.
+ *
+ *
+ * @field zc_alloc_cur denormalized number of elements in the (a) magazine
+ * @field zc_free_cur denormalized number of elements in the (f) magazine
+ * @field zc_alloc_elems a pointer to the array of elements in (a)
+ * @field zc_free_elems a pointer to the array of elements in (f)
+ *
+ * @field zc_depot_lock a lock to access @c zc_depot, @c zc_depot_cur.
+ * @field zc_depot a list of @c zc_depot_cur full magazines
+ * @field zc_depot_cur number of magazines in @c zc_depot
+ * @field zc_depot_max the maximum number of elements in @c zc_depot,
+ * protected by the zone lock.
+ */
+typedef struct zone_cache {
+ uint16_t zc_alloc_cur;
+ uint16_t zc_free_cur;
+ uint16_t zc_depot_cur;
+ uint16_t __zc_padding;
+ zone_element_t *zc_alloc_elems;
+ zone_element_t *zc_free_elems;
+ hw_lock_bit_t zc_depot_lock;
+ uint32_t zc_depot_max;
+ struct zone_depot zc_depot;
+} *zone_cache_t;
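To make the (a)/(f) discipline described above concrete, here is a reduced sketch of the free-side fast path: push onto (f), swap the two magazines if (f) is full and (a) has room, otherwise defer to the slow path. This is an illustration of the design only, not the kernel's exact code path; it assumes preemption is already disabled so the cache is stable.

static bool
example_cache_free_fast(zone_cache_t zc, zone_element_t ze, uint16_t mag_size)
{
	if (zc->zc_free_cur >= mag_size) {
		if (zc->zc_alloc_cur >= mag_size) {
			return false;	/* both magazines full: take the slow path */
		}
		/* swap the roles of the (a) and (f) magazines */
		zone_element_t *elems = zc->zc_free_elems;
		uint16_t        cur   = zc->zc_free_cur;

		zc->zc_free_elems  = zc->zc_alloc_elems;
		zc->zc_free_cur    = zc->zc_alloc_cur;
		zc->zc_alloc_elems = elems;
		zc->zc_alloc_cur   = cur;
	}
	zc->zc_free_elems[zc->zc_free_cur++] = ze;
	return true;
}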
static __security_const_late struct {
- struct zone_map_range zi_map_range;
- struct zone_map_range zi_general_range;
- struct zone_map_range zi_meta_range;
- struct zone_map_range zi_foreign_range;
+ struct zone_map_range zi_map_range[ZONE_ADDR_KIND_COUNT];
+ struct zone_map_range zi_meta_range; /* debugging only */
+ struct zone_map_range zi_bits_range; /* bits buddy allocator */
/*
* The metadata lives within the zi_meta_range address range.
*
* The correct formula to find a metadata index is:
- * absolute_page_index - page_index(zi_meta_range.min_address)
+ * absolute_page_index - page_index(MIN(zi_map_range[*].min_address))
*
* And then this index is used to dereference zi_meta_range.min_address
* as a `struct zone_page_metadata` array.
*
 * To avoid doing that subtraction all the time in the various fast-paths,
- * zi_array_base is offset by `page_index(zi_meta_range.min_address)`
- * to avoid redoing that math all the time.
+ * zi_meta_base is pre-offset by that minimum page index to avoid redoing
+ * that math all the time.
+ *
+ * Do note that the array might have a hole punched in the middle,
+ * see zone_metadata_init().
*/
- struct zone_page_metadata *zi_array_base;
+ struct zone_page_metadata *zi_meta_base;
} zone_info;
+/*
+ * Initial array of metadata for stolen memory.
+ *
+ * The numbers here have to be kept in sync with vm_map_steal_memory()
+ * so that we have reserved enough metadata.
+ *
+ * After zone_init() has run (which happens while the kernel is still single
+ * threaded), the metadata is moved to its final dynamic location, and
+ * this array is unmapped with the rest of __startup_data at lockdown.
+ */
+#if CONFIG_GZALLOC
+#define ZONE_FOREIGN_META_INLINE_COUNT 20032
+#else
+#define ZONE_FOREIGN_META_INLINE_COUNT 64
+#endif
+__startup_data
+static struct zone_page_metadata
+ zone_foreign_meta_array_startup[ZONE_FOREIGN_META_INLINE_COUNT];
+
/*
* The zone_locks_grp allows for collecting lock statistics.
* All locks are associated to this group in zinit.
* Look at tools/lockstat for debugging lock contention.
*/
-LCK_GRP_DECLARE(zone_locks_grp, "zone_locks");
-LCK_MTX_EARLY_DECLARE(zone_metadata_region_lck, &zone_locks_grp);
+static LCK_GRP_DECLARE(zone_locks_grp, "zone_locks");
+static LCK_MTX_EARLY_DECLARE(zone_metadata_region_lck, &zone_locks_grp);
/*
* Exclude more than one concurrent garbage collection
*/
-LCK_GRP_DECLARE(zone_gc_lck_grp, "zone_gc");
-LCK_MTX_EARLY_DECLARE(zone_gc_lock, &zone_gc_lck_grp);
+static LCK_GRP_DECLARE(zone_gc_lck_grp, "zone_gc");
+static LCK_MTX_EARLY_DECLARE(zone_gc_lock, &zone_gc_lck_grp);
-boolean_t panic_include_zprint = FALSE;
+bool panic_include_zprint = FALSE;
mach_memory_info_t *panic_kext_memory_info = NULL;
vm_size_t panic_kext_memory_size = 0;
* zone_destroyed_bitmap
*/
static SIMPLE_LOCK_DECLARE(all_zones_lock, 0);
-static unsigned int num_zones_in_use;
-unsigned int _Atomic num_zones;
+static zone_id_t num_zones_in_use;
+zone_id_t _Atomic num_zones;
SECURITY_READ_ONLY_LATE(unsigned int) zone_view_count;
#if KASAN_ZALLOC
#else /* !KASAN_ZALLOC */
#define MAX_ZONES 402
#endif/* !KASAN_ZALLOC */
-struct zone zone_array[MAX_ZONES];
+
+/*
+ * Initial globals for zone stats until we can allocate the real ones.
+ * Those get migrated inside the per-CPU ones during zone_init() and
+ * this array is unmapped with the rest of __startup_data at lockdown.
+ */
+
+/* zone to allocate zone_magazine structs from */
+static SECURITY_READ_ONLY_LATE(zone_t) zc_magazine_zone;
+/*
+ * Zone caching is off until pid 1 is made, that is, until
+ * compute_zone_working_set_size() runs for the first time.
+ *
+ * -1 represents the "never enabled yet" value.
+ */
+static int8_t zone_caching_disabled = -1;
+
+__startup_data
+static struct zone_cache zone_cache_startup[MAX_ZONES];
+__startup_data
+static struct zone_stats zone_stats_startup[MAX_ZONES];
+struct zone zone_array[MAX_ZONES];
/* Initialized in zone_bootstrap(), how many "copies" the per-cpu system does */
static SECURITY_READ_ONLY_LATE(unsigned) zpercpu_early_count;
/* Used to keep track of destroyed slots in the zone_array */
static bitmap_t zone_destroyed_bitmap[BITMAP_LEN(MAX_ZONES)];
-/* number of pages used by all zones */
-static long _Atomic zones_phys_page_count;
-
/* number of zone mapped pages used by all zones */
static long _Atomic zones_phys_page_mapped_count;
#if VM_MAX_TAG_ZONES
/* enable tags for zones that ask for it */
-TUNABLE(bool, zone_tagging_on, "-zt", false);
+static TUNABLE(bool, zone_tagging_on, "-zt", false);
#endif /* VM_MAX_TAG_ZONES */
#if DEBUG || DEVELOPMENT
TUNABLE(bool, zalloc_disable_copyio_check, "-no-copyio-zalloc-check", false);
-__options_decl(zalloc_debug_t, uint32_t, {
- ZALLOC_DEBUG_ZONEGC = 0x00000001,
- ZALLOC_DEBUG_ZCRAM = 0x00000002,
-});
-
-TUNABLE(zalloc_debug_t, zalloc_debug, "zalloc_debug", 0);
#endif /* DEBUG || DEVELOPMENT */
#if CONFIG_ZLEAKS
/* Making pointer scanning leaks detection possible for all zones */
-TUNABLE(bool, zone_leaks_scan_enable, "-zl", false);
+static TUNABLE(bool, zone_leaks_scan_enable, "-zl", false);
#else
#define zone_leaks_scan_enable false
#endif
-/*
- * Async allocation of zones
- * This mechanism allows for bootstrapping an empty zone which is setup with
- * non-blocking flags. The first call to zalloc_noblock() will kick off a thread_call
- * to zalloc_async. We perform a zalloc() (which may block) and then an immediate free.
- * This will prime the zone for the next use.
- *
- * Currently the thread_callout function (zalloc_async) will loop through all zones
- * looking for any zone with async_pending set and do the work for it.
+/*! @enum zprot_mode_t
*
- * NOTE: If the calling thread for zalloc_noblock is lower priority than thread_call,
- * then zalloc_noblock to an empty zone may succeed.
- */
-static void zalloc_async(thread_call_param_t p0, thread_call_param_t p1);
-static thread_call_data_t call_async_alloc;
-static void zcram_and_lock(zone_t zone, vm_offset_t newmem, vm_size_t size);
-
-/*
- * Zone Corruption Debugging
+ * @brief
+ * Zone element corruption detection mode.
*
+ * @discussion
* We use four techniques to detect modification of a zone element
* after it's been freed.
*
- * (1) Check the freelist next pointer for sanity.
- * (2) Store a backup of the next pointer at the end of the element,
- * and compare it to the primary next pointer when the element is allocated
- * to detect corruption of the freelist due to use-after-free bugs.
- * The backup pointer is also XORed with a per-boot random cookie.
- * (3) Poison the freed element by overwriting it with 0xdeadbeef,
- * and check for that value when the element is being reused to make sure
- * no part of the element has been modified while it was on the freelist.
- * This will also help catch read-after-frees, as code will now dereference
- * 0xdeadbeef instead of a valid but freed pointer.
- * (4) If the zfree_clear_mem flag is set clear the element on free and
- * assert that it is still clear when alloc-ed.
- *
- * (1) and (2) occur for every allocation and free to a zone.
- * This is done to make it slightly more difficult for an attacker to
- * manipulate the freelist to behave in a specific way.
- *
- * Poisoning (3) occurs periodically for every N frees (counted per-zone).
+ * Elements that are in zones can be in 3 possible states:
+ * - zeroed out (@c ZPM_ZERO)
+ * - poisoned (@c ZPM_POISON) with the @c ZONE_POISON pattern
+ * - with a left and right canary (@c ZPM_CANARY).
+ *
+ * @c ZPM_AUTO is used when the actual protection for the element is unknown;
+ * it is detected by looking at the last word of the allocation at validation
+ * time.
+ *
+ * The mode of an element in zones is discovered by looking at its last
+ * pointer-sized value:
+ * - 0 means that it is zeroed out
+ * - @c ZONE_POISON means it is poisoned
+ * - any other value means it is using canaries.
+ *
+ * Elements are zeroed if:
+ * - the element size is smaller than @c zp_min_size,
+ * - the owning zone has the @c z_free_zeroes flag set,
+ * - the chunk backing store is fresh (and was just allocated).
+ *
+ * Elements are poisoned periodically for every N frees (counted per-zone),
+ * if the elements aren't otherwise zeroed out.
* If -zp is passed as a boot arg, poisoning occurs for every free.
*
- * Zeroing (4) is done for those zones that pass the ZC_ZFREE_CLEARMEM
- * flag on creation or if the element size is less than one cacheline.
+ * Otherwise elements use canaries. When canaries are used, the first and last
+ * pointer-sized values in the allocation are set to values derived from the
+ * element address and the @c zp_canary nonce. The first @c zp_min_size
+ * bytes of the element are also cleared.
*
* Performance slowdown is inversely proportional to the frequency of poisoning,
* with a 4-5% hit around N=1, down to ~0.3% at N=16 and just "noise" at N=32
*
* For a more heavyweight, but finer-grained method of detecting misuse
* of zone memory, look up the "Guard mode" zone allocator in gzalloc.c.
- *
- * Zone Corruption Logging
- *
- * You can also track where corruptions come from by using the boot-arguments
- * "zlog=<zone name to log> -zc". Search for "Zone corruption logging" later
- * in this document for more implementation and usage information.
- *
- * Zone Leak Detection
- *
- * To debug leaks of zone memory, use the zone leak detection tool 'zleaks'
- * found later in this file via the showtopztrace and showz* macros in kgmacros,
- * or use zlog without the -zc argument.
- *
*/
+__enum_closed_decl(zprot_mode_t, vm_offset_t, {
+ ZPM_AUTO, /* element is indeterminate */
+ ZPM_ZERO, /* element is zeroed */
+ ZPM_POISON, /* element is poisoned */
+ ZPM_CANARY, /* element extremities have a canary */
+});
+#define ZPM_MASK ((zprot_mode_t)0x3)
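Per the discussion above, an element tagged ZPM_AUTO has its actual mode inferred from its last pointer-sized word. A minimal sketch of that discovery step, assuming ZONE_POISON is the poison pattern referenced in the comment (the helper name is hypothetical):

static zprot_mode_t
example_zprot_infer(vm_offset_t elem, vm_size_t esize)
{
	vm_offset_t last = *(vm_offset_t *)(elem + esize - sizeof(vm_offset_t));

	if (last == 0) {
		return ZPM_ZERO;	/* element was zeroed on free */
	}
	if (last == ZONE_POISON) {
		return ZPM_POISON;	/* element carries the poison pattern */
	}
	return ZPM_CANARY;		/* anything else is a canary value */
}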
-#define ZP_DEFAULT_SAMPLING_FACTOR 16
-#define ZP_DEFAULT_SCALE_FACTOR 4
/*
* set by zp-factor=N boot arg
* A zp_factor of 1 indicates zone poisoning is on for all elements and can be
* set by passing the -zp boot-arg.
*/
-static TUNABLE(uint32_t, zp_factor, "zp-factor", ZP_DEFAULT_SAMPLING_FACTOR);
+static TUNABLE(uint32_t, zp_factor, "zp-factor", 16);
/* set by zp-scale=N boot arg, scales zp_factor by zone size */
-static TUNABLE(uint32_t, zp_scale, "zp-scale", ZP_DEFAULT_SCALE_FACTOR);
-
-/* initialized to a per-boot random value in zp_bootstrap */
-static SECURITY_READ_ONLY_LATE(uintptr_t) zp_poisoned_cookie;
-static SECURITY_READ_ONLY_LATE(uintptr_t) zp_nopoison_cookie;
-static SECURITY_READ_ONLY_LATE(uintptr_t) zp_min_size;
-static SECURITY_READ_ONLY_LATE(uint64_t) zone_phys_mapped_max;
-
-static SECURITY_READ_ONLY_LATE(vm_map_t) zone_submaps[Z_SUBMAP_IDX_COUNT];
-static SECURITY_READ_ONLY_LATE(uint32_t) zone_last_submap_idx;
+static TUNABLE(uint32_t, zp_scale, "zp-scale", 4);
-static struct bool_gen zone_bool_gen;
-static zone_t zone_find_largest(void);
-static void zone_drop_free_elements(zone_t z);
-
-#define submap_for_zone(z) zone_submaps[(z)->submap_idx]
-#define MAX_SUBMAP_NAME 16
-
-/* Globals for random boolean generator for elements in free list */
-#define MAX_ENTROPY_PER_ZCRAM 4
-
-#if CONFIG_ZCACHE
/*
- * Specifies a single zone to enable CPU caching for.
- * Can be set using boot-args: zcc_enable_for_zone_name=<zone>
+ * Zone caching tunables
+ *
+ * zc_mag_size():
+ * size of magazines, larger to reduce contention at the expense of memory
+ *
+ * zc_auto_enable_threshold
+ * number of contentions per second after which zone caching engages
+ * automatically.
+ *
+ * 0 to disable.
+ *
+ * zc_grow_threshold
+ * number of contentions per second after which the per-cpu depot layer
+ * grows at each newly observed contention without restriction.
+ *
+ * 0 to disable.
+ *
+ * zc_recirc_denom
+ * denominator of the fraction of per-cpu depot to migrate to/from
+ * the recirculation depot layer at a time. Default 3 (1/3).
+ *
+ * zc_defrag_ratio
+ * percentage of the working set to recirc size below which
+ * the zone is defragmented. Default is 50%.
+ *
+ * zc_free_batch_size
+ * The size of batches of frees/reclaim that can be done keeping
+ * the zone lock held (and preemption disabled).
+ */
+static TUNABLE(uint16_t, zc_magazine_size, "zc_mag_size", 8);
+static TUNABLE(uint32_t, zc_auto_threshold, "zc_auto_enable_threshold", 20);
+static TUNABLE(uint32_t, zc_grow_threshold, "zc_grow_threshold", 8);
+static TUNABLE(uint32_t, zc_recirc_denom, "zc_recirc_denom", 3);
+static TUNABLE(uint32_t, zc_defrag_ratio, "zc_defrag_ratio", 50);
+static TUNABLE(uint32_t, zc_free_batch_size, "zc_free_batch_size", 1024);
+
+static SECURITY_READ_ONLY_LATE(uintptr_t) zp_canary;
+/*
+ * Perf results for zeroing all non-data zones and 2K of data zones
+ * showed little regression, hence zp_min_size is set to 2048.
*/
-static char cache_zone_name[MAX_ZONE_NAME];
-static TUNABLE(bool, zcc_kalloc, "zcc_kalloc", false);
+static TUNABLE(uint32_t, zp_min_size, "zclear_size", 2048);
+static SECURITY_READ_ONLY_LATE(uint32_t) zone_phys_mapped_max_pages;
+static SECURITY_READ_ONLY_LATE(vm_map_t) zone_submaps[Z_SUBMAP_IDX_COUNT];
+static SECURITY_READ_ONLY_LATE(uint32_t) zone_last_submap_idx;
-__header_always_inline bool
-zone_caching_enabled(zone_t z)
-{
- return z->zcache.zcc_depot != NULL;
-}
-#else
-__header_always_inline bool
-zone_caching_enabled(zone_t z __unused)
-{
- return false;
-}
-#endif /* CONFIG_ZCACHE */
+static zone_t zone_find_largest(void);
+#endif /* !ZALLOC_TEST */
#pragma mark Zone metadata
-
-__enum_closed_decl(zone_addr_kind_t, bool, {
- ZONE_ADDR_NATIVE,
- ZONE_ADDR_FOREIGN,
-});
+#if !ZALLOC_TEST
static inline zone_id_t
zone_index(zone_t z)
return zone_array + zid == z;
}
-static inline vm_size_t
-zone_elem_count(zone_t zone, vm_size_t alloc_size, zone_addr_kind_t kind)
+static zone_element_t
+zone_element_encode(vm_offset_t base, vm_offset_t eidx, zprot_mode_t zpm)
{
- if (kind == ZONE_ADDR_NATIVE) {
- if (zone->percpu) {
- return PAGE_SIZE / zone_elem_size(zone);
- }
- return alloc_size / zone_elem_size(zone);
- } else {
- assert(alloc_size == PAGE_SIZE);
- return (PAGE_SIZE - ZONE_PAGE_FIRST_OFFSET(kind)) / zone_elem_size(zone);
- }
+ return (zone_element_t){ .ze_value = base | (eidx << 2) | zpm };
+}
+
+static vm_offset_t
+zone_element_base(zone_element_t ze)
+{
+ return trunc_page(ze.ze_value);
+}
+
+static vm_offset_t
+zone_element_idx(zone_element_t ze)
+{
+ return (ze.ze_value & PAGE_MASK) >> 2;
+}
+
+#if ZALLOC_ENABLE_POISONING
+static zprot_mode_t
+zone_element_prot(zone_element_t ze)
+{
+ return (zprot_mode_t)(ze.ze_value & ZPM_MASK);
+}
+#endif
+
+static vm_offset_t
+zone_element_addr(zone_element_t ze, vm_offset_t esize)
+{
+ return zone_element_base(ze) + esize * zone_element_idx(ze);
}
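Taken together, the helpers above round-trip an element through the packed representation. A small illustrative check, assuming a hypothetical zone with 128-byte elements and a page-aligned chunk base:

static void
example_zone_element_roundtrip(vm_offset_t chunk_base)
{
	/* element #3 of the chunk, recorded as zeroed */
	zone_element_t ze = zone_element_encode(chunk_base, 3, ZPM_ZERO);

	assert(zone_element_base(ze) == chunk_base);
	assert(zone_element_idx(ze) == 3);
	assert(zone_element_addr(ze, 128) == chunk_base + 3 * 128);
}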
__abortlike
(void *)addr, zone_heap_name(zone), zone->z_name);
}
+__abortlike
+static void
+zone_invalid_element_panic(zone_t zone, zone_element_t ze)
+{
+ panic("zone element pointer validation failed (elem: %p,%d, zone %s%s)",
+ (void *)zone_element_base(ze), (int)zone_element_idx(ze),
+ zone_heap_name(zone), zone->z_name);
+}
+
__abortlike
static void
zone_page_metadata_index_confusion_panic(zone_t zone, vm_offset_t addr,
meta, zone_heap_name(zone), zone->z_name);
}
-__abortlike
-static void
-zone_page_metadata_foreign_queue_corruption(zone_t zone, zone_pva_t *queue)
-{
- panic("native metadata index %d enqueued in foreign head %p from zone %s%s",
- queue->packed_address, queue, zone_heap_name(zone), zone->z_name);
-}
-
-__abortlike
-static void
-zone_page_metadata_foreign_confusion_panic(zone_t zone, vm_offset_t addr)
-{
- panic("manipulating foreign address %p in a native-only zone %s%s",
- (void *)addr, zone_heap_name(zone), zone->z_name);
-}
-
__abortlike __unused
static void
zone_invalid_foreign_addr_panic(zone_t zone, vm_offset_t addr)
zone_heap_name(zone), zone->z_name, meta);
}
+__abortlike
+static void
+zone_meta_double_free_panic(zone_t zone, zone_element_t ze, const char *caller)
+{
+ panic("%s: double free of %p to zone %s%s", caller,
+ (void *)zone_element_addr(ze, zone_elem_size(zone)),
+ zone_heap_name(zone), zone->z_name);
+}
+
__abortlike
static void
zone_accounting_panic(zone_t zone, const char *kind)
zone_heap_name(zone), zone->z_name);
}
+#define zone_counter_sub(z, stat, value) ({ \
+ if (os_sub_overflow((z)->stat, value, &(z)->stat)) { \
+ zone_accounting_panic(z, #stat " wrap-around"); \
+ } \
+ (z)->stat; \
+})
+
+static inline void
+zone_elems_free_add(zone_t z, uint32_t count)
+{
+ uint32_t n = (z->z_elems_free += count);
+ if (z->z_elems_free_max < n) {
+ z->z_elems_free_max = n;
+ }
+}
+
+static inline void
+zone_elems_free_sub(zone_t z, uint32_t count)
+{
+ uint32_t n = zone_counter_sub(z, z_elems_free, count);
+
+ if (z->z_elems_free_min > n) {
+ z->z_elems_free_min = n;
+ }
+}
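These two helpers maintain the per-period low/high watermarks that the caching discussion above refers to. The following sketch only illustrates folding their amplitude into z_elems_free_wss at the end of a period; the actual averaging weights and roll-over site are not shown in this hunk, so the 1/4 weight below is purely an assumption.

static inline void
example_zone_period_rollover(zone_t z)
{
	uint32_t amplitude = z->z_elems_free_max - z->z_elems_free_min;

	/* weighted moving average; the 1/4 weight is assumed for illustration */
	z->z_elems_free_wss = (3 * z->z_elems_free_wss + amplitude) / 4;

	/* restart the window around the current free count */
	z->z_elems_free_min = z->z_elems_free_max = z->z_elems_free;
}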
+
+static inline uint16_t
+zone_meta_alloc_size_add(zone_t z, struct zone_page_metadata *m,
+ vm_offset_t esize)
+{
+ if (os_add_overflow(m->zm_alloc_size, (uint16_t)esize, &m->zm_alloc_size)) {
+ zone_page_meta_accounting_panic(z, m, "alloc_size wrap-around");
+ }
+ return m->zm_alloc_size;
+}
+
+static inline uint16_t
+zone_meta_alloc_size_sub(zone_t z, struct zone_page_metadata *m,
+ vm_offset_t esize)
+{
+ if (os_sub_overflow(m->zm_alloc_size, esize, &m->zm_alloc_size)) {
+ zone_page_meta_accounting_panic(z, m, "alloc_size wrap-around");
+ }
+ return m->zm_alloc_size;
+}
+
__abortlike
static void
zone_nofail_panic(zone_t zone)
return rmax - rmin;
}
-#define from_zone_map(addr, size) \
- zone_range_contains(&zone_info.zi_map_range, (vm_offset_t)(addr), size)
-
-#define from_general_submap(addr, size) \
- zone_range_contains(&zone_info.zi_general_range, (vm_offset_t)(addr), size)
+#define from_zone_map(addr, size, kind) \
+ zone_range_contains(&zone_info.zi_map_range[kind], \
+ (vm_offset_t)(addr), size)
-#define from_foreign_range(addr, size) \
- zone_range_contains(&zone_info.zi_foreign_range, (vm_offset_t)(addr), size)
+#define zone_native_size() \
+ zone_range_size(&zone_info.zi_map_range[ZONE_ADDR_NATIVE])
-#define from_native_meta_map(addr) \
- zone_range_contains(&zone_info.zi_meta_range, (vm_offset_t)(addr), \
- sizeof(struct zone_page_metadata))
-
-#define zone_addr_kind(addr, size) \
- (from_zone_map(addr, size) ? ZONE_ADDR_NATIVE : ZONE_ADDR_FOREIGN)
+#define zone_foreign_size() \
+ zone_range_size(&zone_info.zi_map_range[ZONE_ADDR_FOREIGN])
__header_always_inline bool
zone_pva_is_null(zone_pva_t page)
return (zone_pva_t){ (uint32_t)((intptr_t)addr >> PAGE_SHIFT) };
}
+__header_always_inline zone_pva_t
+zone_pva_from_element(zone_element_t ze)
+{
+ return zone_pva_from_addr(ze.ze_value);
+}
+
__header_always_inline vm_address_t
zone_pva_to_addr(zone_pva_t page)
{
}
__header_always_inline struct zone_page_metadata *
-zone_pva_to_meta(zone_pva_t page, zone_addr_kind_t kind)
+zone_pva_to_meta(zone_pva_t page)
{
- if (kind == ZONE_ADDR_NATIVE) {
- return &zone_info.zi_array_base[page.packed_address];
- } else {
- return (struct zone_page_metadata *)zone_pva_to_addr(page);
- }
+ return &zone_info.zi_meta_base[page.packed_address];
}
__header_always_inline zone_pva_t
-zone_pva_from_meta(struct zone_page_metadata *meta, zone_addr_kind_t kind)
+zone_pva_from_meta(struct zone_page_metadata *meta)
{
- if (kind == ZONE_ADDR_NATIVE) {
- uint32_t index = (uint32_t)(meta - zone_info.zi_array_base);
- return (zone_pva_t){ index };
- } else {
- return zone_pva_from_addr((vm_address_t)meta);
- }
+ return (zone_pva_t){ (uint32_t)(meta - zone_info.zi_meta_base) };
}
__header_always_inline struct zone_page_metadata *
-zone_meta_from_addr(vm_offset_t addr, zone_addr_kind_t kind)
+zone_meta_from_addr(vm_offset_t addr)
{
- if (kind == ZONE_ADDR_NATIVE) {
- return zone_pva_to_meta(zone_pva_from_addr(addr), kind);
- } else {
- return (struct zone_page_metadata *)trunc_page(addr);
- }
+ return zone_pva_to_meta(zone_pva_from_addr(addr));
+}
+
+__header_always_inline struct zone_page_metadata *
+zone_meta_from_element(zone_element_t ze)
+{
+ return zone_pva_to_meta(zone_pva_from_element(ze));
}
-#define zone_native_meta_from_addr(addr) \
- zone_meta_from_addr((vm_offset_t)(addr), ZONE_ADDR_NATIVE)
+__header_always_inline zone_id_t
+zone_index_from_ptr(const void *ptr)
+{
+ return zone_pva_to_meta(zone_pva_from_addr((vm_offset_t)ptr))->zm_index;
+}
__header_always_inline vm_offset_t
-zone_meta_to_addr(struct zone_page_metadata *meta, zone_addr_kind_t kind)
+zone_meta_to_addr(struct zone_page_metadata *meta)
{
- if (kind == ZONE_ADDR_NATIVE) {
- return ptoa((int)(meta - zone_info.zi_array_base));
- } else {
- return (vm_offset_t)meta;
- }
+ return ptoa((int32_t)(meta - zone_info.zi_meta_base));
}
__header_always_inline void
zone_meta_queue_push(zone_t z, zone_pva_t *headp,
- struct zone_page_metadata *meta, zone_addr_kind_t kind)
+ struct zone_page_metadata *meta)
{
zone_pva_t head = *headp;
zone_pva_t queue_pva = zone_queue_encode(headp);
meta->zm_page_next = head;
if (!zone_pva_is_null(head)) {
- tmp = zone_pva_to_meta(head, kind);
+ tmp = zone_pva_to_meta(head);
if (!zone_pva_is_equal(tmp->zm_page_prev, queue_pva)) {
zone_page_metadata_list_corruption(z, meta);
}
- tmp->zm_page_prev = zone_pva_from_meta(meta, kind);
+ tmp->zm_page_prev = zone_pva_from_meta(meta);
}
meta->zm_page_prev = queue_pva;
- *headp = zone_pva_from_meta(meta, kind);
+ *headp = zone_pva_from_meta(meta);
}
__header_always_inline struct zone_page_metadata *
-zone_meta_queue_pop(zone_t z, zone_pva_t *headp, zone_addr_kind_t kind,
- vm_offset_t *page_addrp)
+zone_meta_queue_pop_native(zone_t z, zone_pva_t *headp, vm_offset_t *page_addrp)
{
zone_pva_t head = *headp;
- struct zone_page_metadata *meta = zone_pva_to_meta(head, kind);
+ struct zone_page_metadata *meta = zone_pva_to_meta(head);
vm_offset_t page_addr = zone_pva_to_addr(head);
struct zone_page_metadata *tmp;
- if (kind == ZONE_ADDR_NATIVE && !from_native_meta_map(meta)) {
+ if (!from_zone_map(page_addr, 1, ZONE_ADDR_NATIVE)) {
zone_page_metadata_native_queue_corruption(z, headp);
}
- if (kind == ZONE_ADDR_FOREIGN && from_zone_map(meta, sizeof(*meta))) {
- zone_page_metadata_foreign_queue_corruption(z, headp);
- }
if (!zone_pva_is_null(meta->zm_page_next)) {
- tmp = zone_pva_to_meta(meta->zm_page_next, kind);
+ tmp = zone_pva_to_meta(meta->zm_page_next);
if (!zone_pva_is_equal(tmp->zm_page_prev, head)) {
zone_page_metadata_list_corruption(z, meta);
}
}
*headp = meta->zm_page_next;
+ meta->zm_page_next = meta->zm_page_prev = (zone_pva_t){ 0 };
*page_addrp = page_addr;
+
+ if (!zone_has_index(z, meta->zm_index)) {
+ zone_page_metadata_index_confusion_panic(z,
+ zone_meta_to_addr(meta), meta);
+ }
return meta;
}
__header_always_inline void
-zone_meta_requeue(zone_t z, zone_pva_t *headp,
- struct zone_page_metadata *meta, zone_addr_kind_t kind)
+zone_meta_remqueue(zone_t z, struct zone_page_metadata *meta)
{
- zone_pva_t meta_pva = zone_pva_from_meta(meta, kind);
+ zone_pva_t meta_pva = zone_pva_from_meta(meta);
struct zone_page_metadata *tmp;
if (!zone_pva_is_null(meta->zm_page_next)) {
- tmp = zone_pva_to_meta(meta->zm_page_next, kind);
+ tmp = zone_pva_to_meta(meta->zm_page_next);
if (!zone_pva_is_equal(tmp->zm_page_prev, meta_pva)) {
zone_page_metadata_list_corruption(z, meta);
}
if (zone_pva_is_queue(meta->zm_page_prev)) {
zone_queue_set_head(z, meta->zm_page_prev, meta_pva, meta);
} else {
- tmp = zone_pva_to_meta(meta->zm_page_prev, kind);
+ tmp = zone_pva_to_meta(meta->zm_page_prev);
if (!zone_pva_is_equal(tmp->zm_page_next, meta_pva)) {
zone_page_metadata_list_corruption(z, meta);
}
tmp->zm_page_next = meta->zm_page_next;
}
- zone_meta_queue_push(z, headp, meta, kind);
+ meta->zm_page_next = meta->zm_page_prev = (zone_pva_t){ 0 };
+}
+
+__header_always_inline void
+zone_meta_requeue(zone_t z, zone_pva_t *headp,
+ struct zone_page_metadata *meta)
+{
+ zone_meta_remqueue(z, meta);
+ zone_meta_queue_push(z, headp, meta);
+}
+
+/* prevents a given metadata from ever reaching the z_pageq_empty queue */
+static inline void
+zone_meta_lock_in_partial(zone_t z, struct zone_page_metadata *m, uint32_t len)
+{
+ uint16_t new_size = zone_meta_alloc_size_add(z, m, ZM_ALLOC_SIZE_LOCK);
+
+ assert(new_size % sizeof(vm_offset_t) == ZM_ALLOC_SIZE_LOCK);
+ if (new_size == ZM_ALLOC_SIZE_LOCK) {
+ zone_meta_requeue(z, &z->z_pageq_partial, m);
+ zone_counter_sub(z, z_wired_empty, len);
+ }
+}
+
+/* allows a given metadata to reach the z_pageq_empty queue again */
+static inline void
+zone_meta_unlock_from_partial(zone_t z, struct zone_page_metadata *m, uint32_t len)
+{
+ uint16_t new_size = zone_meta_alloc_size_sub(z, m, ZM_ALLOC_SIZE_LOCK);
+
+ assert(new_size % sizeof(vm_offset_t) == 0);
+ if (new_size == 0) {
+ zone_meta_requeue(z, &z->z_pageq_empty, m);
+ z->z_wired_empty += len;
+ }
}
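Both helpers rely on element sizes being pointer-aligned, so the element bytes added to zm_alloc_size always leave bit 0 clear and ZM_ALLOC_SIZE_LOCK can double as a "keep off z_pageq_empty" marker, which is exactly what the modulo asserts check. A tiny illustration of that invariant (the helper is hypothetical):

static inline bool
example_chunk_is_locked(const struct zone_page_metadata *m)
{
	/*
	 * zm_alloc_size is either n * esize (bit 0 clear) or
	 * n * esize + ZM_ALLOC_SIZE_LOCK (bit 0 set) because esize is
	 * always a multiple of sizeof(vm_offset_t).
	 */
	return (m->zm_alloc_size & ZM_ALLOC_SIZE_LOCK) != 0;
}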
/*
* Must be called without the zone lock held as it might potentially block.
*/
static void
-zone_meta_populate(struct zone_page_metadata *from, struct zone_page_metadata *to)
+zone_meta_populate(vm_offset_t base, vm_size_t size)
{
+ struct zone_page_metadata *from = zone_meta_from_addr(base);
+ struct zone_page_metadata *to = from + atop(size);
vm_offset_t page_addr = trunc_page(from);
for (; page_addr < (vm_offset_t)to; page_addr += PAGE_SIZE) {
}
}
-static inline bool
-zone_allocated_element_offset_is_valid(zone_t zone, vm_offset_t addr,
- vm_offset_t page, zone_addr_kind_t kind)
+__header_always_inline
+struct zone_page_metadata *
+zone_element_validate(zone_t zone, zone_element_t ze)
{
- vm_offset_t offs = addr - page - ZONE_PAGE_FIRST_OFFSET(kind);
- vm_offset_t esize = zone_elem_size(zone);
+ struct zone_page_metadata *meta;
+ vm_offset_t page = zone_element_base(ze);
- if (esize & (esize - 1)) { /* not a power of 2 */
- return (offs % esize) == 0;
- } else {
- return (offs & (esize - 1)) == 0;
+ if (!from_zone_map(page, 1, ZONE_ADDR_NATIVE) &&
+ !from_zone_map(page, 1, ZONE_ADDR_FOREIGN)) {
+ zone_invalid_element_panic(zone, ze);
+ }
+ meta = zone_meta_from_addr(page);
+
+ if (meta->zm_chunk_len > ZM_CHUNK_LEN_MAX) {
+ zone_invalid_element_panic(zone, ze);
+ }
+ if (zone_element_idx(ze) >= zone->z_chunk_elems) {
+ zone_invalid_element_panic(zone, ze);
+ }
+
+ if (!zone_has_index(zone, meta->zm_index)) {
+ vm_offset_t addr = zone_element_addr(ze, zone_elem_size(zone));
+ zone_page_metadata_index_confusion_panic(zone, addr, meta);
}
+
+ return meta;
}
__attribute__((always_inline))
static struct zone_page_metadata *
-zone_allocated_element_resolve(zone_t zone, vm_offset_t addr,
- vm_offset_t *pagep, zone_addr_kind_t *kindp)
+zone_element_resolve(zone_t zone, vm_offset_t addr, vm_offset_t esize,
+ zone_element_t *ze)
{
struct zone_page_metadata *meta;
- zone_addr_kind_t kind;
- vm_offset_t page;
- vm_offset_t esize = zone_elem_size(zone);
+ vm_offset_t page, eidx;
- kind = zone_addr_kind(addr, esize);
+ if (!from_zone_map(addr, esize, ZONE_ADDR_NATIVE) &&
+ !from_zone_map(addr, esize, ZONE_ADDR_FOREIGN)) {
+ zone_invalid_element_addr_panic(zone, addr);
+ }
page = trunc_page(addr);
- meta = zone_meta_from_addr(addr, kind);
+ meta = zone_meta_from_addr(addr);
- if (kind == ZONE_ADDR_NATIVE) {
- if (meta->zm_secondary_page) {
- if (meta->zm_percpu) {
- zone_invalid_element_addr_panic(zone, addr);
- }
- page -= ptoa(meta->zm_page_count);
- meta -= meta->zm_page_count;
- }
- } else if (!zone->allows_foreign) {
- zone_page_metadata_foreign_confusion_panic(zone, addr);
-#if __LP64__
- } else if (!from_foreign_range(addr, esize)) {
- zone_invalid_foreign_addr_panic(zone, addr);
-#else
- } else if (!pmap_kernel_va(addr)) {
+ if (meta->zm_chunk_len == ZM_SECONDARY_PCPU_PAGE) {
zone_invalid_element_addr_panic(zone, addr);
-#endif
+ }
+ if (meta->zm_chunk_len == ZM_SECONDARY_PAGE) {
+ page -= ptoa(meta->zm_page_index);
+ meta -= meta->zm_page_index;
}
- if (!zone_allocated_element_offset_is_valid(zone, addr, page, kind)) {
+ eidx = (addr - page) / esize;
+ if ((addr - page) % esize) {
zone_invalid_element_addr_panic(zone, addr);
}
zone_page_metadata_index_confusion_panic(zone, addr, meta);
}
- if (kindp) {
- *kindp = kind;
- }
- if (pagep) {
- *pagep = page;
- }
+ *ze = zone_element_encode(page, eidx, ZPM_AUTO);
return meta;
}
-__attribute__((always_inline))
-void
-zone_allocated_element_validate(zone_t zone, vm_offset_t addr)
-{
- zone_allocated_element_resolve(zone, addr, NULL, NULL);
-}
-
-__header_always_inline vm_offset_t
-zone_page_meta_get_freelist(zone_t zone, struct zone_page_metadata *meta,
- vm_offset_t page)
-{
- assert(!meta->zm_secondary_page);
- if (meta->zm_freelist_offs == PAGE_METADATA_EMPTY_FREELIST) {
- return 0;
- }
-
- vm_size_t size = ptoa(meta->zm_percpu ? 1 : meta->zm_page_count);
- if (meta->zm_freelist_offs + zone_elem_size(zone) > size) {
- zone_metadata_corruption(zone, meta, "freelist corruption");
- }
-
- return page + meta->zm_freelist_offs;
-}
-
-__header_always_inline void
-zone_page_meta_set_freelist(struct zone_page_metadata *meta,
- vm_offset_t page, vm_offset_t addr)
-{
- assert(!meta->zm_secondary_page);
- if (addr) {
- meta->zm_freelist_offs = (uint16_t)(addr - page);
- } else {
- meta->zm_freelist_offs = PAGE_METADATA_EMPTY_FREELIST;
- }
-}
-
-static bool
-zone_page_meta_is_sane_element(zone_t zone, struct zone_page_metadata *meta,
- vm_offset_t page, vm_offset_t element, zone_addr_kind_t kind)
-{
- if (element == 0) {
- /* ends of the freelist are NULL */
- return true;
- }
- if (element < page + ZONE_PAGE_FIRST_OFFSET(kind)) {
- return false;
- }
- vm_size_t size = ptoa(meta->zm_percpu ? 1 : meta->zm_page_count);
- if (element > page + size - zone_elem_size(zone)) {
- return false;
- }
- return true;
-}
-
/* Routine to get the size of a zone allocated address.
 * If the address doesn't belong to the zone maps, returns 0.
*/
vm_size_t
zone_element_size(void *addr, zone_t *z)
{
- struct zone_page_metadata *meta;
struct zone *src_zone;
- if (from_zone_map(addr, sizeof(void *))) {
- meta = zone_native_meta_from_addr(addr);
- src_zone = &zone_array[meta->zm_index];
+ if (from_zone_map(addr, sizeof(void *), ZONE_ADDR_NATIVE) ||
+ from_zone_map(addr, sizeof(void *), ZONE_ADDR_FOREIGN)) {
+ src_zone = &zone_array[zone_index_from_ptr(addr)];
if (z) {
*z = src_zone;
}
return zone_elem_size(src_zone);
}
+
#if CONFIG_GZALLOC
if (__improbable(gzalloc_enabled())) {
vm_size_t gzsize;
uint32_t zindex;
zone_t other;
- if (!from_zone_map(addr, zone_elem_size(zone))) {
+ if (!from_zone_map(addr, zone_elem_size(zone), ZONE_ADDR_NATIVE)) {
panic("zone_require failed: address not in a zone (addr: %p)", addr);
}
- zindex = zone_native_meta_from_addr(addr)->zm_index;
+ zindex = zone_index_from_ptr(addr);
other = &zone_array[zindex];
if (zindex >= os_atomic_load(&num_zones, relaxed) || !other->z_self) {
panic("zone_require failed: invalid zone index %d "
void
zone_require(zone_t zone, void *addr)
{
- if (__probable(from_general_submap(addr, zone_elem_size(zone)) &&
- (zone_has_index(zone, zone_native_meta_from_addr(addr)->zm_index)))) {
- return;
- }
+ vm_size_t esize = zone_elem_size(zone);
+
+ if (__probable(from_zone_map(addr, esize, ZONE_ADDR_NATIVE))) {
+ if (zone_has_index(zone, zone_index_from_ptr(addr))) {
+ return;
+ }
#if CONFIG_GZALLOC
- if (__probable(gzalloc_enabled())) {
+ } else if (__probable(zone->gzalloc_tracked)) {
return;
- }
#endif
+ }
zone_require_panic(zone, addr);
}
void
zone_id_require(zone_id_t zid, vm_size_t esize, void *addr)
{
- if (__probable(from_general_submap(addr, esize) &&
- (zid == zone_native_meta_from_addr(addr)->zm_index))) {
+ if (__probable(from_zone_map(addr, esize, ZONE_ADDR_NATIVE))) {
+ if (zid == zone_index_from_ptr(addr)) {
+ return;
+ }
+#if CONFIG_GZALLOC
+ } else if (__probable(zone_array[zid].gzalloc_tracked)) {
return;
+#endif
}
+ zone_id_require_panic(zid, addr);
+}
+
+void
+zone_id_require_allow_foreign(zone_id_t zid, vm_size_t esize, void *addr)
+{
+ if (__probable(from_zone_map(addr, esize, ZONE_ADDR_NATIVE) ||
+ from_zone_map(addr, esize, ZONE_ADDR_FOREIGN))) {
+ if (zid == zone_index_from_ptr(addr)) {
+ return;
+ }
#if CONFIG_GZALLOC
- if (__probable(gzalloc_enabled())) {
+ } else if (__probable(zone_array[zid].gzalloc_tracked)) {
return;
- }
#endif
+ }
zone_id_require_panic(zid, addr);
}
bool
zone_owns(zone_t zone, void *addr)
{
- if (__probable(from_general_submap(addr, zone_elem_size(zone)) &&
- (zone_has_index(zone, zone_native_meta_from_addr(addr)->zm_index)))) {
- return true;
- }
+ vm_size_t esize = zone_elem_size(zone);
+
+ if (__probable(from_zone_map(addr, esize, ZONE_ADDR_NATIVE))) {
+ return zone_has_index(zone, zone_index_from_ptr(addr));
#if CONFIG_GZALLOC
- if (__probable(gzalloc_enabled())) {
+ } else if (__probable(zone->gzalloc_tracked)) {
return true;
- }
#endif
+ }
return false;
}
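zone_require(), zone_id_require() and zone_owns() are meant to be used at trust boundaries to confirm a pointer really is a native element of the expected zone. A hedged usage sketch with a hypothetical zone id and object type:

struct example_widget {
	uint64_t w_state;
};

static void
example_widget_consume(struct example_widget *w)
{
	/*
	 * EXAMPLE_WIDGET_ZONE_ID is hypothetical; the call panics if w is
	 * not a native element of that zone (gzalloc-tracked zones excepted).
	 */
	zone_id_require(EXAMPLE_WIDGET_ZONE_ID, sizeof(*w), w);

	(void)w->w_state;	/* now safe to trust the contents */
}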
-#pragma mark ZTAGS
-#if VM_MAX_TAG_ZONES
+#endif /* !ZALLOC_TEST */
+#pragma mark Zone bits allocator
-// for zones with tagging enabled:
+/*!
+ * @defgroup Zone Bitmap allocator
+ * @{
+ *
+ * @brief
+ * Functions implementing the zone bitmap allocator
+ *
+ * @discussion
+ * The zone allocator maintains which elements are allocated or free in bitmaps.
+ *
+ * When the number of elements per page is smaller than 32, it is stored inline
+ * on the @c zone_page_metadata structure (@c zm_inline_bitmap is set,
+ * and @c zm_bitmap used for storage).
+ *
+ * When the number of elements is larger, a bitmap is allocated from
+ * a buddy allocator (implemented under the @c zba_* namespace). Pointers
+ * to bitmaps are stored as packed 32-bit bitmap references in
+ * @c zm_bitmap. The low 3 bits encode the scale (order) of the allocation in
+ * @c ZBA_GRANULE units, and hence actual allocations encoded with that scheme
+ * cannot be larger than 1024 bytes (8192 bits).
+ *
+ * This buddy allocator can actually accommodate allocations as large
+ * as 8k on 16k systems and 2k on 4k systems.
+ *
+ * Note: @c zba_* functions are implementation details not meant to be used
+ * outside of the allocation of the allocator itself. Interfaces to the rest of
+ * the zone allocator are documented and not @c zba_* prefixed.
+ */
-// calculate a pointer to the tag base entry,
-// holding either a uint32_t the first tag offset for a page in the zone map,
-// or two uint16_t tags if the page can only hold one or two elements
+#define ZBA_CHUNK_SIZE PAGE_MAX_SIZE
+#define ZBA_GRANULE sizeof(uint64_t)
+#define ZBA_GRANULE_BITS (8 * sizeof(uint64_t))
+#define ZBA_MAX_ORDER (PAGE_MAX_SHIFT - 4)
+#define ZBA_MAX_ALLOC_ORDER 7
+#define ZBA_SLOTS (ZBA_CHUNK_SIZE / ZBA_GRANULE)
+static_assert(2ul * ZBA_GRANULE << ZBA_MAX_ORDER == ZBA_CHUNK_SIZE, "chunk sizes");
+static_assert(ZBA_MAX_ALLOC_ORDER <= ZBA_MAX_ORDER, "ZBA_MAX_ORDER is enough");
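As a concrete reading of the packed reference format described above: the low 3 bits give the buddy order in ZBA_GRANULE units, so the byte size of the referenced bitmap is at most ZBA_GRANULE << ZBA_MAX_ALLOC_ORDER = 1024 bytes. A hedged sketch follows; how the remaining bits locate the bitmap is an implementation detail of the zba_* allocator and is not assumed here.

static inline vm_size_t
example_zba_ref_size(uint32_t zm_bitmap)
{
	uint32_t order = zm_bitmap & 0x7;	/* low 3 bits: buddy order */

	return ZBA_GRANULE << order;		/* 8 << order, at most 1024 bytes */
}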
+
+struct zone_bits_chain {
+ uint32_t zbc_next;
+ uint32_t zbc_prev;
+} __attribute__((aligned(ZBA_GRANULE)));
+
+struct zone_bits_head {
+ uint32_t zbh_next;
+ uint32_t zbh_unused;
+} __attribute__((aligned(ZBA_GRANULE)));
+
+static_assert(sizeof(struct zone_bits_chain) == ZBA_GRANULE, "zbc size");
+static_assert(sizeof(struct zone_bits_head) == ZBA_GRANULE, "zbh size");
+
+struct zone_bits_allocator_meta {
+ uint32_t zbam_chunks;
+ uint32_t __zbam_padding;
+ struct zone_bits_head zbam_lists[ZBA_MAX_ORDER + 1];
+};
-#define ZTAGBASE(zone, element) \
- (&((uint32_t *)zone_tagbase_min)[atop((element) - zone_info.zi_map_range.min_address)])
+struct zone_bits_allocator_header {
+ uint64_t zbah_bits[ZBA_SLOTS / (8 * sizeof(uint64_t))];
+};
-// pointer to the tag for an element
-#define ZTAG(zone, element) \
- ({ \
- vm_tag_t * result; \
- if ((zone)->tags_inline) { \
- result = (vm_tag_t *) ZTAGBASE((zone), (element)); \
- if ((page_mask & element) >= zone_elem_size(zone)) result++; \
- } else { \
- result = &((vm_tag_t *)zone_tags_min)[ZTAGBASE((zone), (element))[0] + ((element) & page_mask) / zone_elem_size((zone))]; \
- } \
- result; \
- })
+#if ZALLOC_TEST
+static struct zalloc_bits_allocator_test_setup {
+ vm_offset_t zbats_base;
+ void (*zbats_populate)(vm_address_t addr, vm_size_t size);
+} zba_test_info;
+static struct zone_bits_allocator_header *
+zba_base_header(void)
+{
+ return (struct zone_bits_allocator_header *)zba_test_info.zbats_base;
+}
-static vm_offset_t zone_tagbase_min;
-static vm_offset_t zone_tagbase_max;
-static vm_offset_t zone_tagbase_map_size;
-static vm_map_t zone_tagbase_map;
+static void
+zba_populate(uint32_t n)
+{
+ vm_address_t base = zba_test_info.zbats_base;
+ zba_test_info.zbats_populate(base + n * ZBA_CHUNK_SIZE, ZBA_CHUNK_SIZE);
+}
+#else
+__startup_data
+static uint8_t zba_chunk_startup[ZBA_CHUNK_SIZE]
+__attribute__((aligned(ZBA_CHUNK_SIZE)));
+static LCK_MTX_EARLY_DECLARE(zba_mtx, &zone_locks_grp);
-static vm_offset_t zone_tags_min;
-static vm_offset_t zone_tags_max;
-static vm_offset_t zone_tags_map_size;
-static vm_map_t zone_tags_map;
+static struct zone_bits_allocator_header *
+zba_base_header(void)
+{
+ return (struct zone_bits_allocator_header *)zone_info.zi_bits_range.min_address;
+}
-// simple heap allocator for allocating the tags for new memory
+static void
+zba_lock(void)
+{
+ lck_mtx_lock(&zba_mtx);
+}
-LCK_MTX_EARLY_DECLARE(ztLock, &zone_locks_grp); /* heap lock */
+static void
+zba_unlock(void)
+{
+ lck_mtx_unlock(&zba_mtx);
+}
-enum{
- ztFreeIndexCount = 8,
- ztFreeIndexMax = (ztFreeIndexCount - 1),
- ztTagsPerBlock = 4
-};
+static void
+zba_populate(uint32_t n)
+{
+ vm_size_t size = ZBA_CHUNK_SIZE;
+ vm_address_t addr;
-struct ztBlock {
-#if __LITTLE_ENDIAN__
- uint64_t free:1,
- next:21,
- prev:21,
- size:21;
-#else
-// ztBlock needs free bit least significant
-#error !__LITTLE_ENDIAN__
+ addr = zone_info.zi_bits_range.min_address + n * size;
+ if (addr >= zone_info.zi_bits_range.max_address) {
+ zone_t z = zone_find_largest();
+ panic("zba_populate: out of bitmap space, "
+ "likely due to memory leak in zone [%s%s] "
+ "(%luM, %d elements allocated)",
+ zone_heap_name(z), zone_name(z),
+ (unsigned long)zone_size_wired(z) >> 20,
+ zone_count_allocated(z));
+ }
+
+ for (;;) {
+ kern_return_t kr = KERN_SUCCESS;
+
+ if (0 == pmap_find_phys(kernel_pmap, addr)) {
+ kr = kernel_memory_populate(kernel_map, addr, size,
+ KMA_NOPAGEWAIT | KMA_KOBJECT | KMA_ZERO,
+ VM_KERN_MEMORY_OSFMK);
+ }
+
+ if (kr == KERN_SUCCESS) {
+ return;
+ }
+
+ zba_unlock();
+ VM_PAGE_WAIT();
+ zba_lock();
+ }
+}
#endif
-};
-typedef struct ztBlock ztBlock;
-static ztBlock * ztBlocks;
-static uint32_t ztBlocksCount;
-static uint32_t ztBlocksFree;
+__pure2
+static struct zone_bits_allocator_meta *
+zba_meta(void)
+{
+ return (struct zone_bits_allocator_meta *)&zba_base_header()[1];
+}
+
+__pure2
+static uint64_t *
+zba_slot_base(void)
+{
+ return (uint64_t *)zba_base_header();
+}
+
+__pure2
+static vm_address_t
+zba_page_addr(uint32_t n)
+{
+ return (vm_address_t)zba_base_header() + n * ZBA_CHUNK_SIZE;
+}
+
+__pure2
+static struct zone_bits_head *
+zba_head(uint32_t order)
+{
+ return &zba_meta()->zbam_lists[order];
+}
+__pure2
static uint32_t
-ztLog2up(uint32_t size)
+zba_head_index(uint32_t order)
{
- if (1 == size) {
- size = 0;
- } else {
- size = 32 - __builtin_clz(size - 1);
- }
- return size;
+ uint32_t hdr_size = sizeof(struct zone_bits_allocator_header) +
+ offsetof(struct zone_bits_allocator_meta, zbam_lists);
+ return (hdr_size / ZBA_GRANULE) + order;
}
+__pure2
+static struct zone_bits_chain *
+zba_chain_for_index(uint32_t index)
+{
+ return (struct zone_bits_chain *)(zba_slot_base() + index);
+}
+
+__pure2
static uint32_t
-ztLog2down(uint32_t size)
+zba_chain_to_index(const struct zone_bits_chain *zbc)
{
- size = 31 - __builtin_clz(size);
- return size;
+ return (uint32_t)((const uint64_t *)zbc - zba_slot_base());
}
+__abortlike
static void
-ztFault(vm_map_t map, const void * address, size_t size, uint32_t flags)
+zba_head_corruption_panic(uint32_t order)
{
- vm_map_offset_t addr = (vm_map_offset_t) address;
- vm_map_offset_t page, end;
+ panic("zone bits allocator head[%d:%p] is corrupt", order,
+ zba_head(order));
+}
- page = trunc_page(addr);
- end = round_page(addr + size);
+__abortlike
+static void
+zba_chain_corruption_panic(struct zone_bits_chain *a, struct zone_bits_chain *b)
+{
+ panic("zone bits allocator freelist is corrupt (%p <-> %p)", a, b);
+}
- for (; page < end; page += page_size) {
- if (!pmap_find_phys(kernel_pmap, page)) {
- kern_return_t __unused
- ret = kernel_memory_populate(map, page, PAGE_SIZE,
- KMA_KOBJECT | flags, VM_KERN_MEMORY_DIAG);
- assert(ret == KERN_SUCCESS);
+static void
+zba_push_block(struct zone_bits_chain *zbc, uint32_t order)
+{
+ struct zone_bits_head *hd = zba_head(order);
+ uint32_t hd_index = zba_head_index(order);
+ uint32_t index = zba_chain_to_index(zbc);
+ struct zone_bits_chain *next;
+
+ if (hd->zbh_next) {
+ next = zba_chain_for_index(hd->zbh_next);
+ if (next->zbc_prev != hd_index) {
+ zba_head_corruption_panic(order);
}
+ next->zbc_prev = index;
}
+ zbc->zbc_next = hd->zbh_next;
+ zbc->zbc_prev = hd_index;
+ hd->zbh_next = index;
}
-static boolean_t
-ztPresent(const void * address, size_t size)
+static void
+zba_remove_block(struct zone_bits_chain *zbc)
{
- vm_map_offset_t addr = (vm_map_offset_t) address;
- vm_map_offset_t page, end;
- boolean_t result;
+ struct zone_bits_chain *prev = zba_chain_for_index(zbc->zbc_prev);
+ uint32_t index = zba_chain_to_index(zbc);
- page = trunc_page(addr);
- end = round_page(addr + size);
- for (result = TRUE; (page < end); page += page_size) {
- result = pmap_find_phys(kernel_pmap, page);
- if (!result) {
- break;
+ if (prev->zbc_next != index) {
+ zba_chain_corruption_panic(prev, zbc);
+ }
+ if ((prev->zbc_next = zbc->zbc_next)) {
+ struct zone_bits_chain *next = zba_chain_for_index(zbc->zbc_next);
+ if (next->zbc_prev != index) {
+ zba_chain_corruption_panic(zbc, next);
}
+ next->zbc_prev = zbc->zbc_prev;
}
- return result;
}
-
-void __unused
-ztDump(boolean_t sanity);
-void __unused
-ztDump(boolean_t sanity)
+static vm_address_t
+zba_try_pop_block(uint32_t order)
{
- uint32_t q, cq, p;
+ struct zone_bits_head *hd = zba_head(order);
+ struct zone_bits_chain *zbc;
- for (q = 0; q <= ztFreeIndexMax; q++) {
- p = q;
- do{
- if (sanity) {
- cq = ztLog2down(ztBlocks[p].size);
- if (cq > ztFreeIndexMax) {
- cq = ztFreeIndexMax;
- }
- if (!ztBlocks[p].free
- || ((p != q) && (q != cq))
- || (ztBlocks[ztBlocks[p].next].prev != p)
- || (ztBlocks[ztBlocks[p].prev].next != p)) {
- kprintf("zterror at %d", p);
- ztDump(FALSE);
- kprintf("zterror at %d", p);
- assert(FALSE);
- }
- continue;
- }
- kprintf("zt[%03d]%c %d, %d, %d\n",
- p, ztBlocks[p].free ? 'F' : 'A',
- ztBlocks[p].next, ztBlocks[p].prev,
- ztBlocks[p].size);
- p = ztBlocks[p].next;
- if (p == q) {
- break;
- }
- }while (p != q);
- if (!sanity) {
- printf("\n");
- }
- }
- if (!sanity) {
- printf("-----------------------\n");
+ if (hd->zbh_next == 0) {
+ return 0;
}
+
+ zbc = zba_chain_for_index(hd->zbh_next);
+ zba_remove_block(zbc);
+ return (vm_address_t)zbc;
}
+static struct zone_bits_allocator_header *
+zba_header(vm_offset_t addr)
+{
+ addr &= -(vm_offset_t)ZBA_CHUNK_SIZE;
+ return (struct zone_bits_allocator_header *)addr;
+}
+static size_t
+zba_node_parent(size_t node)
+{
+ return (node - 1) / 2;
+}
-#define ZTBDEQ(idx) \
- ztBlocks[ztBlocks[(idx)].prev].next = ztBlocks[(idx)].next; \
- ztBlocks[ztBlocks[(idx)].next].prev = ztBlocks[(idx)].prev;
+static size_t
+zba_node_left_child(size_t node)
+{
+ return node * 2 + 1;
+}
-static void
-ztFree(zone_t zone __unused, uint32_t index, uint32_t count)
+static size_t
+zba_node_buddy(size_t node)
{
- uint32_t q, w, p, size, merge;
+ return ((node - 1) ^ 1) + 1;
+}
- assert(count);
- ztBlocksFree += count;
+static size_t
+zba_node(vm_offset_t addr, uint32_t order)
+{
+ vm_offset_t offs = (addr % ZBA_CHUNK_SIZE) / ZBA_GRANULE;
+ return (offs >> order) + (1 << (ZBA_MAX_ORDER - order + 1)) - 1;
+}
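+
+/*
+ * Node numbering example (assuming ZBA_MAX_ORDER == 10, i.e. 16K chunks):
+ * order-o blocks occupy node indices [2^(11-o) - 1, 2^(12-o) - 2], so the
+ * two order-10 halves of a chunk are nodes 1 and 2, and order-0 granules
+ * are nodes 2047..4094.  The order-0 block at granule offset 6 is node
+ * 6 + 2047 = 2053, its buddy (granule 7) is node 2054, and their common
+ * parent is node (2053 - 1) / 2 = 1026, the order-1 block covering
+ * granules 6 and 7.
+ */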
- // merge with preceding
- merge = (index + count);
- if ((merge < ztBlocksCount)
- && ztPresent(&ztBlocks[merge], sizeof(ztBlocks[merge]))
- && ztBlocks[merge].free) {
- ZTBDEQ(merge);
- count += ztBlocks[merge].size;
- }
-
- // merge with following
- merge = (index - 1);
- if ((merge > ztFreeIndexMax)
- && ztPresent(&ztBlocks[merge], sizeof(ztBlocks[merge]))
- && ztBlocks[merge].free) {
- size = ztBlocks[merge].size;
- count += size;
- index -= size;
- ZTBDEQ(index);
- }
-
- q = ztLog2down(count);
- if (q > ztFreeIndexMax) {
- q = ztFreeIndexMax;
- }
- w = q;
- // queue in order of size
- while (TRUE) {
- p = ztBlocks[w].next;
- if (p == q) {
- break;
- }
- if (ztBlocks[p].size >= count) {
- break;
- }
- w = p;
- }
- ztBlocks[p].prev = index;
- ztBlocks[w].next = index;
-
- // fault in first
- ztFault(zone_tags_map, &ztBlocks[index], sizeof(ztBlocks[index]), 0);
-
- // mark first & last with free flag and size
- ztBlocks[index].free = TRUE;
- ztBlocks[index].size = count;
- ztBlocks[index].prev = w;
- ztBlocks[index].next = p;
- if (count > 1) {
- index += (count - 1);
- // fault in last
- ztFault(zone_tags_map, &ztBlocks[index], sizeof(ztBlocks[index]), 0);
- ztBlocks[index].free = TRUE;
- ztBlocks[index].size = count;
- }
+static struct zone_bits_chain *
+zba_chain_for_node(struct zone_bits_allocator_header *zbah, size_t node, uint32_t order)
+{
+ vm_offset_t offs = (node - (1 << (ZBA_MAX_ORDER - order + 1)) + 1) << order;
+ return (struct zone_bits_chain *)((vm_offset_t)zbah + offs * ZBA_GRANULE);
}
-static uint32_t
-ztAlloc(zone_t zone, uint32_t count)
+static void
+zba_node_flip_split(struct zone_bits_allocator_header *zbah, size_t node)
{
- uint32_t q, w, p, leftover;
-
- assert(count);
-
- q = ztLog2up(count);
- if (q > ztFreeIndexMax) {
- q = ztFreeIndexMax;
- }
- do{
- w = q;
- while (TRUE) {
- p = ztBlocks[w].next;
- if (p == q) {
- break;
- }
- if (ztBlocks[p].size >= count) {
- // dequeue, mark both ends allocated
- ztBlocks[w].next = ztBlocks[p].next;
- ztBlocks[ztBlocks[p].next].prev = w;
- ztBlocks[p].free = FALSE;
- ztBlocksFree -= ztBlocks[p].size;
- if (ztBlocks[p].size > 1) {
- ztBlocks[p + ztBlocks[p].size - 1].free = FALSE;
- }
-
- // fault all the allocation
- ztFault(zone_tags_map, &ztBlocks[p], count * sizeof(ztBlocks[p]), 0);
- // mark last as allocated
- if (count > 1) {
- ztBlocks[p + count - 1].free = FALSE;
- }
- // free remainder
- leftover = ztBlocks[p].size - count;
- if (leftover) {
- ztFree(zone, p + ztBlocks[p].size - leftover, leftover);
- }
-
- return p;
- }
- w = p;
- }
- q++;
- }while (q <= ztFreeIndexMax);
+ zbah->zbah_bits[node / 64] ^= 1ull << (node % 64);
+}
- return -1U;
+static bool
+zba_node_is_split(struct zone_bits_allocator_header *zbah, size_t node)
+{
+ return zbah->zbah_bits[node / 64] & (1ull << (node % 64));
}
-__startup_func
static void
-zone_tagging_init(vm_size_t max_zonemap_size)
+zba_free(vm_offset_t addr, uint32_t order)
{
- kern_return_t ret;
- vm_map_kernel_flags_t vmk_flags;
- uint32_t idx;
-
- // allocate submaps VM_KERN_MEMORY_DIAG
-
- zone_tagbase_map_size = atop(max_zonemap_size) * sizeof(uint32_t);
- vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
- vmk_flags.vmkf_permanent = TRUE;
- ret = kmem_suballoc(kernel_map, &zone_tagbase_min, zone_tagbase_map_size,
- FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_DIAG,
- &zone_tagbase_map);
+ struct zone_bits_allocator_header *zbah = zba_header(addr);
+ struct zone_bits_chain *zbc;
+ size_t node = zba_node(addr, order);
- if (ret != KERN_SUCCESS) {
- panic("zone_init: kmem_suballoc failed");
- }
- zone_tagbase_max = zone_tagbase_min + round_page(zone_tagbase_map_size);
+ while (node) {
+ size_t parent = zba_node_parent(node);
- zone_tags_map_size = 2048 * 1024 * sizeof(vm_tag_t);
- vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
- vmk_flags.vmkf_permanent = TRUE;
- ret = kmem_suballoc(kernel_map, &zone_tags_min, zone_tags_map_size,
- FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_DIAG,
- &zone_tags_map);
+ zba_node_flip_split(zbah, parent);
+ if (zba_node_is_split(zbah, parent)) {
+ break;
+ }
- if (ret != KERN_SUCCESS) {
- panic("zone_init: kmem_suballoc failed");
+ zbc = zba_chain_for_node(zbah, zba_node_buddy(node), order);
+ zba_remove_block(zbc);
+ order++;
+ node = parent;
}
- zone_tags_max = zone_tags_min + round_page(zone_tags_map_size);
-
- ztBlocks = (ztBlock *) zone_tags_min;
- ztBlocksCount = (uint32_t)(zone_tags_map_size / sizeof(ztBlock));
- // initialize the qheads
- lck_mtx_lock(&ztLock);
+ zba_push_block(zba_chain_for_node(zbah, node, order), order);
+}
- ztFault(zone_tags_map, &ztBlocks[0], sizeof(ztBlocks[0]), 0);
- for (idx = 0; idx < ztFreeIndexCount; idx++) {
- ztBlocks[idx].free = TRUE;
- ztBlocks[idx].next = idx;
- ztBlocks[idx].prev = idx;
- ztBlocks[idx].size = 0;
+static vm_size_t
+zba_chunk_header_size(uint32_t n)
+{
+ vm_size_t hdr_size = sizeof(struct zone_bits_allocator_header);
+ if (n == 0) {
+ hdr_size += sizeof(struct zone_bits_allocator_meta);
}
- // free remaining space
- ztFree(NULL, ztFreeIndexCount, ztBlocksCount - ztFreeIndexCount);
-
- lck_mtx_unlock(&ztLock);
+ return hdr_size;
}
static void
-ztMemoryAdd(zone_t zone, vm_offset_t mem, vm_size_t size)
+zba_init_chunk(uint32_t n)
{
- uint32_t * tagbase;
- uint32_t count, block, blocks, idx;
- size_t pages;
-
- pages = atop(size);
- tagbase = ZTAGBASE(zone, mem);
-
- lck_mtx_lock(&ztLock);
+ vm_size_t hdr_size = zba_chunk_header_size(n);
+ vm_offset_t page = zba_page_addr(n);
+ struct zone_bits_allocator_header *zbah = zba_header(page);
+ vm_size_t size = ZBA_CHUNK_SIZE;
+ size_t node;
- // fault tagbase
- ztFault(zone_tagbase_map, tagbase, pages * sizeof(uint32_t), 0);
-
- if (!zone->tags_inline) {
- // allocate tags
- count = (uint32_t)(size / zone_elem_size(zone));
- blocks = ((count + ztTagsPerBlock - 1) / ztTagsPerBlock);
- block = ztAlloc(zone, blocks);
- if (-1U == block) {
- ztDump(false);
+ for (uint32_t o = ZBA_MAX_ORDER + 1; o-- > 0;) {
+ if (size < hdr_size + (ZBA_GRANULE << o)) {
+ continue;
}
- assert(-1U != block);
+ size -= ZBA_GRANULE << o;
+ node = zba_node(page + size, o);
+ zba_node_flip_split(zbah, zba_node_parent(node));
+ zba_push_block(zba_chain_for_node(zbah, node, o), o);
}
- lck_mtx_unlock(&ztLock);
-
- if (!zone->tags_inline) {
- // set tag base for each page
- block *= ztTagsPerBlock;
- for (idx = 0; idx < pages; idx++) {
- vm_offset_t esize = zone_elem_size(zone);
- tagbase[idx] = block + (uint32_t)((ptoa(idx) + esize - 1) / esize);
- }
- }
+ zba_meta()->zbam_chunks = n + 1;
}
+__attribute__((noinline))
static void
-ztMemoryRemove(zone_t zone, vm_offset_t mem, vm_size_t size)
+zba_grow(void)
{
- uint32_t * tagbase;
- uint32_t count, block, blocks, idx;
- size_t pages;
-
- // set tag base for each page
- pages = atop(size);
- tagbase = ZTAGBASE(zone, mem);
- block = tagbase[0];
- for (idx = 0; idx < pages; idx++) {
- tagbase[idx] = 0xFFFFFFFF;
- }
+ uint32_t chunk = zba_meta()->zbam_chunks;
- lck_mtx_lock(&ztLock);
- if (!zone->tags_inline) {
- count = (uint32_t)(size / zone_elem_size(zone));
- blocks = ((count + ztTagsPerBlock - 1) / ztTagsPerBlock);
- assert(block != 0xFFFFFFFF);
- block /= ztTagsPerBlock;
- ztFree(NULL /* zone is unlocked */, block, blocks);
+ zba_populate(chunk);
+ if (zba_meta()->zbam_chunks == chunk) {
+ zba_init_chunk(chunk);
}
-
- lck_mtx_unlock(&ztLock);
}
-uint32_t
-zone_index_from_tag_index(uint32_t tag_zone_index, vm_size_t * elem_size)
+static vm_offset_t
+zba_alloc(uint32_t order)
{
- simple_lock(&all_zones_lock, &zone_locks_grp);
+ struct zone_bits_allocator_header *zbah;
+ uint32_t cur = order;
+ vm_address_t addr;
+ size_t node;
- zone_index_foreach(idx) {
- zone_t z = &zone_array[idx];
- if (!z->tags) {
- continue;
- }
- if (tag_zone_index != z->tag_zone_index) {
- continue;
+ while ((addr = zba_try_pop_block(cur)) == 0) {
+ if (cur++ >= ZBA_MAX_ORDER) {
+ zba_grow();
+ cur = order;
}
-
- *elem_size = zone_elem_size(z);
- simple_unlock(&all_zones_lock);
- return idx;
}
- simple_unlock(&all_zones_lock);
+ zbah = zba_header(addr);
+ node = zba_node(addr, cur);
+ zba_node_flip_split(zbah, zba_node_parent(node));
+ while (cur > order) {
+ cur--;
+ zba_node_flip_split(zbah, node);
+ node = zba_node_left_child(node);
+ zba_push_block(zba_chain_for_node(zbah, node + 1, cur), cur);
+ }
- return -1U;
+ return addr;
}
-#endif /* VM_MAX_TAG_ZONES */
-#pragma mark zalloc helpers
+#define zba_map_index(type, n) (n / (8 * sizeof(type)))
+#define zba_map_bit(type, n) ((type)1 << (n % (8 * sizeof(type))))
+#define zba_map_mask_lt(type, n) (zba_map_bit(type, n) - 1)
+#define zba_map_mask_ge(type, n) ((type)-zba_map_bit(type, n))
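+
+/*
+ * For example (the bit index reduces modulo the word width):
+ *
+ *     zba_map_index(uint32_t, 37)   == 1            (37 / 32)
+ *     zba_map_bit(uint32_t, 37)     == 0x00000020   (1 << (37 % 32))
+ *     zba_map_mask_lt(uint32_t, 37) == 0x0000001f   (bits 0..4 set)
+ *     zba_map_mask_ge(uint32_t, 37) == 0xffffffe0   (bits 5..31 set)
+ */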
-const char *
-zone_name(zone_t z)
+#if !ZALLOC_TEST
+static uint32_t
+zba_bits_ref_order(uint32_t bref)
{
- return z->z_name;
+ return bref & 0x7;
}
-const char *
-zone_heap_name(zone_t z)
+static bitmap_t *
+zba_bits_ref_ptr(uint32_t bref)
{
- if (__probable(z->kalloc_heap < KHEAP_ID_COUNT)) {
- return kalloc_heap_names[z->kalloc_heap];
- }
- return "invalid";
+ return zba_slot_base() + (bref >> 3);
}
-static inline vm_size_t
-zone_submaps_approx_size(void)
+static vm_offset_t
+zba_scan_bitmap_inline(zone_t zone, struct zone_page_metadata *meta,
+ vm_offset_t eidx)
{
- vm_size_t size = 0;
+ size_t i = eidx / 32;
+ uint32_t map;
- for (unsigned idx = 0; idx <= zone_last_submap_idx; idx++) {
- size += zone_submaps[idx]->size;
+ if (eidx % 32) {
+ map = meta[i].zm_bitmap & zba_map_mask_ge(uint32_t, eidx);
+ if (map) {
+ eidx = __builtin_ctz(map);
+ meta[i].zm_bitmap ^= 1u << eidx;
+ return i * 32 + eidx;
+ }
+ i++;
}
- return size;
-}
+ uint32_t chunk_len = meta->zm_chunk_len;
+ if (chunk_len == 1 && zone->z_percpu) {
+ chunk_len = zpercpu_count();
+ }
+ for (int j = 0; j < chunk_len; j++, i++) {
+ if (i >= chunk_len) {
+ i = 0;
+ }
+ if (__probable(map = meta[i].zm_bitmap)) {
+ meta[i].zm_bitmap &= map - 1;
+ return i * 32 + __builtin_ctz(map);
+ }
+ }
-bool
-zone_maps_owned(vm_address_t addr, vm_size_t size)
-{
- return from_zone_map(addr, size);
+ zone_page_meta_accounting_panic(zone, meta, "zm_bitmap");
}
-void
-zone_map_sizes(
- vm_map_size_t *psize,
- vm_map_size_t *pfree,
- vm_map_size_t *plargest_free)
+static vm_offset_t
+zba_scan_bitmap_ref(zone_t zone, struct zone_page_metadata *meta,
+ vm_offset_t eidx)
{
- vm_map_sizes(zone_submaps[Z_SUBMAP_IDX_GENERAL_MAP], psize, pfree, plargest_free);
-}
+ uint32_t bits_size = 1 << zba_bits_ref_order(meta->zm_bitmap);
+ bitmap_t *bits = zba_bits_ref_ptr(meta->zm_bitmap);
+ size_t i = eidx / 64;
+ uint64_t map;
-vm_map_t
-zone_submap(zone_t zone)
-{
- return submap_for_zone(zone);
+ if (eidx % 64) {
+ map = bits[i] & zba_map_mask_ge(uint64_t, eidx);
+ if (map) {
+ eidx = __builtin_ctzll(map);
+ bits[i] ^= 1ull << eidx;
+ return i * 64 + eidx;
+ }
+ i++;
+ }
+
+ for (int j = 0; j < bits_size; i++, j++) {
+ if (i >= bits_size) {
+ i = 0;
+ }
+ if (__probable(map = bits[i])) {
+ bits[i] &= map - 1;
+ return i * 64 + __builtin_ctzll(map);
+ }
+ }
+
+ zone_page_meta_accounting_panic(zone, meta, "zm_bitmap");
}
-unsigned
-zpercpu_count(void)
+/*!
+ * @function zone_meta_find_and_clear_bit
+ *
+ * @brief
+ * The core of the bitmap allocator: find a bit set in the bitmaps.
+ *
+ * @discussion
+ * This function round-robins through the available elements,
+ * keeping a per-CPU memory of the index of the last allocated element.
+ *
+ * This is done in order to avoid a fully LIFO behavior which makes exploiting
+ * double-free bugs way too practical.
+ *
+ * @param zone The zone we're allocating from.
+ * @param meta The main metadata for the chunk being allocated from.
+ */
+static vm_offset_t
+zone_meta_find_and_clear_bit(zone_t zone, struct zone_page_metadata *meta)
{
- return zpercpu_early_count;
+ zone_stats_t zs = zpercpu_get(zone->z_stats);
+ vm_offset_t eidx = zs->zs_alloc_rr + 1;
+
+ if (meta->zm_inline_bitmap) {
+ eidx = zba_scan_bitmap_inline(zone, meta, eidx);
+ } else {
+ eidx = zba_scan_bitmap_ref(zone, meta, eidx);
+ }
+ zs->zs_alloc_rr = (uint16_t)eidx;
+ return eidx;
}
-int
-track_this_zone(const char *zonename, const char *logname)
+/*!
+ * @function zone_meta_bits_init
+ *
+ * @brief
+ * Initializes the zm_bitmap field(s) for a newly assigned chunk.
+ *
+ * @param meta The main metadata for the initialized chunk.
+ * @param count The number of elements the chunk can hold
+ * (which might be partial for partially populated chunks).
+ * @param nbits The maximum number of bits that will be used.
+ */
+static void
+zone_meta_bits_init(struct zone_page_metadata *meta,
+ uint32_t count, uint32_t nbits)
{
- unsigned int len;
- const char *zc = zonename;
- const char *lc = logname;
-
- /*
- * Compare the strings. We bound the compare by MAX_ZONE_NAME.
- */
+ static_assert(ZONE_MAX_ALLOC_SIZE / ZONE_MIN_ELEM_SIZE <=
+ ZBA_GRANULE_BITS << ZBA_MAX_ORDER, "bitmaps will be large enough");
- for (len = 1; len <= MAX_ZONE_NAME; zc++, lc++, len++) {
+ if (meta->zm_inline_bitmap) {
/*
- * If the current characters don't match, check for a space in
- * in the zone name and a corresponding period in the log name.
- * If that's not there, then the strings don't match.
+ * We're called with the metadata zm_bitmap fields already
+ * zeroed out.
*/
-
- if (*zc != *lc && !(*zc == ' ' && *lc == '.')) {
- break;
+ for (size_t i = 0; 32 * i < count; i++) {
+ if (32 * i + 32 <= count) {
+ meta[i].zm_bitmap = ~0u;
+ } else {
+ meta[i].zm_bitmap = zba_map_mask_lt(uint32_t, count);
+ }
}
+ } else {
+ uint32_t order = flsll((nbits - 1) / ZBA_GRANULE_BITS);
+ uint64_t *bits;
- /*
- * The strings are equal so far. If we're at the end, then it's a match.
- */
+ assert(order <= ZBA_MAX_ALLOC_ORDER);
+ assert(count <= ZBA_GRANULE_BITS << order);
- if (*zc == '\0') {
- return TRUE;
+ zba_lock();
+ bits = (uint64_t *)zba_alloc(order);
+ zba_unlock();
+
+ for (size_t i = 0; i < 1u << order; i++) {
+ if (64 * i + 64 <= count) {
+ bits[i] = ~0ull;
+ } else if (64 * i < count) {
+ bits[i] = zba_map_mask_lt(uint64_t, count);
+ } else {
+ bits[i] = 0ull;
+ }
}
- }
- return FALSE;
+ meta->zm_bitmap = (uint32_t)((vm_offset_t)bits -
+ (vm_offset_t)zba_slot_base()) + order;
+ }
}
-#if DEBUG || DEVELOPMENT
-
-vm_size_t
-zone_element_info(void *addr, vm_tag_t * ptag)
+/*!
+ * @function zone_meta_bits_merge
+ *
+ * @brief
+ * Adds elements <code>[start, end)</code> to a chunk being extended.
+ *
+ * @param meta The main metadata for the extended chunk.
+ * @param start The index of the first element to add to the chunk.
+ * @param end The index one past the last element to add (exclusive).
+ */
+static void
+zone_meta_bits_merge(struct zone_page_metadata *meta,
+ uint32_t start, uint32_t end)
{
- vm_size_t size = 0;
- vm_tag_t tag = VM_KERN_MEMORY_NONE;
- struct zone_page_metadata *meta;
- struct zone *src_zone;
+ if (meta->zm_inline_bitmap) {
+ while (start < end) {
+ size_t s_i = start / 32;
+ size_t s_e = end / 32;
- if (from_zone_map(addr, sizeof(void *))) {
- meta = zone_native_meta_from_addr(addr);
- src_zone = &zone_array[meta->zm_index];
-#if VM_MAX_TAG_ZONES
- if (__improbable(src_zone->tags)) {
- tag = (ZTAG(src_zone, (vm_offset_t) addr)[0] >> 1);
+ if (s_i == s_e) {
+ meta[s_i].zm_bitmap |= zba_map_mask_lt(uint32_t, end) &
+ zba_map_mask_ge(uint32_t, start);
+ break;
+ }
+
+ meta[s_i].zm_bitmap |= zba_map_mask_ge(uint32_t, start);
+ start += 32 - (start % 32);
}
-#endif /* VM_MAX_TAG_ZONES */
- size = zone_elem_size(src_zone);
} else {
-#if CONFIG_GZALLOC
- gzalloc_element_size(addr, NULL, &size);
-#endif /* CONFIG_GZALLOC */
- }
- *ptag = tag;
- return size;
-}
+ uint64_t *bits = zba_bits_ref_ptr(meta->zm_bitmap);
-#endif /* DEBUG || DEVELOPMENT */
+ while (start < end) {
+ size_t s_i = start / 64;
+ size_t s_e = end / 64;
-/* Someone wrote to freed memory. */
-__abortlike
-static void
-zone_element_was_modified_panic(
- zone_t zone,
- vm_offset_t element,
- vm_offset_t found,
- vm_offset_t expected,
- vm_offset_t offset)
-{
- panic("a freed zone element has been modified in zone %s%s: "
- "expected %p but found %p, bits changed %p, "
- "at offset %d of %d in element %p, cookies %p %p",
- zone_heap_name(zone),
- zone->z_name,
- (void *) expected,
- (void *) found,
- (void *) (expected ^ found),
- (uint32_t) offset,
- (uint32_t) zone_elem_size(zone),
- (void *) element,
- (void *) zp_nopoison_cookie,
- (void *) zp_poisoned_cookie);
+ if (s_i == s_e) {
+ bits[s_i] |= zba_map_mask_lt(uint64_t, end) &
+ zba_map_mask_ge(uint64_t, start);
+ break;
+ }
+ bits[s_i] |= zba_map_mask_ge(uint64_t, start);
+ start += 64 - (start % 64);
+ }
+ }
}
-/* The backup pointer is stored in the last pointer-sized location in an element. */
-__header_always_inline vm_offset_t *
-get_backup_ptr(vm_size_t elem_size, vm_offset_t *element)
+/*!
+ * @function zone_bits_free
+ *
+ * @brief
+ * Frees a bitmap to the zone bitmap allocator.
+ *
+ * @param bref
+ * A bitmap reference set by @c zone_meta_bits_init() in a @c zm_bitmap field.
+ */
+static void
+zone_bits_free(uint32_t bref)
{
- return (vm_offset_t *)((vm_offset_t)element + elem_size - sizeof(vm_offset_t));
+ zba_lock();
+ zba_free((vm_offset_t)zba_bits_ref_ptr(bref), zba_bits_ref_order(bref));
+ zba_unlock();
}
-/*
- * The primary and backup pointers don't match.
- * Determine which one was likely the corrupted pointer, find out what it
- * probably should have been, and panic.
+/*!
+ * @function zone_meta_is_free
+ *
+ * @brief
+ * Returns whether a given element appears free.
*/
-__abortlike
-static void
-backup_ptr_mismatch_panic(
- zone_t zone,
- struct zone_page_metadata *page_meta,
- vm_offset_t page,
- vm_offset_t element)
-{
- vm_offset_t primary = *(vm_offset_t *)element;
- vm_offset_t backup = *get_backup_ptr(zone_elem_size(zone), &element);
- vm_offset_t likely_backup;
- vm_offset_t likely_primary;
- zone_addr_kind_t kind = zone_addr_kind(page, zone_elem_size(zone));
-
- likely_primary = primary ^ zp_nopoison_cookie;
- boolean_t sane_backup;
- boolean_t sane_primary = zone_page_meta_is_sane_element(zone, page_meta,
- page, likely_primary, kind);
- boolean_t element_was_poisoned = (backup & 0x1);
-
-#if defined(__LP64__)
- /* We can inspect the tag in the upper bits for additional confirmation */
- if ((backup & 0xFFFFFF0000000000) == 0xFACADE0000000000) {
- element_was_poisoned = TRUE;
- } else if ((backup & 0xFFFFFF0000000000) == 0xC0FFEE0000000000) {
- element_was_poisoned = FALSE;
- }
-#endif
-
- if (element_was_poisoned) {
- likely_backup = backup ^ zp_poisoned_cookie;
+static bool
+zone_meta_is_free(struct zone_page_metadata *meta, zone_element_t ze)
+{
+ vm_offset_t eidx = zone_element_idx(ze);
+ if (meta->zm_inline_bitmap) {
+ uint32_t bit = zba_map_bit(uint32_t, eidx);
+ return meta[zba_map_index(uint32_t, eidx)].zm_bitmap & bit;
} else {
- likely_backup = backup ^ zp_nopoison_cookie;
- }
- sane_backup = zone_page_meta_is_sane_element(zone, page_meta,
- page, likely_backup, kind);
-
- /* The primary is definitely the corrupted one */
- if (!sane_primary && sane_backup) {
- zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0);
- }
-
- /* The backup is definitely the corrupted one */
- if (sane_primary && !sane_backup) {
- zone_element_was_modified_panic(zone, element, backup,
- (likely_primary ^ (element_was_poisoned ? zp_poisoned_cookie : zp_nopoison_cookie)),
- zone_elem_size(zone) - sizeof(vm_offset_t));
- }
-
- /*
- * Not sure which is the corrupted one.
- * It's less likely that the backup pointer was overwritten with
- * ( (sane address) ^ (valid cookie) ), so we'll guess that the
- * primary pointer has been overwritten with a sane but incorrect address.
- */
- if (sane_primary && sane_backup) {
- zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0);
+ bitmap_t *bits = zba_bits_ref_ptr(meta->zm_bitmap);
+ uint64_t bit = zba_map_bit(uint64_t, eidx);
+ return bits[zba_map_index(uint64_t, eidx)] & bit;
}
-
- /* Neither are sane, so just guess. */
- zone_element_was_modified_panic(zone, element, primary, (likely_backup ^ zp_nopoison_cookie), 0);
}
-/*
- * zone_sequestered_page_get
- * z is locked
+/*!
+ * @function zone_meta_mark_free
+ *
+ * @brief
+ * Marks an element as free and returns whether it was marked as used.
*/
-static struct zone_page_metadata *
-zone_sequestered_page_get(zone_t z, vm_offset_t *page)
+static bool
+zone_meta_mark_free(struct zone_page_metadata *meta, zone_element_t ze)
{
- const zone_addr_kind_t kind = ZONE_ADDR_NATIVE;
+ vm_offset_t eidx = zone_element_idx(ze);
- if (!zone_pva_is_null(z->pages_sequester)) {
- if (os_sub_overflow(z->sequester_page_count, z->alloc_pages,
- &z->sequester_page_count)) {
- zone_accounting_panic(z, "sequester_page_count wrap-around");
+ if (meta->zm_inline_bitmap) {
+ uint32_t bit = zba_map_bit(uint32_t, eidx);
+ if (meta[zba_map_index(uint32_t, eidx)].zm_bitmap & bit) {
+ return false;
+ }
+ meta[zba_map_index(uint32_t, eidx)].zm_bitmap ^= bit;
+ } else {
+ bitmap_t *bits = zba_bits_ref_ptr(meta->zm_bitmap);
+ uint64_t bit = zba_map_bit(uint64_t, eidx);
+ if (bits[zba_map_index(uint64_t, eidx)] & bit) {
+ return false;
}
- return zone_meta_queue_pop(z, &z->pages_sequester, kind, page);
+ bits[zba_map_index(uint64_t, eidx)] ^= bit;
}
-
- return NULL;
+ return true;
}
-/*
- * zone_sequestered_page_populate
- * z is unlocked
- * page_meta is invalid on failure
+/*!
+ * @function zone_meta_mark_used
+ *
+ * @brief
+ * Marks an element as used and returns whether it was marked as free
*/
-static kern_return_t
-zone_sequestered_page_populate(zone_t z, struct zone_page_metadata *page_meta,
- vm_offset_t space, vm_size_t alloc_size, int zflags)
+static bool
+zone_meta_mark_used(struct zone_page_metadata *meta, zone_element_t ze)
{
- kern_return_t retval;
+ vm_offset_t eidx = zone_element_idx(ze);
- assert(alloc_size == ptoa(z->alloc_pages));
- retval = kernel_memory_populate(submap_for_zone(z), space, alloc_size,
- zflags, VM_KERN_MEMORY_ZONE);
- if (retval != KERN_SUCCESS) {
- lock_zone(z);
- zone_meta_queue_push(z, &z->pages_sequester, page_meta, ZONE_ADDR_NATIVE);
- z->sequester_page_count += z->alloc_pages;
- unlock_zone(z);
+ if (meta->zm_inline_bitmap) {
+ uint32_t bit = zba_map_bit(uint32_t, eidx);
+ if (meta[zba_map_index(uint32_t, eidx)].zm_bitmap & bit) {
+ meta[zba_map_index(uint32_t, eidx)].zm_bitmap ^= bit;
+ return true;
+ }
+ } else {
+ bitmap_t *bits = zba_bits_ref_ptr(meta->zm_bitmap);
+ uint64_t bit = zba_map_bit(uint64_t, eidx);
+ if (bits[zba_map_index(uint64_t, eidx)] & bit) {
+ bits[zba_map_index(uint64_t, eidx)] ^= bit;
+ return true;
+ }
}
- return retval;
+ return false;
}
-#pragma mark Zone poisoning/zeroing
-
+#endif /* !ZALLOC_TEST */
+/*! @} */
+#pragma mark ZTAGS
+#if !ZALLOC_TEST
+#if VM_MAX_TAG_ZONES
/*
- * Initialize zone poisoning
- * called from zone_bootstrap before any allocations are made from zalloc
+ * Zone tagging allows for per "tag" accounting of allocations for the kalloc
+ * zones only.
+ *
+ * There are 3 kinds of tags that can be used:
+ * - pre-registered VM_KERN_MEMORY_*
+ * - dynamic tags allocated per call site in the core kernel (using vm_tag_alloc())
+ * - per-kext tags computed by IOKit (using the magic VM_TAG_BT marker).
+ *
+ * The VM tracks the statistics in lazily allocated structures.
+ * See vm_tag_will_update_zone(), vm_tag_update_zone_size().
+ *
+ * If for some reason the requested tag cannot be accounted for,
+ * the tag is forced to VM_KERN_MEMORY_KALLOC which is pre-allocated.
+ *
+ * Each allocated element also remembers the tag it was assigned
+ * in its ztSlot(), which lets zalloc/zfree update statistics correctly.
*/
-__startup_func
-static void
-zp_bootstrap(void)
-{
- char temp_buf[16];
- /*
- * Initialize backup pointer random cookie for poisoned elements
- * Try not to call early_random() back to back, it may return
- * the same value if mach_absolute_time doesn't have sufficient time
- * to tick over between calls. <rdar://problem/11597395>
- * (This is only a problem on embedded devices)
- */
- zp_poisoned_cookie = (uintptr_t) early_random();
+// for zones with tagging enabled:
- /* -zp: enable poisoning for every alloc and free */
- if (PE_parse_boot_argn("-zp", temp_buf, sizeof(temp_buf))) {
- zp_factor = 1;
- }
+// calculate a pointer to the tag base entry,
+// holding either a uint32_t the first tag offset for a page in the zone map,
+// or two uint16_t tags if the page can only hold one or two elements
- /* -no-zp: disable poisoning */
- if (PE_parse_boot_argn("-no-zp", temp_buf, sizeof(temp_buf))) {
- zp_factor = 0;
- printf("Zone poisoning disabled\n");
- }
+#define ZTAGBASE(zone, element) \
+ (&((uint32_t *)zone_tagbase_min)[atop((element) - \
+ zone_info.zi_map_range[ZONE_ADDR_NATIVE].min_address)])
- /* Initialize backup pointer random cookie for unpoisoned elements */
- zp_nopoison_cookie = (uintptr_t) early_random();
+static vm_offset_t zone_tagbase_min;
+static vm_offset_t zone_tagbase_max;
+static vm_offset_t zone_tagbase_map_size;
+static vm_map_t zone_tagbase_map;
-#if MACH_ASSERT
- if (zp_poisoned_cookie == zp_nopoison_cookie) {
- panic("early_random() is broken: %p and %p are not random\n",
- (void *) zp_poisoned_cookie, (void *) zp_nopoison_cookie);
- }
-#endif
+static vm_offset_t zone_tags_min;
+static vm_offset_t zone_tags_max;
+static vm_offset_t zone_tags_map_size;
+static vm_map_t zone_tags_map;
- /*
- * Use the last bit in the backup pointer to hint poisoning state
- * to backup_ptr_mismatch_panic. Valid zone pointers are aligned, so
- * the low bits are zero.
- */
- zp_poisoned_cookie |= (uintptr_t)0x1ULL;
- zp_nopoison_cookie &= ~((uintptr_t)0x1ULL);
+// simple heap allocator for allocating the tags for new memory
-#if defined(__LP64__)
- /*
- * Make backup pointers more obvious in GDB for 64 bit
- * by making OxFFFFFF... ^ cookie = 0xFACADE...
- * (0xFACADE = 0xFFFFFF ^ 0x053521)
- * (0xC0FFEE = 0xFFFFFF ^ 0x3f0011)
- * The high 3 bytes of a zone pointer are always 0xFFFFFF, and are checked
- * by the sanity check, so it's OK for that part of the cookie to be predictable.
- *
- * TODO: Use #defines, xors, and shifts
- */
+static LCK_MTX_EARLY_DECLARE(ztLock, &zone_locks_grp); /* heap lock */
- zp_poisoned_cookie &= 0x000000FFFFFFFFFF;
- zp_poisoned_cookie |= 0x0535210000000000; /* 0xFACADE */
+enum{
+ ztFreeIndexCount = 8,
+ ztFreeIndexMax = (ztFreeIndexCount - 1),
+ ztTagsPerBlock = 4
+};
- zp_nopoison_cookie &= 0x000000FFFFFFFFFF;
- zp_nopoison_cookie |= 0x3f00110000000000; /* 0xC0FFEE */
+struct ztBlock {
+#if __LITTLE_ENDIAN__
+ uint64_t free:1,
+ next:21,
+ prev:21,
+ size:21;
+#else
+// ztBlock needs free bit least significant
+#error !__LITTLE_ENDIAN__
#endif
+};
+typedef struct ztBlock ztBlock;
- /*
- * Initialize zp_min_size to two cachelines. Elements smaller than this will
- * be zero-ed.
- */
- ml_cpu_info_t cpu_info;
- ml_cpu_get_info(&cpu_info);
- zp_min_size = 2 * cpu_info.cache_line_size;
-}
-
-inline uint32_t
-zone_poison_count_init(zone_t zone)
-{
- return zp_factor + (((uint32_t)zone_elem_size(zone)) >> zp_scale) ^
- (mach_absolute_time() & 0x7);
-}
+static ztBlock * ztBlocks;
+static uint32_t ztBlocksCount;
+static uint32_t ztBlocksFree;
-#if ZALLOC_ENABLE_POISONING
-static bool
-zfree_poison_element(zone_t zone, uint32_t *zp_count, vm_offset_t elem)
+static uint32_t
+ztLog2up(uint32_t size)
{
- bool poison = false;
- uint32_t zp_count_local;
-
- assert(!zone->percpu);
- if (zp_factor != 0) {
- /*
- * Poison the memory of every zp_count-th element before it ends up
- * on the freelist to catch use-after-free and use of uninitialized
- * memory.
- *
- * Every element is poisoned when zp_factor is set to 1.
- *
- */
- zp_count_local = os_atomic_load(zp_count, relaxed);
- if (__improbable(zp_count_local == 0 || zp_factor == 1)) {
- poison = true;
-
- os_atomic_store(zp_count, zone_poison_count_init(zone), relaxed);
-
- /* memset_pattern{4|8} could help make this faster: <rdar://problem/4662004> */
- vm_offset_t *element_cursor = ((vm_offset_t *) elem);
- vm_offset_t *end_cursor = (vm_offset_t *)(elem + zone_elem_size(zone));
-
- for (; element_cursor < end_cursor; element_cursor++) {
- *element_cursor = ZONE_POISON;
- }
- } else {
- os_atomic_store(zp_count, zp_count_local - 1, relaxed);
- /*
- * Zero first zp_min_size bytes of elements that aren't being poisoned.
- * Element size is larger than zp_min_size in this path as elements
- * that are smaller will always be zero-ed.
- */
- bzero((void *) elem, zp_min_size);
- }
+ if (1 == size) {
+ size = 0;
+ } else {
+ size = 32 - __builtin_clz(size - 1);
}
- return poison;
-}
-#else
-static bool
-zfree_poison_element(zone_t zone, uint32_t *zp_count, vm_offset_t elem)
-{
-#pragma unused(zone, zp_count, elem)
- assert(!zone->percpu);
- return false;
+ return size;
}
-#endif
-__attribute__((always_inline))
-static bool
-zfree_clear(zone_t zone, vm_offset_t addr, vm_size_t elem_size)
+// pointer to the tag for an element
+static vm_tag_t *
+ztSlot(zone_t zone, vm_offset_t element)
{
- assert(zone->zfree_clear_mem);
- if (zone->percpu) {
- zpercpu_foreach_cpu(i) {
- bzero((void *)(addr + ptoa(i)), elem_size);
+ vm_tag_t *result;
+ if (zone->tags_inline) {
+ result = (vm_tag_t *)ZTAGBASE(zone, element);
+ if ((PAGE_MASK & element) >= zone_elem_size(zone)) {
+ result++;
}
} else {
- bzero((void *)addr, elem_size);
+ result = &((vm_tag_t *)zone_tags_min)[ZTAGBASE(zone, element)[0] +
+ (element & PAGE_MASK) / zone_elem_size(zone)];
}
-
- return true;
+ return result;
}
-/*
- * Zero the element if zone has zfree_clear_mem flag set else poison
- * the element if zp_count hits 0.
- */
-__attribute__((always_inline))
-bool
-zfree_clear_or_poison(zone_t zone, uint32_t *zp_count, vm_offset_t addr)
+static uint32_t
+ztLog2down(uint32_t size)
{
- vm_size_t elem_size = zone_elem_size(zone);
-
- if (zone->zfree_clear_mem) {
- return zfree_clear(zone, addr, elem_size);
- }
-
- return zfree_poison_element(zone, zp_count, (vm_offset_t)addr);
+ size = 31 - __builtin_clz(size);
+ return size;
}
+static void
+ztFault(vm_map_t map, const void * address, size_t size, uint32_t flags)
+{
+ vm_map_offset_t addr = (vm_map_offset_t) address;
+ vm_map_offset_t page, end;
+
+ page = trunc_page(addr);
+ end = round_page(addr + size);
+
+ for (; page < end; page += page_size) {
+ if (!pmap_find_phys(kernel_pmap, page)) {
+ kern_return_t __unused
+ ret = kernel_memory_populate(map, page, PAGE_SIZE,
+ KMA_KOBJECT | flags, VM_KERN_MEMORY_DIAG);
+ assert(ret == KERN_SUCCESS);
+ }
+ }
+}
+
+static boolean_t
+ztPresent(const void * address, size_t size)
+{
+ vm_map_offset_t addr = (vm_map_offset_t) address;
+ vm_map_offset_t page, end;
+ boolean_t result;
+
+ page = trunc_page(addr);
+ end = round_page(addr + size);
+ for (result = TRUE; (page < end); page += page_size) {
+ result = pmap_find_phys(kernel_pmap, page);
+ if (!result) {
+ break;
+ }
+ }
+ return result;
+}
+
+
+void __unused
+ztDump(boolean_t sanity);
+void __unused
+ztDump(boolean_t sanity)
+{
+ uint32_t q, cq, p;
+
+ for (q = 0; q <= ztFreeIndexMax; q++) {
+ p = q;
+ do{
+ if (sanity) {
+ cq = ztLog2down(ztBlocks[p].size);
+ if (cq > ztFreeIndexMax) {
+ cq = ztFreeIndexMax;
+ }
+ if (!ztBlocks[p].free
+ || ((p != q) && (q != cq))
+ || (ztBlocks[ztBlocks[p].next].prev != p)
+ || (ztBlocks[ztBlocks[p].prev].next != p)) {
+ kprintf("zterror at %d", p);
+ ztDump(FALSE);
+ kprintf("zterror at %d", p);
+ assert(FALSE);
+ }
+ continue;
+ }
+ kprintf("zt[%03d]%c %d, %d, %d\n",
+ p, ztBlocks[p].free ? 'F' : 'A',
+ ztBlocks[p].next, ztBlocks[p].prev,
+ ztBlocks[p].size);
+ p = ztBlocks[p].next;
+ if (p == q) {
+ break;
+ }
+ }while (p != q);
+ if (!sanity) {
+ printf("\n");
+ }
+ }
+ if (!sanity) {
+ printf("-----------------------\n");
+ }
+}
+
+
+
+#define ZTBDEQ(idx) \
+ ztBlocks[ztBlocks[(idx)].prev].next = ztBlocks[(idx)].next; \
+ ztBlocks[ztBlocks[(idx)].next].prev = ztBlocks[(idx)].prev;
+
+static void
+ztFree(zone_t zone __unused, uint32_t index, uint32_t count)
+{
+ uint32_t q, w, p, size, merge;
+
+ assert(count);
+ ztBlocksFree += count;
+
+ // merge with preceding
+ merge = (index + count);
+ if ((merge < ztBlocksCount)
+ && ztPresent(&ztBlocks[merge], sizeof(ztBlocks[merge]))
+ && ztBlocks[merge].free) {
+ ZTBDEQ(merge);
+ count += ztBlocks[merge].size;
+ }
+
+ // merge with following
+ merge = (index - 1);
+ if ((merge > ztFreeIndexMax)
+ && ztPresent(&ztBlocks[merge], sizeof(ztBlocks[merge]))
+ && ztBlocks[merge].free) {
+ size = ztBlocks[merge].size;
+ count += size;
+ index -= size;
+ ZTBDEQ(index);
+ }
+
+ q = ztLog2down(count);
+ if (q > ztFreeIndexMax) {
+ q = ztFreeIndexMax;
+ }
+ w = q;
+ // queue in order of size
+ while (TRUE) {
+ p = ztBlocks[w].next;
+ if (p == q) {
+ break;
+ }
+ if (ztBlocks[p].size >= count) {
+ break;
+ }
+ w = p;
+ }
+ ztBlocks[p].prev = index;
+ ztBlocks[w].next = index;
+
+ // fault in first
+ ztFault(zone_tags_map, &ztBlocks[index], sizeof(ztBlocks[index]), 0);
+
+ // mark first & last with free flag and size
+ ztBlocks[index].free = TRUE;
+ ztBlocks[index].size = count;
+ ztBlocks[index].prev = w;
+ ztBlocks[index].next = p;
+ if (count > 1) {
+ index += (count - 1);
+ // fault in last
+ ztFault(zone_tags_map, &ztBlocks[index], sizeof(ztBlocks[index]), 0);
+ ztBlocks[index].free = TRUE;
+ ztBlocks[index].size = count;
+ }
+}
+
+static uint32_t
+ztAlloc(zone_t zone, uint32_t count)
+{
+ uint32_t q, w, p, leftover;
+
+ assert(count);
+
+ q = ztLog2up(count);
+ if (q > ztFreeIndexMax) {
+ q = ztFreeIndexMax;
+ }
+ do{
+ w = q;
+ while (TRUE) {
+ p = ztBlocks[w].next;
+ if (p == q) {
+ break;
+ }
+ if (ztBlocks[p].size >= count) {
+ // dequeue, mark both ends allocated
+ ztBlocks[w].next = ztBlocks[p].next;
+ ztBlocks[ztBlocks[p].next].prev = w;
+ ztBlocks[p].free = FALSE;
+ ztBlocksFree -= ztBlocks[p].size;
+ if (ztBlocks[p].size > 1) {
+ ztBlocks[p + ztBlocks[p].size - 1].free = FALSE;
+ }
+
+ // fault all the allocation
+ ztFault(zone_tags_map, &ztBlocks[p], count * sizeof(ztBlocks[p]), 0);
+ // mark last as allocated
+ if (count > 1) {
+ ztBlocks[p + count - 1].free = FALSE;
+ }
+ // free remainder
+ leftover = ztBlocks[p].size - count;
+ if (leftover) {
+ ztFree(zone, p + ztBlocks[p].size - leftover, leftover);
+ }
+
+ return p;
+ }
+ w = p;
+ }
+ q++;
+ }while (q <= ztFreeIndexMax);
+
+ return -1U;
+}
+
+__startup_func
+static void
+zone_tagging_init(vm_size_t max_zonemap_size)
+{
+ kern_return_t ret;
+ vm_map_kernel_flags_t vmk_flags;
+ uint32_t idx;
+
+ // allocate submaps VM_KERN_MEMORY_DIAG
+
+ zone_tagbase_map_size = atop(max_zonemap_size) * sizeof(uint32_t);
+ vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
+ vmk_flags.vmkf_permanent = TRUE;
+ ret = kmem_suballoc(kernel_map, &zone_tagbase_min, zone_tagbase_map_size,
+ FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_DIAG,
+ &zone_tagbase_map);
+
+ if (ret != KERN_SUCCESS) {
+ panic("zone_init: kmem_suballoc failed");
+ }
+ zone_tagbase_max = zone_tagbase_min + round_page(zone_tagbase_map_size);
+
+ zone_tags_map_size = 2048 * 1024 * sizeof(vm_tag_t);
+ vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
+ vmk_flags.vmkf_permanent = TRUE;
+ ret = kmem_suballoc(kernel_map, &zone_tags_min, zone_tags_map_size,
+ FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_DIAG,
+ &zone_tags_map);
+
+ if (ret != KERN_SUCCESS) {
+ panic("zone_init: kmem_suballoc failed");
+ }
+ zone_tags_max = zone_tags_min + round_page(zone_tags_map_size);
+
+ ztBlocks = (ztBlock *) zone_tags_min;
+ ztBlocksCount = (uint32_t)(zone_tags_map_size / sizeof(ztBlock));
+
+ // initialize the qheads
+ lck_mtx_lock(&ztLock);
+
+ ztFault(zone_tags_map, &ztBlocks[0], sizeof(ztBlocks[0]), 0);
+ for (idx = 0; idx < ztFreeIndexCount; idx++) {
+ ztBlocks[idx].free = TRUE;
+ ztBlocks[idx].next = idx;
+ ztBlocks[idx].prev = idx;
+ ztBlocks[idx].size = 0;
+ }
+ // free remaining space
+ ztFree(NULL, ztFreeIndexCount, ztBlocksCount - ztFreeIndexCount);
+
+ lck_mtx_unlock(&ztLock);
+}
+
+static void
+ztMemoryAdd(zone_t zone, vm_offset_t mem, vm_size_t size)
+{
+ uint32_t * tagbase;
+ uint32_t count, block, blocks, idx;
+ size_t pages;
+
+ pages = atop(size);
+ tagbase = ZTAGBASE(zone, mem);
+
+ lck_mtx_lock(&ztLock);
+
+ // fault tagbase
+ ztFault(zone_tagbase_map, tagbase, pages * sizeof(uint32_t), 0);
+
+ if (!zone->tags_inline) {
+ // allocate tags
+ count = (uint32_t)(size / zone_elem_size(zone));
+ blocks = ((count + ztTagsPerBlock - 1) / ztTagsPerBlock);
+ block = ztAlloc(zone, blocks);
+ if (-1U == block) {
+ ztDump(false);
+ }
+ assert(-1U != block);
+ }
+
+ lck_mtx_unlock(&ztLock);
+
+ if (!zone->tags_inline) {
+ // set tag base for each page
+ block *= ztTagsPerBlock;
+ for (idx = 0; idx < pages; idx++) {
+ vm_offset_t esize = zone_elem_size(zone);
+ tagbase[idx] = block + (uint32_t)((ptoa(idx) + esize - 1) / esize);
+ }
+ }
+}
+
+static void
+ztMemoryRemove(zone_t zone, vm_offset_t mem, vm_size_t size)
+{
+ uint32_t * tagbase;
+ uint32_t count, block, blocks, idx;
+ size_t pages;
+
+ // set tag base for each page
+ pages = atop(size);
+ tagbase = ZTAGBASE(zone, mem);
+ block = tagbase[0];
+ for (idx = 0; idx < pages; idx++) {
+ tagbase[idx] = 0xFFFFFFFF;
+ }
+
+ lck_mtx_lock(&ztLock);
+ if (!zone->tags_inline) {
+ count = (uint32_t)(size / zone_elem_size(zone));
+ blocks = ((count + ztTagsPerBlock - 1) / ztTagsPerBlock);
+ assert(block != 0xFFFFFFFF);
+ block /= ztTagsPerBlock;
+ ztFree(NULL /* zone is unlocked */, block, blocks);
+ }
+
+ lck_mtx_unlock(&ztLock);
+}
+
+uint32_t
+zone_index_from_tag_index(uint32_t tag_zone_index, vm_size_t * elem_size)
+{
+ simple_lock(&all_zones_lock, &zone_locks_grp);
+
+ zone_index_foreach(idx) {
+ zone_t z = &zone_array[idx];
+ if (!z->tags) {
+ continue;
+ }
+ if (tag_zone_index != z->tag_zone_index) {
+ continue;
+ }
+
+ *elem_size = zone_elem_size(z);
+ simple_unlock(&all_zones_lock);
+ return idx;
+ }
+
+ simple_unlock(&all_zones_lock);
+
+ return -1U;
+}
+
+#endif /* VM_MAX_TAG_ZONES */
+#endif /* !ZALLOC_TEST */
+#pragma mark zalloc helpers
+#if !ZALLOC_TEST
+
+__pure2
+static inline uint16_t
+zc_mag_size(void)
+{
+ return zc_magazine_size;
+}
+
+__attribute__((noinline, cold))
+static void
+zone_lock_was_contended(zone_t zone, zone_cache_t zc)
+{
+ lck_spin_lock_nopreempt(&zone->z_lock);
+
+ /*
+ * If zone caching has been disabled due to memory pressure,
+ * then recording contention is not useful; give the system
+ * time to recover.
+ */
+ if (__improbable(zone_caching_disabled)) {
+ return;
+ }
+
+ zone->z_contention_cur++;
+
+ if (zc == NULL || zc->zc_depot_max >= INT16_MAX * zc_mag_size()) {
+ return;
+ }
+
+ /*
+ * Let the depot grow based on how bad the contention is,
+ * and how populated the zone is.
+ */
+ if (zone->z_contention_wma < 2 * Z_CONTENTION_WMA_UNIT) {
+ if (zc->zc_depot_max * zpercpu_count() * 20u >=
+ zone->z_elems_avail) {
+ return;
+ }
+ }
+ if (zone->z_contention_wma < 4 * Z_CONTENTION_WMA_UNIT) {
+ if (zc->zc_depot_max * zpercpu_count() * 10u >=
+ zone->z_elems_avail) {
+ return;
+ }
+ }
+ if (!zc_grow_threshold || zone->z_contention_wma <
+ zc_grow_threshold * Z_CONTENTION_WMA_UNIT) {
+ return;
+ }
+
+ zc->zc_depot_max++;
+}
+
+static inline void
+zone_lock_nopreempt_check_contention(zone_t zone, zone_cache_t zc)
+{
+ if (lck_spin_try_lock_nopreempt(&zone->z_lock)) {
+ return;
+ }
+
+ zone_lock_was_contended(zone, zc);
+}
+
+static inline void
+zone_lock_check_contention(zone_t zone, zone_cache_t zc)
+{
+ disable_preemption();
+ zone_lock_nopreempt_check_contention(zone, zc);
+}
+
+static inline void
+zone_unlock_nopreempt(zone_t zone)
+{
+ lck_spin_unlock_nopreempt(&zone->z_lock);
+}
+
+static inline void
+zone_depot_lock_nopreempt(zone_cache_t zc)
+{
+ hw_lock_bit_nopreempt(&zc->zc_depot_lock, 0, &zone_locks_grp);
+}
+
+static inline void
+zone_depot_unlock_nopreempt(zone_cache_t zc)
+{
+ hw_unlock_bit_nopreempt(&zc->zc_depot_lock, 0);
+}
+
+static inline void
+zone_depot_lock(zone_cache_t zc)
+{
+ hw_lock_bit(&zc->zc_depot_lock, 0, &zone_locks_grp);
+}
+
+static inline void
+zone_depot_unlock(zone_cache_t zc)
+{
+ hw_unlock_bit(&zc->zc_depot_lock, 0);
+}
+
+const char *
+zone_name(zone_t z)
+{
+ return z->z_name;
+}
+
+const char *
+zone_heap_name(zone_t z)
+{
+ if (__probable(z->kalloc_heap < KHEAP_ID_COUNT)) {
+ return kalloc_heap_names[z->kalloc_heap];
+ }
+ return "invalid";
+}
+
+static uint32_t
+zone_alloc_pages_for_nelems(zone_t z, vm_size_t max_elems)
+{
+ vm_size_t elem_count, chunks;
+
+ elem_count = ptoa(z->z_percpu ? 1 : z->z_chunk_pages) / zone_elem_size(z);
+ chunks = (max_elems + elem_count - 1) / elem_count;
+
+ return (uint32_t)MIN(UINT32_MAX, chunks * z->z_chunk_pages);
+}
+
+static inline vm_size_t
+zone_submaps_approx_size(void)
+{
+ vm_size_t size = 0;
+
+ for (unsigned idx = 0; idx <= zone_last_submap_idx; idx++) {
+ size += zone_submaps[idx]->size;
+ }
+
+ return size;
+}
+
+static void
+zone_cache_swap_magazines(zone_cache_t cache)
+{
+ uint16_t count_a = cache->zc_alloc_cur;
+ uint16_t count_f = cache->zc_free_cur;
+ zone_element_t *elems_a = cache->zc_alloc_elems;
+ zone_element_t *elems_f = cache->zc_free_elems;
+
+ z_debug_assert(count_a <= zc_mag_size());
+ z_debug_assert(count_f <= zc_mag_size());
+
+ cache->zc_alloc_cur = count_f;
+ cache->zc_free_cur = count_a;
+ cache->zc_alloc_elems = elems_f;
+ cache->zc_free_elems = elems_a;
+}
+
+/*!
+ * @function zone_magazine_load
+ *
+ * @brief
+ * Loads a magazine into a per-CPU cache slot, caching the value of
+ * @c zm_cur to avoid a dependent load on the allocation fastpath.
+ */
+static void
+zone_magazine_load(uint16_t *count, zone_element_t **elems, zone_magazine_t mag)
+{
+ z_debug_assert(mag->zm_cur <= zc_mag_size());
+ *count = mag->zm_cur;
+ *elems = mag->zm_elems;
+}
+
+/*!
+ * @function zone_magazine_replace
+ *
+ * @brief
+ * Unload a magazine and load a new one in its place.
+ */
+static zone_magazine_t
+zone_magazine_replace(uint16_t *count, zone_element_t **elems,
+ zone_magazine_t mag)
+{
+ zone_magazine_t old;
+
+ old = (zone_magazine_t)((uintptr_t)*elems -
+ offsetof(struct zone_magazine, zm_elems));
+ old->zm_cur = *count;
+ z_debug_assert(old->zm_cur <= zc_mag_size());
+ zone_magazine_load(count, elems, mag);
+
+ return old;
+}
+
+static zone_magazine_t
+zone_magazine_alloc(zalloc_flags_t flags)
+{
+ return zalloc_ext(zc_magazine_zone, zc_magazine_zone->z_stats,
+ flags | Z_ZERO);
+}
+
+static void
+zone_magazine_free(zone_magazine_t mag)
+{
+ zfree_ext(zc_magazine_zone, zc_magazine_zone->z_stats, mag);
+}
+
+static void
+zone_enable_caching(zone_t zone)
+{
+ zone_cache_t caches;
+
+ caches = zalloc_percpu_permanent_type(struct zone_cache);
+ zpercpu_foreach(zc, caches) {
+ zone_magazine_load(&zc->zc_alloc_cur, &zc->zc_alloc_elems,
+ zone_magazine_alloc(Z_WAITOK | Z_NOFAIL));
+ zone_magazine_load(&zc->zc_free_cur, &zc->zc_free_elems,
+ zone_magazine_alloc(Z_WAITOK | Z_NOFAIL));
+ STAILQ_INIT(&zc->zc_depot);
+ }
+
+ if (os_atomic_xchg(&zone->z_pcpu_cache, caches, release)) {
+ panic("allocating caches for zone %s twice", zone->z_name);
+ }
+}
+
+bool
+zone_maps_owned(vm_address_t addr, vm_size_t size)
+{
+ return from_zone_map(addr, size, ZONE_ADDR_NATIVE);
+}
+
+void
+zone_map_sizes(
+ vm_map_size_t *psize,
+ vm_map_size_t *pfree,
+ vm_map_size_t *plargest_free)
+{
+ vm_map_size_t size, free, largest;
+
+ vm_map_sizes(zone_submaps[0], psize, pfree, plargest_free);
+
+ for (uint32_t i = 1; i <= zone_last_submap_idx; i++) {
+ vm_map_sizes(zone_submaps[i], &size, &free, &largest);
+ *psize += size;
+ *pfree += free;
+ *plargest_free = MAX(*plargest_free, largest);
+ }
+}
+
+__attribute__((always_inline))
+vm_map_t
+zone_submap(zone_t zone)
+{
+ return zone_submaps[zone->z_submap_idx];
+}
+
+unsigned
+zpercpu_count(void)
+{
+ return zpercpu_early_count;
+}
+
+int
+track_this_zone(const char *zonename, const char *logname)
+{
+ unsigned int len;
+ const char *zc = zonename;
+ const char *lc = logname;
+
+ /*
+ * Compare the strings. We bound the compare by MAX_ZONE_NAME.
+ */
+
+ for (len = 1; len <= MAX_ZONE_NAME; zc++, lc++, len++) {
+ /*
+ * If the current characters don't match, check for a space in
+ * in the zone name and a corresponding period in the log name.
+ * If that's not there, then the strings don't match.
+ */
+
+ if (*zc != *lc && !(*zc == ' ' && *lc == '.')) {
+ break;
+ }
+
+ /*
+ * The strings are equal so far. If we're at the end, then it's a match.
+ */
+
+ if (*zc == '\0') {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
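+
+/*
+ * Example of the matching above: since boot-args cannot contain spaces,
+ * a zone named "vm objects" is selected with the log name "vm.objects",
+ * each space in the zone name matching a period in the log name
+ * (as used by the zlog boot-args).
+ */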
+
+#if DEBUG || DEVELOPMENT
+
+vm_size_t
+zone_element_info(void *addr, vm_tag_t * ptag)
+{
+ vm_size_t size = 0;
+ vm_tag_t tag = VM_KERN_MEMORY_NONE;
+ struct zone *src_zone;
+
+ if (from_zone_map(addr, sizeof(void *), ZONE_ADDR_NATIVE) ||
+ from_zone_map(addr, sizeof(void *), ZONE_ADDR_FOREIGN)) {
+ src_zone = &zone_array[zone_index_from_ptr(addr)];
+#if VM_MAX_TAG_ZONES
+ if (__improbable(src_zone->tags)) {
+ tag = *ztSlot(src_zone, (vm_offset_t)addr) >> 1;
+ }
+#endif /* VM_MAX_TAG_ZONES */
+ size = zone_elem_size(src_zone);
+ } else {
+#if CONFIG_GZALLOC
+ gzalloc_element_size(addr, NULL, &size);
+#endif /* CONFIG_GZALLOC */
+ }
+ *ptag = tag;
+ return size;
+}
+
+#endif /* DEBUG || DEVELOPMENT */
+
+/* The primary canary lives in the first pointer-sized location of an element, the backup canary in the last. */
+__header_always_inline vm_offset_t *
+get_primary_ptr(vm_offset_t elem)
+{
+ return (vm_offset_t *)elem;
+}
+
+__header_always_inline vm_offset_t *
+get_backup_ptr(vm_offset_t elem, vm_size_t elem_size)
+{
+ return (vm_offset_t *)(elem + elem_size - sizeof(vm_offset_t));
+}
+
+#endif /* !ZALLOC_TEST */
+#pragma mark Zone poisoning/zeroing and early random
+#if !ZALLOC_TEST
+
+#define ZONE_ENTROPY_CNT 2
+static struct zone_bool_gen {
+ struct bool_gen zbg_bg;
+ uint32_t zbg_entropy[ZONE_ENTROPY_CNT];
+} zone_bool_gen[MAX_CPUS];
+
+/*
+ * Initialize zone poisoning
+ * called from zone_bootstrap before any allocations are made from zalloc
+ */
+__startup_func
+static void
+zp_bootstrap(void)
+{
+ char temp_buf[16];
+
+ /*
+ * Initialize canary random cookie.
+ *
+ * Make sure that (zp_canary ^ pointer) has non-zero low bits (01),
+ * different from ZONE_POISON (11).
+ *
+ * On LP64, also make the high bits of (zp_canary ^ pointer) equal 0xC0FFEE...
+ */
+ static_assert(ZONE_POISON % 4 == 3);
+ zp_canary = (uintptr_t)early_random();
+#if __LP64__
+ zp_canary &= 0x000000fffffffffc;
+ zp_canary |= 0xc0ffee0000000001 ^ 0xffffff0000000000;
+#else
+ zp_canary &= 0xfffffffc;
+ zp_canary |= 0x00000001;
+#endif
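+
+ /*
+ * Concretely (hypothetical LP64 address): zp_canary ends up of the
+ * form 0x3f0011xxxxxxxxxx with low bits 01, so for an element at
+ * 0xffffff8012345680 the stored value (zp_canary ^ elem) has its
+ * high 24 bits equal to 0xC0FFEE and its low two bits equal to 01,
+ * which can be confused neither with zeroed memory nor with
+ * ZONE_POISON words (low bits 11, per the static_assert above).
+ */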
+
+ /* -zp: enable poisoning for every alloc and free */
+ if (PE_parse_boot_argn("-zp", temp_buf, sizeof(temp_buf))) {
+ zp_factor = 1;
+ }
+
+ /* -no-zp: disable poisoning */
+ if (PE_parse_boot_argn("-no-zp", temp_buf, sizeof(temp_buf))) {
+ zp_factor = 0;
+ printf("Zone poisoning disabled\n");
+ }
+
+ zpercpu_foreach_cpu(cpu) {
+ random_bool_init(&zone_bool_gen[cpu].zbg_bg);
+ }
+}
+
+static inline uint32_t
+zone_poison_count_init(zone_t zone)
+{
+ return zp_factor + (((uint32_t)zone_elem_size(zone)) >> zp_scale) ^
+ (mach_absolute_time() & 0x7);
+}
+
+/*
+ * Zero the element if zone has z_free_zeroes flag set else poison
+ * the element if zs_poison_seqno hits 0.
+ */
+static zprot_mode_t
+zfree_clear_or_poison(zone_t zone, vm_offset_t addr, vm_offset_t elem_size)
+{
+ if (zone->z_free_zeroes) {
+ if (zone->z_percpu) {
+ zpercpu_foreach_cpu(i) {
+ bzero((void *)(addr + ptoa(i)), elem_size);
+ }
+ } else {
+ bzero((void *)addr, elem_size);
+ }
+ return ZPM_ZERO;
+ }
+
+ zprot_mode_t poison = ZPM_AUTO;
+#if ZALLOC_ENABLE_POISONING
+ if (__improbable(zp_factor == 1)) {
+ poison = ZPM_POISON;
+ } else if (__probable(zp_factor != 0)) {
+ uint32_t *seqnop = &zpercpu_get(zone->z_stats)->zs_poison_seqno;
+ uint32_t seqno = os_atomic_load(seqnop, relaxed);
+ if (seqno == 0) {
+ os_atomic_store(seqnop, zone_poison_count_init(zone), relaxed);
+ poison = ZPM_POISON;
+ } else {
+ os_atomic_store(seqnop, seqno - 1, relaxed);
+ }
+ }
+ if (poison == ZPM_POISON) {
+ /* memset_pattern{4|8} could help make this faster: <rdar://problem/4662004> */
+ for (size_t i = 0; i < elem_size / sizeof(vm_offset_t); i++) {
+ ((vm_offset_t *)addr)[i] = ZONE_POISON;
+ }
+ } else {
+ /*
+ * Set a canary at both extremities of the element.
+ *
+ * Zero the first zp_min_size bytes of elements that aren't being
+ * poisoned.
+ *
+ * The element size is larger than zp_min_size on this path;
+ * zones with smaller elements have z_free_zeroes set.
+ */
+ *get_primary_ptr(addr) = zp_canary ^ (uintptr_t)addr;
+ bzero((void *)addr + sizeof(vm_offset_t),
+ zp_min_size - sizeof(vm_offset_t));
+ *get_backup_ptr(addr, elem_size) = zp_canary ^ (uintptr_t)addr;
+
+ poison = ZPM_CANARY;
+ }
+#endif /* ZALLOC_ENABLE_POISONING */
+
+ return poison;
+}
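+
+/*
+ * Resulting element layouts on the free path (sketch, with
+ * ZALLOC_ENABLE_POISONING):
+ *
+ *   ZPM_ZERO:   the whole element (each per-CPU slice) is zeroed
+ *   ZPM_POISON: every word of the element is set to ZONE_POISON
+ *   ZPM_CANARY: the first and last words hold (zp_canary ^ addr), the
+ *               next zp_min_size - sizeof(vm_offset_t) bytes are zeroed,
+ *               and the middle of the element is left untouched
+ *
+ * zalloc_validate_element() below checks the matching invariant when the
+ * element is handed out again.
+ */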
+
+#if ZALLOC_ENABLE_POISONING
+
+__abortlike
+static void
+zalloc_uaf_panic(zone_t z, uintptr_t elem, size_t size, zprot_mode_t zpm)
+{
+ uint32_t esize = (uint32_t)zone_elem_size(z);
+ uint32_t first_offs = ~0u;
+ uintptr_t first_bits = 0, v;
+ char buf[1024];
+ int pos = 0;
+ const char *how;
+
+#if __LP64__
+#define ZPF "0x%016lx"
+#else
+#define ZPF "0x%08lx"
+#endif
+
+ buf[0] = '\0';
+
+ if (zpm == ZPM_CANARY) {
+ how = "canaries";
+
+ v = *get_primary_ptr(elem);
+ if (v != (elem ^ zp_canary)) {
+ pos += scnprintf(buf + pos, sizeof(buf) - pos, "\n"
+ "%5d: got "ZPF", want "ZPF" (xor: "ZPF")",
+ 0, v, (elem ^ zp_canary), (v ^ elem ^ zp_canary));
+ if (first_offs > 0) {
+ first_offs = 0;
+ first_bits = v;
+ }
+ }
+
+ v = *get_backup_ptr(elem, esize);
+ if (v != (elem ^ zp_canary)) {
+ pos += scnprintf(buf + pos, sizeof(buf) - pos, "\n"
+ "%5d: got "ZPF", want "ZPF" (xor: "ZPF")",
+ esize - (int)sizeof(v), v, (elem ^ zp_canary),
+ (v ^ elem ^ zp_canary));
+ if (first_offs > esize - sizeof(v)) {
+ first_offs = esize - sizeof(v);
+ first_bits = v;
+ }
+ }
+
+ for (uint32_t o = sizeof(v); o < zp_min_size; o += sizeof(v)) {
+ if ((v = *(uintptr_t *)(elem + o)) == 0) {
+ continue;
+ }
+ pos += scnprintf(buf + pos, sizeof(buf) - pos, "\n"
+ "%5d: "ZPF, o, v);
+ if (first_offs > o) {
+ first_offs = o;
+ first_bits = v;
+ }
+ }
+ } else if (zpm == ZPM_ZERO) {
+ how = "zero";
+
+ for (uint32_t o = 0; o < size; o += sizeof(v)) {
+ if ((v = *(uintptr_t *)(elem + o)) == 0) {
+ continue;
+ }
+ pos += scnprintf(buf + pos, sizeof(buf) - pos, "\n"
+ "%5d: "ZPF, o, v);
+ if (first_offs > o) {
+ first_offs = o;
+ first_bits = v;
+ }
+ }
+ } else {
+ how = "poison";
+
+ for (uint32_t o = 0; o < size; o += sizeof(v)) {
+ if ((v = *(uintptr_t *)(elem + o)) == ZONE_POISON) {
+ continue;
+ }
+ pos += scnprintf(buf + pos, sizeof(buf) - pos, "\n"
+ "%5d: "ZPF" (xor: "ZPF")",
+ o, v, (v ^ ZONE_POISON));
+ if (first_offs > o) {
+ first_offs = o;
+ first_bits = v;
+ }
+ }
+ }
+
+ (panic)("[%s%s]: element modified after free "
+ "(off:%d, val:"ZPF", sz:%d, ptr:%p, prot:%s)%s",
+ zone_heap_name(z), zone_name(z),
+ first_offs, first_bits, esize, (void *)elem, how, buf);
+
+#undef ZPF
+}
+
+static void
+zalloc_validate_element_zero(zone_t zone, vm_offset_t elem, vm_size_t size)
+{
+ if (memcmp_zero_ptr_aligned((void *)elem, size)) {
+ zalloc_uaf_panic(zone, elem, size, ZPM_ZERO);
+ }
+ if (!zone->z_percpu) {
+ return;
+ }
+ for (size_t i = zpercpu_count(); --i > 0;) {
+ elem += PAGE_SIZE;
+ if (memcmp_zero_ptr_aligned((void *)elem, size)) {
+ zalloc_uaf_panic(zone, elem, size, ZPM_ZERO);
+ }
+ }
+}
+
+#if __arm64__ || __arm__
+typedef __attribute__((ext_vector_type(2))) vm_offset_t zpair_t;
+#else
+typedef struct {
+ vm_offset_t x;
+ vm_offset_t y;
+} zpair_t;
+#endif
+
+
+__attribute__((noinline))
+static void
+zalloc_validate_element_poison(zone_t zone, vm_offset_t elem, vm_size_t size)
+{
+ vm_offset_t p = elem;
+ vm_offset_t end = elem + size;
+
+ const zpair_t poison = { ZONE_POISON, ZONE_POISON };
+ zpair_t a, b;
+
+ a.x = *(const vm_offset_t *)p;
+ a.y = *(const vm_offset_t *)(end - sizeof(vm_offset_t));
+
+ a.x ^= poison.x;
+ a.y ^= poison.y;
+
+ /*
+ * align p to the next double-wide boundary
+ * align end to the previous double-wide boundary
+ */
+ p = (p + sizeof(zpair_t) - 1) & -sizeof(zpair_t);
+ end &= -sizeof(zpair_t);
+
+ if ((end - p) % (2 * sizeof(zpair_t)) == 0) {
+ b.x = 0;
+ b.y = 0;
+ } else {
+ end -= sizeof(zpair_t);
+ b.x = ((zpair_t *)end)[0].x ^ poison.x;
+ b.y = ((zpair_t *)end)[0].y ^ poison.y;
+ }
+
+ for (; p < end; p += 2 * sizeof(zpair_t)) {
+ a.x |= ((zpair_t *)p)[0].x ^ poison.x;
+ a.y |= ((zpair_t *)p)[0].y ^ poison.y;
+ b.x |= ((zpair_t *)p)[1].x ^ poison.x;
+ b.y |= ((zpair_t *)p)[1].y ^ poison.y;
+ }
+
+ a.x |= b.x;
+ a.y |= b.y;
+
+ if (a.x || a.y) {
+ zalloc_uaf_panic(zone, elem, size, ZPM_POISON);
+ }
+}
+
+static void
+zalloc_validate_element(zone_t zone, vm_offset_t elem, vm_size_t size,
+ zprot_mode_t zpm)
+{
+ vm_offset_t *primary = get_primary_ptr(elem);
+ vm_offset_t *backup = get_backup_ptr(elem, size);
+
+#if CONFIG_GZALLOC
+ if (zone->gzalloc_tracked) {
+ return;
+ }
+#endif /* CONFIG_GZALLOC */
+
+ if (zone->z_free_zeroes) {
+ return zalloc_validate_element_zero(zone, elem, size);
+ }
+
+ switch (zpm) {
+ case ZPM_AUTO:
+ if (*backup == 0) {
+ size -= sizeof(vm_size_t);
+ return zalloc_validate_element_zero(zone, elem, size);
+ }
+ if (*backup == ZONE_POISON) {
+ size -= sizeof(vm_size_t);
+ return zalloc_validate_element_poison(zone, elem, size);
+ }
+ OS_FALLTHROUGH;
+
+ case ZPM_CANARY:
+ if ((*primary ^ zp_canary) != elem || (*backup ^ zp_canary) != elem) {
+ zalloc_uaf_panic(zone, elem, size, ZPM_CANARY);
+ }
+ *primary = *backup = 0;
+ size = zp_min_size;
+ OS_FALLTHROUGH;
+
+ case ZPM_ZERO:
+ return zalloc_validate_element_zero(zone, elem, size);
+
+ case ZPM_POISON:
+ return zalloc_validate_element_poison(zone, elem, size);
+ }
+}
+
+#endif /* ZALLOC_ENABLE_POISONING */
+#if ZALLOC_EARLY_GAPS
+
+__attribute__((noinline))
+static void
+zone_early_gap_drop(int n)
+{
+ while (n-- > 0) {
+ zone_t zone0 = &zone_array[0];
+ struct zone_page_metadata *meta = NULL;
+ vm_offset_t addr;
+ uint16_t pages;
+ vm_map_t map;
+
+ lck_mtx_lock(&zone_metadata_region_lck);
+
+ if (!zone_pva_is_null(zone0->z_pageq_va)) {
+ meta = zone_meta_queue_pop_native(zone0,
+ &zone0->z_pageq_va, &addr);
+ map = zone_submaps[meta->zm_chunk_len];
+ pages = meta->zm_alloc_size;
+ __builtin_bzero(meta, sizeof(struct zone_page_metadata));
+ }
+
+ lck_mtx_unlock(&zone_metadata_region_lck);
+
+ if (!meta) {
+ break;
+ }
+
+ kmem_free(map, addr, ptoa(pages));
+ }
+}
+
+static void
+zone_early_gap_add(zone_t z, uint16_t pages)
+{
+ struct zone_page_metadata *meta = NULL;
+ zone_t zone0 = &zone_array[0];
+ kern_return_t kr;
+ vm_offset_t addr;
+
+ kma_flags_t kmaflags = KMA_KOBJECT | KMA_ZERO | KMA_VAONLY;
+ if (z->z_submap_idx == Z_SUBMAP_IDX_GENERAL &&
+ z->kalloc_heap != KHEAP_ID_NONE) {
+ kmaflags |= KMA_KHEAP;
+ }
+
+ kr = kernel_memory_allocate(zone_submap(z), &addr, ptoa(pages), 0,
+ kmaflags, VM_KERN_MEMORY_ZONE);
+
+ if (kr != KERN_SUCCESS) {
+ panic("unable to allocate early gap (%d pages): %d", pages, kr);
+ }
+
+ zone_meta_populate(addr, ptoa(pages));
+
+ meta = zone_meta_from_addr(addr);
+ meta->zm_alloc_size = pages;
+ meta->zm_chunk_len = z->z_submap_idx;
+
+ lck_mtx_lock(&zone_metadata_region_lck);
+ zone_meta_queue_push(zone0, &zone0->z_pageq_va, meta);
+ lck_mtx_unlock(&zone_metadata_region_lck);
+}
+
+/*
+ * Roughly until pd1 is made, introduce random gaps
+ * between allocated pages.
+ *
+ * This way the early boot allocations are not in a completely
+ * predictable order and relative position.
+ *
+ * Those gaps are returned to the maps afterwards.
+ *
+ * We abuse the zone 0 (which is unused) "va" pageq to remember
+ * those ranges.
+ */
+__attribute__((noinline))
+static void
+zone_allocate_random_early_gap(zone_t z)
+{
+ int16_t pages = early_random() % 16;
+
+ /*
+ * 6% of the time: drop 2 gaps
+ * 25% of the time: drop 1 gap
+ * 37% of the time: do nothing
+ * 18% of the time: add 1 gap
+ * 12% of the time: add 2 gaps
+ */
+ if (pages > 10) {
+ zone_early_gap_drop(pages == 15 ? 2 : 1);
+ }
+ if (pages < 5) {
+ /* values are 6, 8, 10, 12 or 14 pages */
+ zone_early_gap_add(z, 6 + 2 * pages);
+ }
+ if (pages < 2) {
+ zone_early_gap_add(z, 6 + early_random() % 16);
+ }
+}
+
+static inline void
+zone_cleanup_early_gaps_if_needed(void)
+{
+ if (__improbable(!zone_pva_is_null(zone_array[0].z_pageq_va))) {
+ zone_early_gap_drop(10);
+ }
+}
+
+#endif /* ZALLOC_EARLY_GAPS */
+
+static void
+zone_early_scramble_rr(zone_t zone, zone_stats_t zstats)
+{
+ int cpu = cpu_number();
+ zone_stats_t zs = zpercpu_get_cpu(zstats, cpu);
+ uint32_t bits;
+
+ bits = random_bool_gen_bits(&zone_bool_gen[cpu].zbg_bg,
+ zone_bool_gen[cpu].zbg_entropy, ZONE_ENTROPY_CNT, 8);
+
+ zs->zs_alloc_rr += bits;
+ zs->zs_alloc_rr %= zone->z_chunk_elems;
+}
+
+#endif /* !ZALLOC_TEST */
+#pragma mark Zone Leak Detection
+#if !ZALLOC_TEST
+
+/*
+ * Zone leak debugging code
+ *
+ * When enabled, this code keeps a log to track allocations to a particular zone that have not
+ * yet been freed. Examining this log will reveal the source of a zone leak. The log is allocated
+ * only when logging is enabled, so there is no effect on the system when it's turned off. Logging is
+ * off by default.
+ *
+ * Enable the logging via the boot-args. Add the parameter "zlog=<zone>" to boot-args where <zone>
+ * is the name of the zone you wish to log.
+ *
+ * This code can only track a limited number of zones at once (see MAX_NUM_ZONES_ALLOWED_LOGGING below),
+ * so you need to identify which one is leaking first.
+ * Generally, you'll know you have a leak when you get a "zalloc retry failed 3" panic from the zone
+ * garbage collector. Note that the zone name printed in the panic message is not necessarily the one
+ * containing the leak. So do a zprint from gdb and locate the zone with the bloated size. This
+ * is most likely the problem zone, so set zlog in boot-args to this zone name, reboot and re-run the test. The
+ * next time it panics with this message, examine the log using the kgmacros zstack, findoldest and countpcs.
+ * See the help in the kgmacros for usage info.
+ *
+ *
+ * Zone corruption logging
+ *
+ * Logging can also be used to help identify the source of a zone corruption. First, identify the zone
+ * that is being corrupted, then add "-zc zlog=<zone name>" to the boot-args. When -zc is used in conjunction
+ * with zlog, it changes the logging style to track both allocations and frees to the zone. So when the
+ * corruption is detected, examining the log will show you the stack traces of the callers who last allocated
+ * and freed any particular element in the zone. Use the findelem kgmacro with the address of the element that's been
+ * corrupted to examine its history. This should lead to the source of the corruption.
+ */
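+/*
+ * For illustration only (the zone name below is hypothetical): a leak
+ * investigation might boot with
+ *
+ *     zlog=data.kalloc.128 zrecs=8000
+ *
+ * while a corruption investigation of the same zone adds -zc so that both
+ * allocations and frees are recorded:
+ *
+ *     -zc zlog=data.kalloc.128 zrecs=8000
+ *
+ * Periods in the boot-arg value match spaces in the zone name (see
+ * track_this_zone()).
+ */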
+
+/* Returns TRUE if we rolled over the counter at factor */
+__header_always_inline bool
+sample_counter(volatile uint32_t *count_p, uint32_t factor)
+{
+ uint32_t old_count, new_count = 0;
+ if (count_p != NULL) {
+ os_atomic_rmw_loop(count_p, old_count, new_count, relaxed, {
+ new_count = old_count + 1;
+ if (new_count >= factor) {
+ new_count = 0;
+ }
+ });
+ }
+
+ return new_count == 0;
+}
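+/*
+ * Sketch of the intended use (this is how zalloc_log_or_trace_leaks() below
+ * calls it): with zleak_sample_factor == 1000, roughly one call in a thousand
+ * rolls the counter over and returns true, so only that call pays for taking
+ * a backtrace:
+ *
+ *     if (sample_counter(&zone->zleak_capture, zleak_sample_factor)) {
+ *         // capture and record a backtrace for this allocation
+ *     }
+ */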
+
+#if ZONE_ENABLE_LOGGING
+/* Log allocations and frees to help debug a zone element corruption */
+static TUNABLE(bool, corruption_debug_flag, "-zc", false);
+
+#define MAX_NUM_ZONES_ALLOWED_LOGGING 10 /* Maximum 10 zones can be logged at once */
+
+static int max_num_zones_to_log = MAX_NUM_ZONES_ALLOWED_LOGGING;
+static int num_zones_logged = 0;
+
+/*
+ * The number of records in the log is configurable via the zrecs parameter in boot-args. Set this to
+ * the number of records you want in the log. For example, "zrecs=10" sets it to 10 records. Since this
+ * is the number of stacks suspected of leaking, we don't need many records.
+ */
+
+#if defined(__LP64__)
+#define ZRECORDS_MAX 2560 /* Max records allowed in the log */
+#else
+#define ZRECORDS_MAX 1536 /* Max records allowed in the log */
+#endif
+#define ZRECORDS_DEFAULT 1024 /* default records in log if zrecs is not specified in boot-args */
+
+static TUNABLE(uint32_t, log_records, "zrecs", ZRECORDS_DEFAULT);
+
+static void
+zone_enable_logging(zone_t z)
+{
+ z->zlog_btlog = btlog_create(log_records, MAX_ZTRACE_DEPTH,
+ (corruption_debug_flag == FALSE) /* caller_will_remove_entries_for_element? */);
+
+ if (z->zlog_btlog) {
+ printf("zone: logging started for zone %s%s\n",
+ zone_heap_name(z), z->z_name);
+ } else {
+ printf("zone: couldn't allocate memory for zrecords, turning off zleak logging\n");
+ z->zone_logging = false;
+ }
+}
+
+/**
+ * @function zone_setup_logging
+ *
+ * @abstract
+ * Optionally sets up a zone for logging.
+ *
+ * @discussion
+ * We recognize two boot-args:
+ *
+ * zlog=<zone_to_log>
+ * zrecs=<num_records_in_log>
+ *
+ * The zlog arg is used to specify the zone name that should be logged,
+ * and zrecs is used to control the size of the log.
+ *
+ * If zrecs is not specified, a default value is used.
+ */
+static void
+zone_setup_logging(zone_t z)
+{
+ char zone_name[MAX_ZONE_NAME]; /* Temp. buffer for the zone name */
+ char zlog_name[MAX_ZONE_NAME]; /* Temp. buffer to create the strings zlog1, zlog2 etc... */
+ char zlog_val[MAX_ZONE_NAME]; /* the zone name we're logging, if any */
+
+ /*
+ * Don't allow more than ZRECORDS_MAX records even if the user asked for more.
+ *
+ * This prevents accidentally hogging too much kernel memory
+ * and making the system unusable.
+ */
+ if (log_records > ZRECORDS_MAX) {
+ log_records = ZRECORDS_MAX;
+ }
+
+ /*
+ * Append kalloc heap name to zone name (if zone is used by kalloc)
+ */
+ snprintf(zone_name, MAX_ZONE_NAME, "%s%s", zone_heap_name(z), z->z_name);
+
+ /* zlog0 isn't allowed. */
+ for (int i = 1; i <= max_num_zones_to_log; i++) {
+ snprintf(zlog_name, MAX_ZONE_NAME, "zlog%d", i);
+
+ if (PE_parse_boot_argn(zlog_name, zlog_val, sizeof(zlog_val)) &&
+ track_this_zone(zone_name, zlog_val)) {
+ z->zone_logging = true;
+ num_zones_logged++;
+ break;
+ }
+ }
+
+ /*
+ * Backwards compatibility with the old boot-arg used to specify single-zone
+ * logging, i.e. "zlog". This check needs to happen after the newer zlogN
+ * checks because the "zlog" prefix matches all of the zlogN boot-args.
+ */
+ if (!z->zone_logging &&
+ PE_parse_boot_argn("zlog", zlog_val, sizeof(zlog_val)) &&
+ track_this_zone(zone_name, zlog_val)) {
+ z->zone_logging = true;
+ num_zones_logged++;
+ }
+
+
+ /*
+ * If we want to log a zone, see if we need to allocate buffer space for
+ * the log.
+ *
+ * Some vm related zones are zinit'ed before we can do a kmem_alloc, so
+ * we have to defer allocation in that case.
+ *
+ * zone_init() will finish the job.
+ *
+ * If we want to log one of the VM related zones that's set up early on,
+ * we will skip allocation of the log until zinit is called again later
+ * on some other zone.
+ */
+ if (z->zone_logging && startup_phase >= STARTUP_SUB_KMEM_ALLOC) {
+ zone_enable_logging(z);
+ }
+}
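+/*
+ * Up to MAX_NUM_ZONES_ALLOWED_LOGGING zones can be logged at once using the
+ * numbered form of the boot-arg, for example (zone names hypothetical):
+ *
+ *     zlog1=vm.map.entries zlog2=ipc.ports zrecs=4000
+ *
+ * zlog0 is not recognized, and the plain "zlog=" form is kept for backwards
+ * compatibility; it is checked after the numbered forms.
+ */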
+
+/*
+ * Each record in the log contains a pointer to the zone element it refers to,
+ * and a small array to hold the pc's from the stack trace. A
+ * record is added to the log each time a zalloc() is done in the zone_of_interest. For leak debugging,
+ * the record is cleared when a zfree() is done. For corruption debugging, the log tracks both allocs and frees.
+ * If the log fills, old records are replaced as if it were a circular buffer.
+ */
+
+
+/*
+ * Decide if we want to log this zone by doing a string compare between a zone name and the name
+ * of the zone to log. Return true if the strings are equal, false otherwise. Because it's not
+ * possible to include spaces in strings passed in via the boot-args, a period in the logname will
+ * match a space in the zone name.
+ */
+
+/*
+ * Test if we want to log this zalloc/zfree event. We log if this is the zone we're interested in and
+ * the buffer for the records has been allocated.
+ */
+
+#define DO_LOGGING(z) (z->zlog_btlog != NULL)
+#else /* !ZONE_ENABLE_LOGGING */
+#define DO_LOGGING(z) 0
+#endif /* !ZONE_ENABLE_LOGGING */
+#if CONFIG_ZLEAKS
+
+/*
+ * The zone leak detector, abbreviated 'zleak', keeps track of a subset of the currently outstanding
+ * allocations made by the zone allocator. Every zleak_sample_factor allocations in each zone, we capture a
+ * backtrace. Every free, we examine the table and determine if the allocation was being tracked,
+ * and stop tracking it if it was being tracked.
+ *
+ * We track the allocations in the zallocations hash table, which stores the address that was returned from
+ * the zone allocator. Each stored entry in the zallocations table points to an entry in the ztraces table, which
+ * stores the backtrace associated with that allocation. This provides uniquing for the relatively large
+ * backtraces - we don't store them more than once.
+ *
+ * Data collection begins when the zone map is 50% full, and only occurs for zones that are taking up
+ * a large amount of virtual space.
+ */
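+/*
+ * Rough sketch of how the two tables relate (see zleak_log() below): a
+ * recorded allocation lives at
+ *
+ *     zallocations[hashaddr(addr, zleak_alloc_buckets)]
+ *
+ * and its za_trace_index points into
+ *
+ *     ztraces[hashbacktrace(bt, depth, zleak_trace_buckets)]
+ *
+ * whose zt_size accumulates the outstanding bytes attributed to that
+ * backtrace; zleak_free() undoes both steps when the element is freed.
+ */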
+#define ZLEAK_STATE_ENABLED 0x01 /* Zone leak monitoring should be turned on if zone_map fills up. */
+#define ZLEAK_STATE_ACTIVE 0x02 /* We are actively collecting traces. */
+#define ZLEAK_STATE_ACTIVATING 0x04 /* Some thread is doing setup; others should move along. */
+#define ZLEAK_STATE_FAILED 0x08 /* Attempt to allocate tables failed. We will not try again. */
+static uint32_t zleak_state = 0; /* State of collection, as above */
+static unsigned int zleak_sample_factor = 1000; /* Allocations per sample attempt */
+
+bool panic_include_ztrace = FALSE; /* Enable zleak logging on panic */
+vm_size_t zleak_global_tracking_threshold; /* Size of zone map at which to start collecting data */
+vm_size_t zleak_per_zone_tracking_threshold; /* Size a zone will have before we will collect data on it */
+
+/*
+ * Counters for allocation statistics.
+ */
+
+/* Times two active records want to occupy the same spot */
+static unsigned int z_alloc_collisions = 0;
+static unsigned int z_trace_collisions = 0;
+
+/* Times a new record lands on a spot previously occupied by a freed allocation */
+static unsigned int z_alloc_overwrites = 0;
+static unsigned int z_trace_overwrites = 0;
+
+/* Times a new alloc or trace is put into the hash table */
+static unsigned int z_alloc_recorded = 0;
+static unsigned int z_trace_recorded = 0;
+
+/* Times zleak_log returned false due to not being able to acquire the lock */
+static unsigned int z_total_conflicts = 0;
+
+/*
+ * Structure for keeping track of an allocation
+ * An allocation bucket is in use if its element is not NULL
+ */
+struct zallocation {
+ uintptr_t za_element; /* the element that was zalloc'ed or zfree'ed, NULL if bucket unused */
+ vm_size_t za_size; /* how much memory did this allocation take up? */
+ uint32_t za_trace_index; /* index into ztraces for backtrace associated with allocation */
+ /* TODO: #if this out */
+ uint32_t za_hit_count; /* for determining effectiveness of hash function */
+};
+
+/* Size must be a power of two for the zhash to be able to just mask off bits instead of mod */
+static uint32_t zleak_alloc_buckets = CONFIG_ZLEAK_ALLOCATION_MAP_NUM;
+static uint32_t zleak_trace_buckets = CONFIG_ZLEAK_TRACE_MAP_NUM;
+
+vm_size_t zleak_max_zonemap_size;
+
+/* Hashmaps of allocations and their corresponding traces */
+static struct zallocation* zallocations;
+static struct ztrace* ztraces;
+
+/* not static so that panic can see this, see kern/debug.c */
+struct ztrace* top_ztrace;
+
+/* Lock to protect zallocations, ztraces, and top_ztrace from concurrent modification. */
+static LCK_GRP_DECLARE(zleak_lock_grp, "zleak_lock");
+static LCK_SPIN_DECLARE(zleak_lock, &zleak_lock_grp);
+
+/*
+ * Initializes the zone leak monitor. Called from zone_init()
+ */
+__startup_func
+static void
+zleak_init(vm_size_t max_zonemap_size)
+{
+ char scratch_buf[16];
+ boolean_t zleak_enable_flag = FALSE;
+
+ zleak_max_zonemap_size = max_zonemap_size;
+ zleak_global_tracking_threshold = max_zonemap_size / 2;
+ zleak_per_zone_tracking_threshold = zleak_global_tracking_threshold / 8;
+
+#if CONFIG_EMBEDDED
+ if (PE_parse_boot_argn("-zleakon", scratch_buf, sizeof(scratch_buf))) {
+ zleak_enable_flag = TRUE;
+ printf("zone leak detection enabled\n");
+ } else {
+ zleak_enable_flag = FALSE;
+ printf("zone leak detection disabled\n");
+ }
+#else /* CONFIG_EMBEDDED */
+ /* -zleakoff (flag to disable zone leak monitor) */
+ if (PE_parse_boot_argn("-zleakoff", scratch_buf, sizeof(scratch_buf))) {
+ zleak_enable_flag = FALSE;
+ printf("zone leak detection disabled\n");
+ } else {
+ zleak_enable_flag = TRUE;
+ printf("zone leak detection enabled\n");
+ }
+#endif /* CONFIG_EMBEDDED */
+
+ /* zfactor=XXXX (override how often to sample the zone allocator) */
+ if (PE_parse_boot_argn("zfactor", &zleak_sample_factor, sizeof(zleak_sample_factor))) {
+ printf("Zone leak factor override: %u\n", zleak_sample_factor);
+ }
+
+ /* zleak-allocs=XXXX (override number of buckets in zallocations) */
+ if (PE_parse_boot_argn("zleak-allocs", &zleak_alloc_buckets, sizeof(zleak_alloc_buckets))) {
+ printf("Zone leak alloc buckets override: %u\n", zleak_alloc_buckets);
+ /* uses 'is power of 2' trick: (0x01000 & 0x00FFF == 0) */
+ if (zleak_alloc_buckets == 0 || (zleak_alloc_buckets & (zleak_alloc_buckets - 1))) {
+ printf("Override isn't a power of two, bad things might happen!\n");
+ }
+ }
+
+ /* zleak-traces=XXXX (override number of buckets in ztraces) */
+ if (PE_parse_boot_argn("zleak-traces", &zleak_trace_buckets, sizeof(zleak_trace_buckets))) {
+ printf("Zone leak trace buckets override: %u\n", zleak_trace_buckets);
+ /* uses 'is power of 2' trick: (0x01000 & 0x00FFF == 0) */
+ if (zleak_trace_buckets == 0 || (zleak_trace_buckets & (zleak_trace_buckets - 1))) {
+ printf("Override isn't a power of two, bad things might happen!\n");
+ }
+ }
+
+ if (zleak_enable_flag) {
+ zleak_state = ZLEAK_STATE_ENABLED;
+ }
+}
+
+/*
+ * Support for kern.zleak.active sysctl - a simplified
+ * version of the zleak_state variable.
+ */
+int
+get_zleak_state(void)
+{
+ if (zleak_state & ZLEAK_STATE_FAILED) {
+ return -1;
+ }
+ if (zleak_state & ZLEAK_STATE_ACTIVE) {
+ return 1;
+ }
+ return 0;
+}
+
+kern_return_t
+zleak_activate(void)
+{
+ kern_return_t retval;
+ vm_size_t z_alloc_size = zleak_alloc_buckets * sizeof(struct zallocation);
+ vm_size_t z_trace_size = zleak_trace_buckets * sizeof(struct ztrace);
+ void *allocations_ptr = NULL;
+ void *traces_ptr = NULL;
+
+ /* Only one thread attempts to activate at a time */
+ if (zleak_state & (ZLEAK_STATE_ACTIVE | ZLEAK_STATE_ACTIVATING | ZLEAK_STATE_FAILED)) {
+ return KERN_SUCCESS;
+ }
+
+ /* Indicate that we're doing the setup */
+ lck_spin_lock(&zleak_lock);
+ if (zleak_state & (ZLEAK_STATE_ACTIVE | ZLEAK_STATE_ACTIVATING | ZLEAK_STATE_FAILED)) {
+ lck_spin_unlock(&zleak_lock);
+ return KERN_SUCCESS;
+ }
+
+ zleak_state |= ZLEAK_STATE_ACTIVATING;
+ lck_spin_unlock(&zleak_lock);
+
+ /* Allocate and zero tables */
+ retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&allocations_ptr, z_alloc_size, VM_KERN_MEMORY_DIAG);
+ if (retval != KERN_SUCCESS) {
+ goto fail;
+ }
+
+ retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&traces_ptr, z_trace_size, VM_KERN_MEMORY_DIAG);
+ if (retval != KERN_SUCCESS) {
+ goto fail;
+ }
+
+ bzero(allocations_ptr, z_alloc_size);
+ bzero(traces_ptr, z_trace_size);
+
+ /* Everything's set. Install tables, mark active. */
+ zallocations = allocations_ptr;
+ ztraces = traces_ptr;
+
+ /*
+ * Initialize the top_ztrace to the first entry in ztraces,
+ * so we don't have to check for null in zleak_log
+ */
+ top_ztrace = &ztraces[0];
+
+ /*
+ * Note that we do need a barrier between installing
+ * the tables and setting the active flag, because the zfree()
+ * path accesses the table without a lock if we're active.
+ */
+ lck_spin_lock(&zleak_lock);
+ zleak_state |= ZLEAK_STATE_ACTIVE;
+ zleak_state &= ~ZLEAK_STATE_ACTIVATING;
+ lck_spin_unlock(&zleak_lock);
+
+ return 0;
+
+fail:
+ /*
+ * If we fail to allocate memory, don't further tax
+ * the system by trying again.
+ */
+ lck_spin_lock(&zleak_lock);
+ zleak_state |= ZLEAK_STATE_FAILED;
+ zleak_state &= ~ZLEAK_STATE_ACTIVATING;
+ lck_spin_unlock(&zleak_lock);
+
+ if (allocations_ptr != NULL) {
+ kmem_free(kernel_map, (vm_offset_t)allocations_ptr, z_alloc_size);
+ }
+
+ if (traces_ptr != NULL) {
+ kmem_free(kernel_map, (vm_offset_t)traces_ptr, z_trace_size);
+ }
+
+ return retval;
+}
+
+static inline void
+zleak_activate_if_needed(void)
+{
+ if (__probable((zleak_state & ZLEAK_STATE_ENABLED) == 0)) {
+ return;
+ }
+ if (zleak_state & ZLEAK_STATE_ACTIVE) {
+ return;
+ }
+ if (zone_submaps_approx_size() < zleak_global_tracking_threshold) {
+ return;
+ }
+
+ kern_return_t kr = zleak_activate();
+ if (kr != KERN_SUCCESS) {
+ printf("Failed to activate live zone leak debugging (%d).\n", kr);
+ }
+}
+
+static inline void
+zleak_track_if_needed(zone_t z)
+{
+ if (__improbable(zleak_state & ZLEAK_STATE_ACTIVE)) {
+ if (!z->zleak_on &&
+ zone_size_wired(z) >= zleak_per_zone_tracking_threshold) {
+ z->zleak_on = true;
+ }
+ }
+}
+
+/*
+ * TODO: What about allocations that never get deallocated,
+ * especially ones with unique backtraces? Should we wait to record
+ * until after boot has completed?
+ * (How many persistent zallocs are there?)
+ */
+
/*
- * Clear out the old next pointer and backup to avoid leaking the zone
- * poisoning cookie and so that only values on the freelist have a valid
- * cookie.
+ * This function records the allocation in the allocations table,
+ * and stores the associated backtrace in the traces table
+ * (or just increments the refcount if the trace is already recorded).
+ * If the allocation slot is in use, the old allocation is replaced with the new one,
+ * and the old allocation's trace has its refcount decremented.
+ * If the trace slot is occupied by a different backtrace, the function bails out.
+ * The refcount is incremented by the amount of memory the allocation consumes.
+ * The return value indicates whether to try again next time.
*/
-void
-zone_clear_freelist_pointers(zone_t zone, vm_offset_t addr)
+static boolean_t
+zleak_log(uintptr_t* bt,
+ uintptr_t addr,
+ uint32_t depth,
+ vm_size_t allocation_size)
+{
+ /* Quit if there's someone else modifying the hash tables */
+ if (!lck_spin_try_lock(&zleak_lock)) {
+ z_total_conflicts++;
+ return FALSE;
+ }
+
+ struct zallocation* allocation = &zallocations[hashaddr(addr, zleak_alloc_buckets)];
+
+ uint32_t trace_index = hashbacktrace(bt, depth, zleak_trace_buckets);
+ struct ztrace* trace = &ztraces[trace_index];
+
+ allocation->za_hit_count++;
+ trace->zt_hit_count++;
+
+ /*
+ * If the allocation bucket we want to be in is occupied, and if the occupier
+ * has the same trace as us, just bail.
+ */
+ if (allocation->za_element != (uintptr_t) 0 && trace_index == allocation->za_trace_index) {
+ z_alloc_collisions++;
+
+ lck_spin_unlock(&zleak_lock);
+ return TRUE;
+ }
+
+ /* STEP 1: Store the backtrace in the traces array. */
+ /* A size of zero indicates that the trace bucket is free. */
+
+ if (trace->zt_size > 0 && bcmp(trace->zt_stack, bt, (depth * sizeof(uintptr_t))) != 0) {
+ /*
+ * Different unique trace with same hash!
+ * Just bail - if we're trying to record the leaker, hopefully the other trace will be deallocated
+ * and get out of the way for later chances
+ */
+ trace->zt_collisions++;
+ z_trace_collisions++;
+
+ lck_spin_unlock(&zleak_lock);
+ return TRUE;
+ } else if (trace->zt_size > 0) {
+ /* Same trace, already added, so increment refcount */
+ trace->zt_size += allocation_size;
+ } else {
+ /* Found an unused trace bucket, record the trace here! */
+ if (trace->zt_depth != 0) { /* if this slot was previously used but not currently in use */
+ z_trace_overwrites++;
+ }
+
+ z_trace_recorded++;
+ trace->zt_size = allocation_size;
+ memcpy(trace->zt_stack, bt, (depth * sizeof(uintptr_t)));
+
+ trace->zt_depth = depth;
+ trace->zt_collisions = 0;
+ }
+
+ /* STEP 2: Store the allocation record in the allocations array. */
+
+ if (allocation->za_element != (uintptr_t) 0) {
+ /*
+ * Straight up replace any allocation record that was there. We don't want to do the work
+ * to preserve the allocation entries that were there, because we only record a subset of the
+ * allocations anyways.
+ */
+
+ z_alloc_collisions++;
+
+ struct ztrace* associated_trace = &ztraces[allocation->za_trace_index];
+ /* Knock off old allocation's size, not the new allocation */
+ associated_trace->zt_size -= allocation->za_size;
+ } else if (allocation->za_trace_index != 0) {
+ /* Slot previously used but not currently in use */
+ z_alloc_overwrites++;
+ }
+
+ allocation->za_element = addr;
+ allocation->za_trace_index = trace_index;
+ allocation->za_size = allocation_size;
+
+ z_alloc_recorded++;
+
+ if (top_ztrace->zt_size < trace->zt_size) {
+ top_ztrace = trace;
+ }
+
+ lck_spin_unlock(&zleak_lock);
+ return TRUE;
+}
+
+/*
+ * Free the allocation record and release the stacktrace.
+ * This should be as fast as possible because it will be called for every free.
+ */
+__attribute__((noinline))
+static void
+zleak_free(uintptr_t addr,
+ vm_size_t allocation_size)
{
- vm_offset_t perm_value = 0;
+ if (addr == (uintptr_t) 0) {
+ return;
+ }
+
+ struct zallocation* allocation = &zallocations[hashaddr(addr, zleak_alloc_buckets)];
+
+ /* Double-checked locking: check to find out if we're interested, lock, check to make
+ * sure it hasn't changed, then modify it, and release the lock.
+ */
+
+ if (allocation->za_element == addr && allocation->za_trace_index < zleak_trace_buckets) {
+ /* if the allocation was the one, grab the lock, check again, then delete it */
+ lck_spin_lock(&zleak_lock);
+
+ if (allocation->za_element == addr && allocation->za_trace_index < zleak_trace_buckets) {
+ struct ztrace *trace;
+
+ /* allocation_size had better match what was passed into zleak_log - otherwise someone is freeing into the wrong zone! */
+ if (allocation->za_size != allocation_size) {
+ panic("Freeing as size %lu memory that was allocated with size %lu\n",
+ (uintptr_t)allocation_size, (uintptr_t)allocation->za_size);
+ }
+
+ trace = &ztraces[allocation->za_trace_index];
+
+ /* size of 0 indicates trace bucket is unused */
+ if (trace->zt_size > 0) {
+ trace->zt_size -= allocation_size;
+ }
- if (!zone->zfree_clear_mem) {
- perm_value = ZONE_POISON;
+ /* A NULL element means the allocation bucket is unused */
+ allocation->za_element = 0;
+ }
+ lck_spin_unlock(&zleak_lock);
}
+}
- vm_offset_t *primary = (vm_offset_t *) addr;
- vm_offset_t *backup = get_backup_ptr(zone_elem_size(zone), primary);
-
- *primary = perm_value;
- *backup = perm_value;
+#else
+static inline void
+zleak_activate_if_needed(void)
+{
}
-#if ZALLOC_ENABLE_POISONING
-__abortlike
-static void
-zone_element_not_clear_panic(zone_t zone, void *addr)
+static inline void
+zleak_track_if_needed(__unused zone_t z)
{
- panic("Zone element %p was modified after free for zone %s%s: "
- "Expected element to be cleared", addr, zone_heap_name(zone),
- zone->z_name);
}
+#endif /* CONFIG_ZLEAKS */
+#if ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS
-/*
- * Validate that the element was not tampered with while it was in the
- * freelist.
- */
-void
-zalloc_validate_element(zone_t zone, vm_offset_t addr, vm_size_t size, bool validate)
+__attribute__((noinline))
+static void
+zalloc_log_or_trace_leaks(zone_t zone, vm_offset_t addr, void *fp)
{
- if (zone->percpu) {
- assert(zone->zfree_clear_mem);
- zpercpu_foreach_cpu(i) {
- if (memcmp_zero_ptr_aligned((void *)(addr + ptoa(i)), size)) {
- zone_element_not_clear_panic(zone, (void *)(addr + ptoa(i)));
+ uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* used in zone leak logging and zone leak detection */
+ unsigned int numsaved = 0;
+
+#if ZONE_ENABLE_LOGGING
+ if (DO_LOGGING(zone)) {
+ numsaved = backtrace(zbt, MAX_ZTRACE_DEPTH, NULL);
+ btlog_add_entry(zone->zlog_btlog, (void *)addr,
+ ZOP_ALLOC, (void **)zbt, numsaved);
+ }
+#endif /* ZONE_ENABLE_LOGGING */
+
+#if CONFIG_ZLEAKS
+ /*
+ * Zone leak detection: capture a backtrace every zleak_sample_factor
+ * allocations in this zone.
+ */
+ if (__improbable(zone->zleak_on)) {
+ if (sample_counter(&zone->zleak_capture, zleak_sample_factor)) {
+ /* Avoid backtracing twice if zone logging is on */
+ if (numsaved == 0) {
+ numsaved = backtrace_frame(zbt, MAX_ZTRACE_DEPTH, fp, NULL);
+ }
+ /* Sampling can fail if another sample is happening at the same time in a different zone. */
+ if (!zleak_log(zbt, addr, numsaved, zone_elem_size(zone))) {
+ /* If it failed, roll back the counter so we sample the next allocation instead. */
+ zone->zleak_capture = zleak_sample_factor;
}
}
- } else if (zone->zfree_clear_mem) {
- if (memcmp_zero_ptr_aligned((void *)addr, size)) {
- zone_element_not_clear_panic(zone, (void *)addr);
- }
- } else if (__improbable(validate)) {
- const vm_offset_t *p = (vm_offset_t *)addr;
- const vm_offset_t *end = (vm_offset_t *)(addr + size);
+ }
- for (; p < end; p++) {
- if (*p != ZONE_POISON) {
- zone_element_was_modified_panic(zone, addr,
- *p, ZONE_POISON, (vm_offset_t)p - addr);
- }
+ if (__improbable(zone_leaks_scan_enable &&
+ !(zone_elem_size(zone) & (sizeof(uintptr_t) - 1)))) {
+ unsigned int count, idx;
+ /* Fill element, from tail, with backtrace in reverse order */
+ if (numsaved == 0) {
+ numsaved = backtrace_frame(zbt, MAX_ZTRACE_DEPTH, fp, NULL);
}
- } else {
- /*
- * If element wasn't poisoned or entirely cleared, validate that the
- * minimum bytes that were cleared on free haven't been corrupted.
- * addr is advanced by ptr size as we have already validated and cleared
- * the freelist pointer/zcache canary.
- */
- if (memcmp_zero_ptr_aligned((void *) (addr + sizeof(vm_offset_t)),
- zp_min_size - sizeof(vm_offset_t))) {
- zone_element_not_clear_panic(zone, (void *)addr);
+ count = (unsigned int)(zone_elem_size(zone) / sizeof(uintptr_t));
+ if (count >= numsaved) {
+ count = numsaved - 1;
+ }
+ for (idx = 0; idx < count; idx++) {
+ ((uintptr_t *)addr)[count - 1 - idx] = zbt[idx + 1];
}
}
+#endif /* CONFIG_ZLEAKS */
}
-#endif /* ZALLOC_ENABLE_POISONING */
-#pragma mark Zone Leak Detection
+static inline bool
+zalloc_should_log_or_trace_leaks(zone_t zone, vm_size_t elem_size)
+{
+#if ZONE_ENABLE_LOGGING
+ if (DO_LOGGING(zone)) {
+ return true;
+ }
+#endif /* ZONE_ENABLE_LOGGING */
+#if CONFIG_ZLEAKS
+ /*
+ * Zone leak detection: capture a backtrace every zleak_sample_factor
+ * allocations in this zone.
+ */
+ if (zone->zleak_on) {
+ return true;
+ }
+ if (zone_leaks_scan_enable && !(elem_size & (sizeof(uintptr_t) - 1))) {
+ return true;
+ }
+#endif /* CONFIG_ZLEAKS */
+ return false;
+}
-/*
- * Zone leak debugging code
- *
- * When enabled, this code keeps a log to track allocations to a particular zone that have not
- * yet been freed. Examining this log will reveal the source of a zone leak. The log is allocated
- * only when logging is enabled, so there is no effect on the system when it's turned off. Logging is
- * off by default.
- *
- * Enable the logging via the boot-args. Add the parameter "zlog=<zone>" to boot-args where <zone>
- * is the name of the zone you wish to log.
- *
- * This code only tracks one zone, so you need to identify which one is leaking first.
- * Generally, you'll know you have a leak when you get a "zalloc retry failed 3" panic from the zone
- * garbage collector. Note that the zone name printed in the panic message is not necessarily the one
- * containing the leak. So do a zprint from gdb and locate the zone with the bloated size. This
- * is most likely the problem zone, so set zlog in boot-args to this zone name, reboot and re-run the test. The
- * next time it panics with this message, examine the log using the kgmacros zstack, findoldest and countpcs.
- * See the help in the kgmacros for usage info.
- *
- *
- * Zone corruption logging
- *
- * Logging can also be used to help identify the source of a zone corruption. First, identify the zone
- * that is being corrupted, then add "-zc zlog=<zone name>" to the boot-args. When -zc is used in conjunction
- * with zlog, it changes the logging style to track both allocations and frees to the zone. So when the
- * corruption is detected, examining the log will show you the stack traces of the callers who last allocated
- * and freed any particular element in the zone. Use the findelem kgmacro with the address of the element that's been
- * corrupted to examine its history. This should lead to the source of the corruption.
- */
+#endif /* ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS */
+#if ZONE_ENABLE_LOGGING
-/* Returns TRUE if we rolled over the counter at factor */
-__header_always_inline bool
-sample_counter(volatile uint32_t *count_p, uint32_t factor)
+__attribute__((noinline))
+static void
+zfree_log_trace(zone_t zone, vm_offset_t addr, void *fp)
{
- uint32_t old_count, new_count = 0;
- if (count_p != NULL) {
- os_atomic_rmw_loop(count_p, old_count, new_count, relaxed, {
- new_count = old_count + 1;
- if (new_count >= factor) {
- new_count = 0;
- }
- });
+ /*
+ * See if we're doing logging on this zone.
+ *
+ * There are two styles of logging used depending on
+ * whether we're trying to catch a leak or corruption.
+ */
+ if (__improbable(DO_LOGGING(zone))) {
+ if (corruption_debug_flag) {
+ uintptr_t zbt[MAX_ZTRACE_DEPTH];
+ unsigned int numsaved;
+ /*
+ * We're logging to catch a corruption.
+ *
+ * Add a record of this zfree operation to log.
+ */
+ numsaved = backtrace_frame(zbt, MAX_ZTRACE_DEPTH, fp, NULL);
+ btlog_add_entry(zone->zlog_btlog, (void *)addr, ZOP_FREE,
+ (void **)zbt, numsaved);
+ } else {
+ /*
+ * We're logging to catch a leak.
+ *
+ * Remove any record we might have for this element
+ * since it's being freed. Note that we may not find it
+ * if the buffer overflowed and that's OK.
+ *
+ * Since the log is of a limited size, old records get
+ * overwritten if there are more zallocs than zfrees.
+ */
+ btlog_remove_entries_for_element(zone->zlog_btlog, (void *)addr);
+ }
}
+}
- return new_count == 0;
+#endif /* ZONE_ENABLE_LOGGING */
+
+/* These functions live outside of CONFIG_ZLEAKS because they are also used in
+ * mbuf.c for mbuf leak-detection. This is why they lack the z_ prefix.
+ */
+
+/* "Thomas Wang's 32/64 bit mix functions." http://www.concentric.net/~Ttwang/tech/inthash.htm */
+uintptr_t
+hash_mix(uintptr_t x)
+{
+#ifndef __LP64__
+ x += ~(x << 15);
+ x ^= (x >> 10);
+ x += (x << 3);
+ x ^= (x >> 6);
+ x += ~(x << 11);
+ x ^= (x >> 16);
+#else
+ x += ~(x << 32);
+ x ^= (x >> 22);
+ x += ~(x << 13);
+ x ^= (x >> 8);
+ x += (x << 3);
+ x ^= (x >> 15);
+ x += ~(x << 27);
+ x ^= (x >> 31);
+#endif
+ return x;
}
-#if ZONE_ENABLE_LOGGING
-/* Log allocations and frees to help debug a zone element corruption */
-TUNABLE(bool, corruption_debug_flag, "-zc", false);
+uint32_t
+hashbacktrace(uintptr_t* bt, uint32_t depth, uint32_t max_size)
+{
+ uintptr_t hash = 0;
+ uintptr_t mask = max_size - 1;
-#define MAX_NUM_ZONES_ALLOWED_LOGGING 10 /* Maximum 10 zones can be logged at once */
+ while (depth) {
+ hash += bt[--depth];
+ }
-static int max_num_zones_to_log = MAX_NUM_ZONES_ALLOWED_LOGGING;
-static int num_zones_logged = 0;
+ hash = hash_mix(hash) & mask;
+
+ assert(hash < max_size);
+
+ return (uint32_t) hash;
+}
/*
- * The number of records in the log is configurable via the zrecs parameter in boot-args. Set this to
- * the number of records you want in the log. For example, "zrecs=10" sets it to 10 records. Since this
- * is the number of stacks suspected of leaking, we don't need many records.
+ * TODO: Determine how well distributed this is
+ * max_size must be a power of 2, e.g. 0x10000, because 0x10000 - 1 is 0x0FFFF,
+ * which makes a good bitmask.
+ */
+uint32_t
+hashaddr(uintptr_t pt, uint32_t max_size)
+{
+ uintptr_t hash = 0;
+ uintptr_t mask = max_size - 1;
+
+ hash = hash_mix(pt) & mask;
+
+ assert(hash < max_size);
+
+ return (uint32_t) hash;
+}
+
+#endif /* !ZALLOC_TEST */
+#pragma mark zone (re)fill
+#if !ZALLOC_TEST
+
+/*!
+ * @defgroup Zone Refill
+ * @{
+ *
+ * @brief
+ * Functions handling the zone refill machinery.
+ *
+ * @discussion
+ * Zones are refilled based on 3 mechanisms: direct expansion, async expansion,
+ * VM-specific replenishment. Zones using VM-specific replenishment are marked
+ * with the @c z_replenishes property set.
+ *
+ * @c zalloc_ext() is the codepath that kicks the zone refill when the zone is
+ * dropping below half of its @c z_elems_rsv (0 for most zones) and will:
+ *
+ * - call @c zone_expand_locked() directly if the caller is allowed to block,
+ *
+ * - wake up the asynchronous expansion thread call if the caller is not
+ * allowed to block,
+ *
+ * - call @c zone_replenish_locked() to kick the replenish state machine.
+ *
+ *
+ * <h2>Synchronous expansion</h2>
+ *
+ * This mechanism is actually the only one that may refill a zone, and all the
+ * other ones funnel through this one eventually.
+ *
+ * @c zone_expand_locked() implements the core of the expansion mechanism,
+ * and will do so while a caller specified predicate is true.
+ *
+ * Zone expansion allows for up to 2 threads to concurrently refill the zone:
+ * - one VM privileged thread,
+ * - one regular thread.
+ *
+ * Regular threads that refill will put down their identity in @c z_expander,
+ * so that priority inversion avoidance can be implemented.
+ *
+ * However, VM privileged threads are allowed to use VM page reserves,
+ * which allows for the system to recover from extreme memory pressure
+ * situations, allowing for the few allocations that @c zone_gc() or
+ * killing processes require.
+ *
+ * When a VM privileged thread is also expanding, the @c z_expander_vm_priv bit
+ * is set. @c z_expander is not necessarily the identity of this VM privileged
+ * thread (it is when the VM privileged thread came in first; otherwise it may
+ * be another thread, or even @c THREAD_NULL).
+ *
+ * Note that the pageout-scan daemon might be BG and is VM privileged. To avoid
+ * spending a whole pointer on priority inheritance for VM privileged threads
+ * (and other issues related to having two owners), we use the rwlock boost as
+ * a stop gap to avoid priority inversions.
+ *
+ *
+ * <h2>Chunk wiring policies</h2>
+ *
+ * Zones allocate memory in chunks of @c zone_t::z_chunk_pages pages at a time
+ * to try to minimize fragmentation relative to element sizes not aligning with
+ * a chunk size well. However, this can grow large and be hard to fulfill on
+ * a system under a lot of memory pressure (chunks can be as long as 8 pages on
+ * 4k page systems).
+ *
+ * This is why, when under memory pressure the system allows chunks to be
+ * partially populated. The metadata of the first page in the chunk maintains
+ * the count of actually populated pages.
+ *
+ * The metadata for addresses assigned to a zone is found on one of 4 queues:
+ * - @c z_pageq_empty has chunk heads with populated pages and no allocated
+ * elements (those can be targeted by @c zone_gc()),
+ * - @c z_pageq_partial has chunk heads with populated pages that are partially
+ * used,
+ * - @c z_pageq_full has chunk heads with populated pages with no free elements
+ * left,
+ * - @c z_pageq_va has either chunk heads for sequestered VA space assigned to
+ * the zone forever (if @c z_va_sequester is enabled), or the first secondary
+ * metadata for a chunk whose corresponding page is not populated in the
+ * chunk.
+ *
+ * When new pages need to be wired/populated, chunks from the @c z_pageq_va
+ * queues are preferred.
+ *
+ *
+ * <h2>Asynchronous expansion</h2>
+ *
+ * This mechanism allows for refilling zones used mostly with non blocking
+ * callers. It relies on a thread call (@c zone_expand_callout) which will
+ * iterate all zones and refill the ones marked with @c z_async_refilling.
+ *
+ * NOTE: If the calling thread for zalloc_noblock is lower priority than
+ * the thread_call, then zalloc_noblock to an empty zone may succeed.
+ *
+ *
+ * <h2>Dealing with zone allocations from the mach VM code</h2>
+ *
+ * The implementation of the mach VM itself uses the zone allocator
+ * for things like the vm_map_entry data structure. In order to prevent
+ * an infinite recursion problem when adding more pages to a zone, @c zalloc
+ * uses a replenish thread to refill the VM layer's zones before they have
+ * too few remaining free entries. The reserved remaining free entries
+ * guarantee that the VM routines can get entries from already mapped pages.
+ *
+ * In order for that to work, the amount of allocations in the nested
+ * case have to be bounded. There are currently 2 replenish zones, and
+ * if each needs 1 element of each zone to add a new page to itself, that
+ * gives us a minimum reserve of 2 elements.
+ *
+ * There is also a deadlock issue with the zone garbage collection thread,
+ * or any thread that is trying to free zone pages. While holding
+ * the kernel's map lock they may need to allocate new VM map entries, hence
+ * we need enough reserve to allow them to get past the point of holding the
+ * map lock. After freeing that page, the GC thread will wait in
+ * @c zone_reclaim() until the replenish threads can finish.
+ * Since there's only 1 GC thread at a time, that adds a minimum of 1 to the
+ * reserve size.
+ *
+ * Since the minimum amount you can add to a zone is 1 page,
+ * we'll use 16K (from ARM) as the refill size on all platforms.
+ *
+ * When a replenish zone drops to half of that reserve, i.e. REFILL_SIZE / 2,
+ * @c zalloc_ext() will wake the replenish thread. The replenish thread runs
+ * until at least REFILL_SIZE worth of free elements exist, before sleeping again.
+ * In the meantime threads may continue to use the reserve until there are only
+ * REFILL_SIZE / 4 elements left. Below that point only the replenish threads
+ * themselves and the GC thread may continue to use from the reserve.
*/
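+/*
+ * Worked example of the thresholds above (numbers are illustrative; a real
+ * replenish zone may use a different element size): with 128-byte elements,
+ * REFILL_SIZE worth of elements is 16384 / 128 = 128. zalloc_ext() wakes the
+ * replenish thread once fewer than 64 of them (REFILL_SIZE / 2) remain,
+ * ordinary callers may keep draining the reserve down to 32 (REFILL_SIZE / 4),
+ * and below that only the replenish threads and the GC thread may allocate
+ * from it.
+ */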
-#if defined(__LP64__)
-#define ZRECORDS_MAX 2560 /* Max records allowed in the log */
-#else
-#define ZRECORDS_MAX 1536 /* Max records allowed in the log */
-#endif
-#define ZRECORDS_DEFAULT 1024 /* default records in log if zrecs is not specificed in boot-args */
-
-static TUNABLE(uint32_t, log_records, "zrecs", ZRECORDS_DEFAULT);
+static thread_call_data_t zone_expand_callout;
-static void
-zone_enable_logging(zone_t z)
+static inline kma_flags_t
+zone_kma_flags(zone_t z, zalloc_flags_t flags)
{
- z->zlog_btlog = btlog_create(log_records, MAX_ZTRACE_DEPTH,
- (corruption_debug_flag == FALSE) /* caller_will_remove_entries_for_element? */);
+ kma_flags_t kmaflags = KMA_KOBJECT | KMA_ZERO;
- if (z->zlog_btlog) {
- printf("zone: logging started for zone %s%s\n",
- zone_heap_name(z), z->z_name);
- } else {
- printf("zone: couldn't allocate memory for zrecords, turning off zleak logging\n");
- z->zone_logging = false;
+ if (z->z_noencrypt) {
+ kmaflags |= KMA_NOENCRYPT;
+ }
+ if (flags & Z_NOPAGEWAIT) {
+ kmaflags |= KMA_NOPAGEWAIT;
+ }
+ if (z->z_permanent || (!z->z_destructible && z->z_va_sequester)) {
+ kmaflags |= KMA_PERMANENT;
}
+ if (z->z_submap_idx == Z_SUBMAP_IDX_GENERAL &&
+ z->kalloc_heap != KHEAP_ID_NONE) {
+ kmaflags |= KMA_KHEAP;
+ }
+
+ return kmaflags;
}
-/**
- * @function zone_setup_logging
+/*!
+ * @function zcram_and_lock()
*
- * @abstract
- * Optionally sets up a zone for logging.
+ * @brief
+ * Prepare some memory for being usable for allocation purposes.
*
* @discussion
- * We recognized two boot-args:
+ * Prepare memory in <code>[addr + ptoa(pg_start), addr + ptoa(pg_end))</code>
+ * to be usable in the zone.
*
- * zlog=<zone_to_log>
- * zrecs=<num_records_in_log>
+ * This function assumes the metadata is already populated for the range.
*
- * The zlog arg is used to specify the zone name that should be logged,
- * and zrecs is used to control the size of the log.
+ * Calling this function with @c pg_start being 0 means that the memory
+ * is either a partial chunk, or a full chunk, that isn't published anywhere
+ * and the initialization can happen without locks held.
*
- * If zrecs is not specified, a default value is used.
+ * Calling this function with a non zero @c pg_start means that we are extending
+ * an existing chunk: the memory in <code>[addr, addr + ptoa(pg_start))</code>,
+ * is already usable and published in the zone, so extending it requires holding
+ * the zone lock.
+ *
+ * @param zone The zone to cram new populated pages into
+ * @param addr The base address for the chunk(s)
+ * @param pg_va_new The number of virtual pages newly assigned to the zone
+ * @param pg_start The first newly populated page relative to @a addr.
+ * @param pg_end The after-last newly populated page relative to @a addr.
+ * @param kind The kind of memory assigned to the zone.
*/
static void
-zone_setup_logging(zone_t z)
+zcram_and_lock(zone_t zone, vm_offset_t addr, uint32_t pg_va_new,
+ uint32_t pg_start, uint32_t pg_end, zone_addr_kind_t kind)
{
- char zone_name[MAX_ZONE_NAME]; /* Temp. buffer for the zone name */
- char zlog_name[MAX_ZONE_NAME]; /* Temp. buffer to create the strings zlog1, zlog2 etc... */
- char zlog_val[MAX_ZONE_NAME]; /* the zone name we're logging, if any */
+ zone_id_t zindex = zone_index(zone);
+ vm_offset_t elem_size = zone_elem_size(zone);
+ uint32_t free_start = 0, free_end = 0;
- /*
- * Don't allow more than ZRECORDS_MAX records even if the user asked for more.
- *
- * This prevents accidentally hogging too much kernel memory
- * and making the system unusable.
- */
- if (log_records > ZRECORDS_MAX) {
- log_records = ZRECORDS_MAX;
- }
+ struct zone_page_metadata *meta = zone_meta_from_addr(addr);
+ uint32_t chunk_pages = zone->z_chunk_pages;
- /*
- * Append kalloc heap name to zone name (if zone is used by kalloc)
- */
- snprintf(zone_name, MAX_ZONE_NAME, "%s%s", zone_heap_name(z), z->z_name);
+ assert(pg_start < pg_end && pg_end <= chunk_pages);
- /* zlog0 isn't allowed. */
- for (int i = 1; i <= max_num_zones_to_log; i++) {
- snprintf(zlog_name, MAX_ZONE_NAME, "zlog%d", i);
+ if (pg_start == 0) {
+ uint16_t chunk_len = (uint16_t)pg_end;
+ uint16_t secondary_len = ZM_SECONDARY_PAGE;
+ bool inline_bitmap = false;
- if (PE_parse_boot_argn(zlog_name, zlog_val, sizeof(zlog_val)) &&
- track_this_zone(zone_name, zlog_val)) {
- z->zone_logging = true;
- num_zones_logged++;
- break;
+ if (zone->z_percpu) {
+ chunk_len = 1;
+ secondary_len = ZM_SECONDARY_PCPU_PAGE;
+ assert(pg_end == zpercpu_count());
+ }
+ if (!zone->z_permanent) {
+ inline_bitmap = zone->z_chunk_elems <= 32 * chunk_pages;
+ }
+
+ meta[0] = (struct zone_page_metadata){
+ .zm_index = zindex,
+ .zm_inline_bitmap = inline_bitmap,
+ .zm_chunk_len = chunk_len,
+ };
+ if (kind == ZONE_ADDR_FOREIGN) {
+ /* Never hit z_pageq_empty */
+ meta[0].zm_alloc_size = ZM_ALLOC_SIZE_LOCK;
+ }
+
+ for (uint16_t i = 1; i < chunk_pages; i++) {
+ meta[i] = (struct zone_page_metadata){
+ .zm_index = zindex,
+ .zm_inline_bitmap = inline_bitmap,
+ .zm_chunk_len = secondary_len,
+ .zm_page_index = i,
+ };
+ }
+
+ free_end = (uint32_t)ptoa(chunk_len) / elem_size;
+ if (!zone->z_permanent) {
+ zone_meta_bits_init(meta, free_end, zone->z_chunk_elems);
}
+ } else {
+ assert(!zone->z_percpu && !zone->z_permanent);
+
+ free_end = (uint32_t)ptoa(pg_end) / elem_size;
+ free_start = (uint32_t)ptoa(pg_start) / elem_size;
+ }
+
+#if VM_MAX_TAG_ZONES
+ if (__improbable(zone->tags)) {
+ assert(kind == ZONE_ADDR_NATIVE && !zone->z_percpu);
+ ztMemoryAdd(zone, addr + ptoa(pg_start),
+ ptoa(pg_end - pg_start));
}
+#endif /* VM_MAX_TAG_ZONES */
/*
- * Backwards compat. with the old boot-arg used to specify single zone
- * logging i.e. zlog Needs to happen after the newer zlogn checks
- * because the prefix will match all the zlogn
- * boot-args.
+ * Insert the initialized pages / metadatas into the right lists.
*/
- if (!z->zone_logging &&
- PE_parse_boot_argn("zlog", zlog_val, sizeof(zlog_val)) &&
- track_this_zone(zone_name, zlog_val)) {
- z->zone_logging = true;
- num_zones_logged++;
+
+ zone_lock(zone);
+ assert(zone->z_self == zone);
+
+ if (pg_start != 0) {
+ assert(meta->zm_chunk_len == pg_start);
+
+ zone_meta_bits_merge(meta, free_start, free_end);
+ meta->zm_chunk_len = (uint16_t)pg_end;
+
+ /*
+ * consume the zone_meta_lock_in_partial()
+ * done in zone_expand_locked()
+ */
+ zone_meta_alloc_size_sub(zone, meta, ZM_ALLOC_SIZE_LOCK);
+ zone_meta_remqueue(zone, meta);
}
+ if (zone->z_permanent || meta->zm_alloc_size) {
+ zone_meta_queue_push(zone, &zone->z_pageq_partial, meta);
+ } else {
+ zone_meta_queue_push(zone, &zone->z_pageq_empty, meta);
+ zone->z_wired_empty += zone->z_percpu ? 1 : pg_end;
+ }
+ if (pg_end < chunk_pages) {
+ /* push any non populated residual VA on z_pageq_va */
+ zone_meta_queue_push(zone, &zone->z_pageq_va, meta + pg_end);
+ }
- /*
- * If we want to log a zone, see if we need to allocate buffer space for
- * the log.
- *
- * Some vm related zones are zinit'ed before we can do a kmem_alloc, so
- * we have to defer allocation in that case.
- *
- * zone_init() will finish the job.
- *
- * If we want to log one of the VM related zones that's set up early on,
- * we will skip allocation of the log until zinit is called again later
- * on some other zone.
- */
- if (z->zone_logging && startup_phase >= STARTUP_SUB_KMEM_ALLOC) {
- zone_enable_logging(z);
+ zone_elems_free_add(zone, free_end - free_start);
+ zone->z_elems_avail += free_end - free_start;
+ zone->z_wired_cur += zone->z_percpu ? 1 : pg_end - pg_start;
+ if (pg_va_new) {
+ zone->z_va_cur += zone->z_percpu ? 1 : pg_va_new;
+ }
+ if (zone->z_wired_hwm < zone->z_wired_cur) {
+ zone->z_wired_hwm = zone->z_wired_cur;
}
+
+ os_atomic_add(&zones_phys_page_mapped_count, pg_end - pg_start, relaxed);
}
-/*
- * Each record in the log contains a pointer to the zone element it refers to,
- * and a small array to hold the pc's from the stack trace. A
- * record is added to the log each time a zalloc() is done in the zone_of_interest. For leak debugging,
- * the record is cleared when a zfree() is done. For corruption debugging, the log tracks both allocs and frees.
- * If the log fills, old records are replaced as if it were a circular buffer.
- */
+static void
+zcram(zone_t zone, vm_offset_t addr, uint32_t pages, zone_addr_kind_t kind)
+{
+ uint32_t chunk_pages = zone->z_chunk_pages;
+ assert(pages % chunk_pages == 0);
+ for (; pages > 0; pages -= chunk_pages, addr += ptoa(chunk_pages)) {
+ zcram_and_lock(zone, addr, chunk_pages, 0, chunk_pages, kind);
+ zone_unlock(zone);
+ }
+}
-/*
- * Decide if we want to log this zone by doing a string compare between a zone name and the name
- * of the zone to log. Return true if the strings are equal, false otherwise. Because it's not
- * possible to include spaces in strings passed in via the boot-args, a period in the logname will
- * match a space in the zone name.
- */
+void
+zone_cram_foreign(zone_t zone, vm_offset_t newmem, vm_size_t size)
+{
+ uint32_t pages = (uint32_t)atop(size);
-/*
- * Test if we want to log this zalloc/zfree event. We log if this is the zone we're interested in and
- * the buffer for the records has been allocated.
- */
+ if (!from_zone_map(newmem, size, ZONE_ADDR_FOREIGN)) {
+ panic("zone_cram_foreign: foreign memory [%p] being crammed is "
+ "outside of expected range", (void *)newmem);
+ }
+ if (!zone->z_allows_foreign) {
+ panic("zone_cram_foreign: foreign memory [%p] being crammed in "
+ "zone '%s%s' not expecting it", (void *)newmem,
+ zone_heap_name(zone), zone_name(zone));
+ }
+ if (size % ptoa(zone->z_chunk_pages)) {
+ panic("zone_cram_foreign: foreign memory [%p] being crammed has "
+ "invalid size %zx", (void *)newmem, (size_t)size);
+ }
+ if (startup_phase >= STARTUP_SUB_ZALLOC) {
+ panic("zone_cram_foreign: foreign memory [%p] being crammed "
+ "after zalloc is initialized", (void *)newmem);
+ }
-#define DO_LOGGING(z) (z->zlog_btlog != NULL)
-#else /* !ZONE_ENABLE_LOGGING */
-#define DO_LOGGING(z) 0
-#endif /* !ZONE_ENABLE_LOGGING */
+ bzero((void *)newmem, size);
+ zcram(zone, newmem, pages, ZONE_ADDR_FOREIGN);
+}
+
+void
+zone_fill_initially(zone_t zone, vm_size_t nelems)
+{
+ kma_flags_t kmaflags;
+ kern_return_t kr;
+ vm_offset_t addr;
+ uint32_t pages;
+
+ assert(!zone->z_permanent && !zone->collectable && !zone->z_destructible);
+ assert(zone->z_elems_avail == 0);
+
+ kmaflags = zone_kma_flags(zone, Z_WAITOK) | KMA_PERMANENT;
+ pages = zone_alloc_pages_for_nelems(zone, nelems);
+ kr = kernel_memory_allocate(zone_submap(zone), &addr, ptoa(pages),
+ 0, kmaflags, VM_KERN_MEMORY_ZONE);
+ if (kr != KERN_SUCCESS) {
+ panic("kernel_memory_allocate() of %u pages failed", pages);
+ }
+
+ zone_meta_populate(addr, ptoa(pages));
+ zcram(zone, addr, pages, ZONE_ADDR_NATIVE);
+}
+
+static vm_offset_t
+zone_allocate_va(zone_t z, zalloc_flags_t flags)
+{
+ kma_flags_t kmaflags = zone_kma_flags(z, flags) | KMA_VAONLY;
+ vm_size_t size = ptoa(z->z_chunk_pages);
+ kern_return_t kr;
+ vm_offset_t addr;
+
+ kr = kernel_memory_allocate(zone_submap(z), &addr, size, 0,
+ kmaflags, VM_KERN_MEMORY_ZONE);
+
+#if !__LP64__
+ if (kr == KERN_NO_SPACE && z->z_replenishes) {
+ /*
+ * On 32bit the zone submaps do not have as much VA
+ * available, so use the VA reserved map for this
+ * purpose.
+ */
+ vm_map_t map = zone_submaps[Z_SUBMAP_IDX_VA_RESERVE];
+ kr = kernel_memory_allocate(map, &addr, size, 0,
+ kmaflags, VM_KERN_MEMORY_ZONE);
+ }
+#endif
+
+ if (kr == KERN_SUCCESS) {
+#if ZALLOC_EARLY_GAPS
+ if (__improbable(zone_caching_disabled < 0)) {
+ zone_allocate_random_early_gap(z);
+ }
+#endif /* ZALLOC_EARLY_GAPS */
+ zone_meta_populate(addr, size);
+ return addr;
+ }
+ panic_include_zprint = TRUE;
#if CONFIG_ZLEAKS
+ if ((zleak_state & ZLEAK_STATE_ACTIVE)) {
+ panic_include_ztrace = TRUE;
+ }
+#endif /* CONFIG_ZLEAKS */
+ zone_t zone_largest = zone_find_largest();
+ panic("zalloc: zone map exhausted while allocating from zone [%s%s], "
+ "likely due to memory leak in zone [%s%s] "
+ "(%luM, %d elements allocated)",
+ zone_heap_name(z), zone_name(z),
+ zone_heap_name(zone_largest), zone_name(zone_largest),
+ (unsigned long)zone_size_wired(zone_largest) >> 20,
+ zone_count_allocated(zone_largest));
+}
-/*
- * The zone leak detector, abbreviated 'zleak', keeps track of a subset of the currently outstanding
- * allocations made by the zone allocator. Every zleak_sample_factor allocations in each zone, we capture a
- * backtrace. Every free, we examine the table and determine if the allocation was being tracked,
- * and stop tracking it if it was being tracked.
- *
- * We track the allocations in the zallocations hash table, which stores the address that was returned from
- * the zone allocator. Each stored entry in the zallocations table points to an entry in the ztraces table, which
- * stores the backtrace associated with that allocation. This provides uniquing for the relatively large
- * backtraces - we don't store them more than once.
- *
- * Data collection begins when the zone map is 50% full, and only occurs for zones that are taking up
- * a large amount of virtual space.
- */
-#define ZLEAK_STATE_ENABLED 0x01 /* Zone leak monitoring should be turned on if zone_map fills up. */
-#define ZLEAK_STATE_ACTIVE 0x02 /* We are actively collecting traces. */
-#define ZLEAK_STATE_ACTIVATING 0x04 /* Some thread is doing setup; others should move along. */
-#define ZLEAK_STATE_FAILED 0x08 /* Attempt to allocate tables failed. We will not try again. */
-uint32_t zleak_state = 0; /* State of collection, as above */
+static bool
+zone_expand_pred_nope(__unused zone_t z)
+{
+ return false;
+}
-boolean_t panic_include_ztrace = FALSE; /* Enable zleak logging on panic */
-vm_size_t zleak_global_tracking_threshold; /* Size of zone map at which to start collecting data */
-vm_size_t zleak_per_zone_tracking_threshold; /* Size a zone will have before we will collect data on it */
-unsigned int zleak_sample_factor = 1000; /* Allocations per sample attempt */
+static inline void
+ZONE_TRACE_VM_KERN_REQUEST_START(vm_size_t size)
+{
+#if DEBUG || DEVELOPMENT
+ VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
+ size, 0, 0, 0);
+#else
+ (void)size;
+#endif
+}
-/*
- * Counters for allocation statistics.
- */
+static inline void
+ZONE_TRACE_VM_KERN_REQUEST_END(uint32_t pages)
+{
+#if DEBUG || DEVELOPMENT
+ task_t task = current_task();
+ if (pages && task) {
+ ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, pages);
+ }
+ VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
+ pages, 0, 0, 0);
+#else
+ (void)pages;
+#endif
+}
+
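+/*!
+ * @function zone_expand_locked
+ *
+ * @brief
+ * Grows the zone; called and returns with the zone lock held
+ * (the lock may be dropped and retaken while VA and pages are allocated).
+ *
+ * @discussion
+ * @c pred decides whether (another) expansion is needed; a NULL @c pred
+ * means "expand exactly once". Only one thread expands a given zone at a
+ * time, except that a VM-privileged thread may overcommit in parallel.
+ */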
+static void
+zone_expand_locked(zone_t z, zalloc_flags_t flags, bool (*pred)(zone_t))
+{
+ thread_t self = current_thread();
+ bool vm_priv = (self->options & TH_OPT_VMPRIV);
+ bool clear_vm_priv;
+
+ for (;;) {
+ if (!pred) {
+ /* NULL pred means "try just once" */
+ pred = zone_expand_pred_nope;
+ } else if (!pred(z)) {
+ return;
+ }
+
+ if (vm_priv && !z->z_expander_vm_priv) {
+ /*
+ * Claim the vm priv overcommit slot
+ *
+ * We do not track exact ownership for VM privileged
+ * threads, so use the rwlock boost as a stop-gap
+ * just in case.
+ */
+ set_thread_rwlock_boost();
+ z->z_expander_vm_priv = true;
+ clear_vm_priv = true;
+ } else {
+ clear_vm_priv = false;
+ }
-/* Times two active records want to occupy the same spot */
-unsigned int z_alloc_collisions = 0;
-unsigned int z_trace_collisions = 0;
+ if (z->z_expander == NULL) {
+ z->z_expander = self;
+ break;
+ }
+ if (clear_vm_priv) {
+ break;
+ }
-/* Times a new record lands on a spot previously occupied by a freed allocation */
-unsigned int z_alloc_overwrites = 0;
-unsigned int z_trace_overwrites = 0;
+ if (flags & Z_NOPAGEWAIT) {
+ return;
+ }
-/* Times a new alloc or trace is put into the hash table */
-unsigned int z_alloc_recorded = 0;
-unsigned int z_trace_recorded = 0;
+ z->z_expanding_wait = true;
+ lck_spin_sleep_with_inheritor(&z->z_lock, LCK_SLEEP_DEFAULT,
+ &z->z_expander, z->z_expander,
+ TH_UNINT, TIMEOUT_WAIT_FOREVER);
+ }
-/* Times zleak_log returned false due to not being able to acquire the lock */
-unsigned int z_total_conflicts = 0;
+ do {
+ struct zone_page_metadata *meta = NULL;
+ uint32_t new_va = 0, cur_pages = 0, min_pages = 0, pages = 0;
+ vm_page_t page_list = NULL;
+ vm_offset_t addr = 0;
+ int waited = 0;
-/*
- * Structure for keeping track of an allocation
- * An allocation bucket is in use if its element is not NULL
- */
-struct zallocation {
- uintptr_t za_element; /* the element that was zalloc'ed or zfree'ed, NULL if bucket unused */
- vm_size_t za_size; /* how much memory did this allocation take up? */
- uint32_t za_trace_index; /* index into ztraces for backtrace associated with allocation */
- /* TODO: #if this out */
- uint32_t za_hit_count; /* for determining effectiveness of hash function */
-};
+ /*
+ * While we hold the zone lock, check whether there is VA we can:
+ * - complete from partial pages,
+ * - reuse from the sequester list.
+ *
+ * When the page is being populated we pretend we allocated
+ * an extra element so that zone_gc() can't attempt to free
+ * the chunk (as it could become empty while we wait for pages).
+ */
+ if (!zone_pva_is_null(z->z_pageq_va)) {
+ meta = zone_meta_queue_pop_native(z,
+ &z->z_pageq_va, &addr);
+ if (meta->zm_chunk_len == ZM_SECONDARY_PAGE) {
+ cur_pages = meta->zm_page_index;
+ meta -= cur_pages;
+ addr -= ptoa(cur_pages);
+ zone_meta_lock_in_partial(z, meta, cur_pages);
+ }
+ }
+ zone_unlock(z);
-/* Size must be a power of two for the zhash to be able to just mask off bits instead of mod */
-uint32_t zleak_alloc_buckets = CONFIG_ZLEAK_ALLOCATION_MAP_NUM;
-uint32_t zleak_trace_buckets = CONFIG_ZLEAK_TRACE_MAP_NUM;
+ /*
+ * Do the zone leak activation here because zleak_activate()
+ * may block, and can't be done on the way out.
+ *
+ * Trigger jetsams via the vm_pageout_garbage_collect thread if
+ * we're running out of zone memory
+ */
+ zleak_activate_if_needed();
+ if (zone_map_nearing_exhaustion()) {
+ thread_wakeup((event_t)&vm_pageout_garbage_collect);
+ }
-vm_size_t zleak_max_zonemap_size;
+ /*
+ * And now allocate pages to populate our VA.
+ */
+ if (z->z_percpu) {
+ min_pages = z->z_chunk_pages;
+ } else {
+ min_pages = (uint32_t)atop(round_page(zone_elem_size(z)));
+ }
-/* Hashmaps of allocations and their corresponding traces */
-static struct zallocation* zallocations;
-static struct ztrace* ztraces;
+ ZONE_TRACE_VM_KERN_REQUEST_START(ptoa(z->z_chunk_pages - cur_pages));
-/* not static so that panic can see this, see kern/debug.c */
-struct ztrace* top_ztrace;
+ while (pages < z->z_chunk_pages - cur_pages) {
+ vm_page_t m = vm_page_grab();
-/* Lock to protect zallocations, ztraces, and top_ztrace from concurrent modification. */
-LCK_GRP_DECLARE(zleak_lock_grp, "zleak_lock");
-LCK_SPIN_DECLARE(zleak_lock, &zleak_lock_grp);
+ if (m) {
+ pages++;
+ m->vmp_snext = page_list;
+ page_list = m;
+ vm_page_zero_fill(m);
+ continue;
+ }
-/*
- * Initializes the zone leak monitor. Called from zone_init()
- */
-__startup_func
-static void
-zleak_init(vm_size_t max_zonemap_size)
-{
- char scratch_buf[16];
- boolean_t zleak_enable_flag = FALSE;
+ if (pages >= min_pages && (vm_pool_low() || waited)) {
+ break;
+ }
- zleak_max_zonemap_size = max_zonemap_size;
- zleak_global_tracking_threshold = max_zonemap_size / 2;
- zleak_per_zone_tracking_threshold = zleak_global_tracking_threshold / 8;
+ if ((flags & Z_NOPAGEWAIT) == 0) {
+ waited++;
+ VM_PAGE_WAIT();
+ continue;
+ }
-#if CONFIG_EMBEDDED
- if (PE_parse_boot_argn("-zleakon", scratch_buf, sizeof(scratch_buf))) {
- zleak_enable_flag = TRUE;
- printf("zone leak detection enabled\n");
- } else {
- zleak_enable_flag = FALSE;
- printf("zone leak detection disabled\n");
- }
-#else /* CONFIG_EMBEDDED */
- /* -zleakoff (flag to disable zone leak monitor) */
- if (PE_parse_boot_argn("-zleakoff", scratch_buf, sizeof(scratch_buf))) {
- zleak_enable_flag = FALSE;
- printf("zone leak detection disabled\n");
- } else {
- zleak_enable_flag = TRUE;
- printf("zone leak detection enabled\n");
- }
-#endif /* CONFIG_EMBEDDED */
+ /*
+ * Undo everything and bail out:
+ *
+ * - free pages
+ * - undo the fake allocation if any
+ * - put the VA back on the VA page queue.
+ */
+ vm_page_free_list(page_list, FALSE);
+ ZONE_TRACE_VM_KERN_REQUEST_END(pages);
- /* zfactor=XXXX (override how often to sample the zone allocator) */
- if (PE_parse_boot_argn("zfactor", &zleak_sample_factor, sizeof(zleak_sample_factor))) {
- printf("Zone leak factor override: %u\n", zleak_sample_factor);
- }
+ zone_lock(z);
- /* zleak-allocs=XXXX (override number of buckets in zallocations) */
- if (PE_parse_boot_argn("zleak-allocs", &zleak_alloc_buckets, sizeof(zleak_alloc_buckets))) {
- printf("Zone leak alloc buckets override: %u\n", zleak_alloc_buckets);
- /* uses 'is power of 2' trick: (0x01000 & 0x00FFF == 0) */
- if (zleak_alloc_buckets == 0 || (zleak_alloc_buckets & (zleak_alloc_buckets - 1))) {
- printf("Override isn't a power of two, bad things might happen!\n");
+ if (cur_pages) {
+ zone_meta_unlock_from_partial(z, meta, cur_pages);
+ }
+ if (meta) {
+ zone_meta_queue_push(z, &z->z_pageq_va,
+ meta + cur_pages);
+ }
+ goto page_shortage;
}
- }
- /* zleak-traces=XXXX (override number of buckets in ztraces) */
- if (PE_parse_boot_argn("zleak-traces", &zleak_trace_buckets, sizeof(zleak_trace_buckets))) {
- printf("Zone leak trace buckets override: %u\n", zleak_trace_buckets);
- /* uses 'is power of 2' trick: (0x01000 & 0x00FFF == 0) */
- if (zleak_trace_buckets == 0 || (zleak_trace_buckets & (zleak_trace_buckets - 1))) {
- printf("Override isn't a power of two, bad things might happen!\n");
+ /*
+ * If we didn't find pre-allocated VA, then allocate a chunk
+ * of VA here.
+ */
+ if (addr == 0) {
+ addr = zone_allocate_va(z, flags);
+ meta = zone_meta_from_addr(addr);
+ new_va = z->z_chunk_pages;
}
- }
- if (zleak_enable_flag) {
- zleak_state = ZLEAK_STATE_ENABLED;
- }
-}
+ kernel_memory_populate_with_pages(zone_submap(z),
+ addr + ptoa(cur_pages), ptoa(pages), page_list,
+ zone_kma_flags(z, flags), VM_KERN_MEMORY_ZONE);
-/*
- * Support for kern.zleak.active sysctl - a simplified
- * version of the zleak_state variable.
- */
-int
-get_zleak_state(void)
-{
- if (zleak_state & ZLEAK_STATE_FAILED) {
- return -1;
+ ZONE_TRACE_VM_KERN_REQUEST_END(pages);
+
+ zcram_and_lock(z, addr, new_va, cur_pages, cur_pages + pages,
+ ZONE_ADDR_NATIVE);
+ } while (pred(z));
+
+page_shortage:
+ zleak_track_if_needed(z);
+
+ if (clear_vm_priv) {
+ z->z_expander_vm_priv = false;
+ clear_thread_rwlock_boost();
}
- if (zleak_state & ZLEAK_STATE_ACTIVE) {
- return 1;
+ if (z->z_expander == self) {
+ z->z_expander = THREAD_NULL;
+ }
+ if (z->z_expanding_wait) {
+ z->z_expanding_wait = false;
+ wakeup_all_with_inheritor(&z->z_expander, THREAD_AWAKENED);
}
- return 0;
}
-kern_return_t
-zleak_activate(void)
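+/*!
+ * @function zalloc_needs_refill
+ *
+ * @brief
+ * Returns whether a zone should keep growing; used as the expansion
+ * predicate for @c zone_expand_locked().
+ *
+ * @discussion
+ * Expansion stops once the free count exceeds the reserve, or when an
+ * exhaustible zone reaches its wired limit. A zone that is neither
+ * expandable nor exhaustible panics as exhausted when it hits its limit.
+ */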
+static bool
+zalloc_needs_refill(zone_t zone)
{
- kern_return_t retval;
- vm_size_t z_alloc_size = zleak_alloc_buckets * sizeof(struct zallocation);
- vm_size_t z_trace_size = zleak_trace_buckets * sizeof(struct ztrace);
- void *allocations_ptr = NULL;
- void *traces_ptr = NULL;
-
- /* Only one thread attempts to activate at a time */
- if (zleak_state & (ZLEAK_STATE_ACTIVE | ZLEAK_STATE_ACTIVATING | ZLEAK_STATE_FAILED)) {
- return KERN_SUCCESS;
+ if (zone->z_elems_free > zone->z_elems_rsv) {
+ return false;
}
-
- /* Indicate that we're doing the setup */
- lck_spin_lock(&zleak_lock);
- if (zleak_state & (ZLEAK_STATE_ACTIVE | ZLEAK_STATE_ACTIVATING | ZLEAK_STATE_FAILED)) {
- lck_spin_unlock(&zleak_lock);
- return KERN_SUCCESS;
+ if (zone->z_wired_cur < zone->z_wired_max) {
+ return true;
}
-
- zleak_state |= ZLEAK_STATE_ACTIVATING;
- lck_spin_unlock(&zleak_lock);
-
- /* Allocate and zero tables */
- retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&allocations_ptr, z_alloc_size, VM_KERN_MEMORY_OSFMK);
- if (retval != KERN_SUCCESS) {
- goto fail;
+ if (zone->exhaustible) {
+ return false;
}
-
- retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&traces_ptr, z_trace_size, VM_KERN_MEMORY_OSFMK);
- if (retval != KERN_SUCCESS) {
- goto fail;
+ if (zone->expandable) {
+ /*
+ * If we're expandable, just don't go through this again.
+ */
+ zone->z_wired_max = ~0u;
+ return true;
}
+ zone_unlock(zone);
- bzero(allocations_ptr, z_alloc_size);
- bzero(traces_ptr, z_trace_size);
-
- /* Everything's set. Install tables, mark active. */
- zallocations = allocations_ptr;
- ztraces = traces_ptr;
-
- /*
- * Initialize the top_ztrace to the first entry in ztraces,
- * so we don't have to check for null in zleak_log
- */
- top_ztrace = &ztraces[0];
+ panic_include_zprint = true;
+#if CONFIG_ZLEAKS
+ if (zleak_state & ZLEAK_STATE_ACTIVE) {
+ panic_include_ztrace = true;
+ }
+#endif /* CONFIG_ZLEAKS */
+ panic("zone '%s%s' exhausted", zone_heap_name(zone), zone_name(zone));
+}
- /*
- * Note that we do need a barrier between installing
- * the tables and setting the active flag, because the zfree()
- * path accesses the table without a lock if we're active.
- */
- lck_spin_lock(&zleak_lock);
- zleak_state |= ZLEAK_STATE_ACTIVE;
- zleak_state &= ~ZLEAK_STATE_ACTIVATING;
- lck_spin_unlock(&zleak_lock);
+static void
+zone_expand_async(__unused thread_call_param_t p0, __unused thread_call_param_t p1)
+{
+ zone_foreach(z) {
+ if (z->no_callout) {
+ /* z_async_refilling will never be set */
+ continue;
+ }
- return 0;
+ if (z->z_replenishes) {
+ /* those use the zone_replenish_thread */
+ continue;
+ }
-fail:
- /*
- * If we fail to allocate memory, don't further tax
- * the system by trying again.
- */
- lck_spin_lock(&zleak_lock);
- zleak_state |= ZLEAK_STATE_FAILED;
- zleak_state &= ~ZLEAK_STATE_ACTIVATING;
- lck_spin_unlock(&zleak_lock);
+ zone_lock(z);
+ if (z->z_self && z->z_async_refilling) {
+ z->z_async_refilling = false;
+ zone_expand_locked(z, Z_WAITOK, zalloc_needs_refill);
+ }
+ zone_unlock(z);
+ }
+}
- if (allocations_ptr != NULL) {
- kmem_free(kernel_map, (vm_offset_t)allocations_ptr, z_alloc_size);
+static inline void
+zone_expand_async_schedule_if_needed(zone_t zone)
+{
+ if (zone->z_elems_free > zone->z_elems_rsv || zone->z_async_refilling ||
+ zone->no_callout) {
+ return;
}
- if (traces_ptr != NULL) {
- kmem_free(kernel_map, (vm_offset_t)traces_ptr, z_trace_size);
+ if (!zone->expandable && zone->z_wired_cur >= zone->z_wired_max) {
+ return;
}
- return retval;
+ if (zone->z_elems_free == 0 || !vm_pool_low()) {
+ zone->z_async_refilling = true;
+ thread_call_enter(&zone_expand_callout);
+ }
}
-/*
- * TODO: What about allocations that never get deallocated,
- * especially ones with unique backtraces? Should we wait to record
- * until after boot has completed?
- * (How many persistent zallocs are there?)
- */
+#endif /* !ZALLOC_TEST */
+#pragma mark zone replenishing (VM allocations)
+#if !ZALLOC_TEST
/*
- * This function records the allocation in the allocations table,
- * and stores the associated backtrace in the traces table
- * (or just increments the refcount if the trace is already recorded)
- * If the allocation slot is in use, the old allocation is replaced with the new allocation, and
- * the associated trace's refcount is decremented.
- * If the trace slot is in use, it returns.
- * The refcount is incremented by the amount of memory the allocation consumes.
- * The return value indicates whether to try again next time.
+ * Tracks how many zone_replenish threads are active, because zone_gc() wants
+ * for those to be finished before it proceeds.
+ *
+ * This counts how many replenish threads are active in
+ * ZONE_REPLENISH_ACTIVE_INC increments,
+ * and uses the low bit to track if there are any waiters.
*/
-static boolean_t
-zleak_log(uintptr_t* bt,
- uintptr_t addr,
- uint32_t depth,
- vm_size_t allocation_size)
+#define ZONE_REPLENISH_ACTIVE_NONE 0u
+#define ZONE_REPLENISH_ACTIVE_WAITER_BIT 1u
+#define ZONE_REPLENISH_ACTIVE_INC 2u
+#define ZONE_REPLENISH_ACTIVE_MASK (~ZONE_REPLENISH_ACTIVE_WAITER_BIT)
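+/*
+ * For example, a zone_replenish_active value of 5 encodes two active
+ * replenish threads (2 * ZONE_REPLENISH_ACTIVE_INC) with the waiter bit
+ * (ZONE_REPLENISH_ACTIVE_WAITER_BIT) set.
+ */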
+static unsigned _Atomic zone_replenish_active;
+static unsigned zone_replenish_wakeups;
+static unsigned zone_replenish_wakeups_initiated;
+static unsigned zone_replenish_throttle_count;
+
+#define ZONE_REPLENISH_TARGET (16 * 1024)
+
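+/*!
+ * @function zone_replenish_wait_if_needed
+ *
+ * @brief
+ * Blocks the caller (e.g. zone_reclaim()) until no zone replenish thread is
+ * active, registering itself through the waiter bit of
+ * @c zone_replenish_active.
+ */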
+static void
+zone_replenish_wait_if_needed(void)
{
- /* Quit if there's someone else modifying the hash tables */
- if (!lck_spin_try_lock(&zleak_lock)) {
- z_total_conflicts++;
- return FALSE;
+ /*
+ * This check can be racy; the reserves ought to be enough
+ * to compensate for a little race.
+ */
+ while (os_atomic_load(&zone_replenish_active, relaxed) !=
+ ZONE_REPLENISH_ACTIVE_NONE) {
+ unsigned o_active, n_active;
+
+ assert_wait(&zone_replenish_active, THREAD_UNINT);
+
+ os_atomic_rmw_loop(&zone_replenish_active, o_active, n_active, relaxed, {
+ if (o_active == ZONE_REPLENISH_ACTIVE_NONE) {
+ os_atomic_rmw_loop_give_up({
+ clear_wait(current_thread(), THREAD_AWAKENED);
+ return;
+ });
+ }
+ if (o_active & ZONE_REPLENISH_ACTIVE_WAITER_BIT) {
+ os_atomic_rmw_loop_give_up(break);
+ }
+ n_active = o_active | ZONE_REPLENISH_ACTIVE_WAITER_BIT;
+ });
+ thread_block(THREAD_CONTINUE_NULL);
}
+}
- struct zallocation* allocation = &zallocations[hashaddr(addr, zleak_alloc_buckets)];
-
- uint32_t trace_index = hashbacktrace(bt, depth, zleak_trace_buckets);
- struct ztrace* trace = &ztraces[trace_index];
+__attribute__((noinline))
+static void
+zone_replenish_locked(zone_t zone)
+{
+ thread_t thr = current_thread();
+ uint32_t min_free;
- allocation->za_hit_count++;
- trace->zt_hit_count++;
+ zone_replenish_wakeups++;
/*
- * If the allocation bucket we want to be in is occupied, and if the occupier
- * has the same trace as us, just bail.
+ * We'll let threads continue to allocate under the reserve:
+ * - until it is depleted to 50% for regular threads,
+ * - until it is depleted to 25% for VM_PRIV threads.
+ *
+ * After that only TH_OPT_ZONE_PRIV threads may continue.
*/
- if (allocation->za_element != (uintptr_t) 0 && trace_index == allocation->za_trace_index) {
- z_alloc_collisions++;
-
- lck_spin_unlock(&zleak_lock);
- return TRUE;
+ if (thr->options & TH_OPT_VMPRIV) {
+ min_free = zone->z_elems_rsv / 4;
+ } else {
+ min_free = zone->z_elems_rsv / 2;
}
- /* STEP 1: Store the backtrace in the traces array. */
- /* A size of zero indicates that the trace bucket is free. */
-
- if (trace->zt_size > 0 && bcmp(trace->zt_stack, bt, (depth * sizeof(uintptr_t))) != 0) {
+ while (zone->z_elems_free <= zone->z_elems_rsv) {
/*
- * Different unique trace with same hash!
- * Just bail - if we're trying to record the leaker, hopefully the other trace will be deallocated
- * and get out of the way for later chances
+ * Wakeup the replenish thread if not running.
*/
- trace->zt_collisions++;
- z_trace_collisions++;
-
- lck_spin_unlock(&zleak_lock);
- return TRUE;
- } else if (trace->zt_size > 0) {
- /* Same trace, already added, so increment refcount */
- trace->zt_size += allocation_size;
- } else {
- /* Found an unused trace bucket, record the trace here! */
- if (trace->zt_depth != 0) { /* if this slot was previously used but not currently in use */
- z_trace_overwrites++;
+ if (!zone->z_async_refilling) {
+ os_atomic_add(&zone_replenish_active,
+ ZONE_REPLENISH_ACTIVE_INC, relaxed);
+ zone->z_async_refilling = true;
+ zone_replenish_wakeups_initiated++;
+ thread_wakeup(&zone->z_elems_rsv);
}
- z_trace_recorded++;
- trace->zt_size = allocation_size;
- memcpy(trace->zt_stack, bt, (depth * sizeof(uintptr_t)));
-
- trace->zt_depth = depth;
- trace->zt_collisions = 0;
- }
-
- /* STEP 2: Store the allocation record in the allocations array. */
+ if (zone->z_elems_free > min_free) {
+ break;
+ }
- if (allocation->za_element != (uintptr_t) 0) {
/*
- * Straight up replace any allocation record that was there. We don't want to do the work
- * to preserve the allocation entries that were there, because we only record a subset of the
- * allocations anyways.
+ * TH_OPT_ZONE_PRIV threads are the GC thread and a replenish
+ * thread itself.
+ *
+ * Replenish threads *need* to use the reserve. GC threads need
+ * to get through the current allocation, but then will wait at
+ * a higher level after they've dropped any locks which would
+ * deadlock the replenish thread.
+ *
+ * The value of (refill_level / 2) in the previous bit of code
+ * should have given us headroom even though this thread didn't
+ * wait.
*/
+ if (thr->options & TH_OPT_ZONE_PRIV) {
+ assert(zone->z_elems_free != 0);
+ break;
+ }
- z_alloc_collisions++;
-
- struct ztrace* associated_trace = &ztraces[allocation->za_trace_index];
- /* Knock off old allocation's size, not the new allocation */
- associated_trace->zt_size -= allocation->za_size;
- } else if (allocation->za_trace_index != 0) {
- /* Slot previously used but not currently in use */
- z_alloc_overwrites++;
- }
-
- allocation->za_element = addr;
- allocation->za_trace_index = trace_index;
- allocation->za_size = allocation_size;
+ if (startup_phase < STARTUP_SUB_MACH_IPC) {
+ panic("vm_map_steal_memory didn't steal enough memory: "
+ "trying to grow [%s%s] before the scheduler has started",
+ zone_heap_name(zone), zone_name(zone));
+ }
- z_alloc_recorded++;
+ /*
+ * Wait for the replenish threads to add more elements
+ * for us to allocate from.
+ */
+ zone_replenish_throttle_count++;
+ zone->z_replenish_wait = true;
+ assert_wait_timeout(zone, THREAD_UNINT, 1, NSEC_PER_MSEC);
+ zone_unlock(zone);
+ thread_block(THREAD_CONTINUE_NULL);
+ zone_lock(zone);
+ zone->z_replenish_wait = false;
- if (top_ztrace->zt_size < trace->zt_size) {
- top_ztrace = trace;
+ assert(zone->z_self == zone);
}
+}
- lck_spin_unlock(&zleak_lock);
- return TRUE;
+static bool
+zone_replenish_needed(zone_t z)
+{
+ return z->z_elems_free <= z->z_elems_rsv;
}
/*
- * Free the allocation record and release the stacktrace.
- * This should be as fast as possible because it will be called for every free.
+ * High priority VM privileged thread used to asynchronously refill a given zone.
+ * These are needed for data structures used by the lower level VM itself. The
+ * replenish thread maintains a reserve of elements, so that the VM will never
+ * block in the zone allocator.
*/
-__attribute__((noinline))
+__dead2
static void
-zleak_free(uintptr_t addr,
- vm_size_t allocation_size)
+zone_replenish_thread(void *_z, wait_result_t __unused wr)
{
- if (addr == (uintptr_t) 0) {
- return;
+ unsigned o_active, n_active;
+ zone_t z = _z;
+
+ zone_lock(z);
+ assert(z->z_self == z);
+ assert(z->z_async_refilling && z->z_replenishes);
+
+ zone_expand_locked(z, Z_WAITOK, zone_replenish_needed);
+
+ if (z->z_replenish_wait) {
+ /* Wakeup any potentially throttled allocations */
+ z->z_replenish_wait = false;
+ thread_wakeup(z);
}
- struct zallocation* allocation = &zallocations[hashaddr(addr, zleak_alloc_buckets)];
+ /* wakeup zone_reclaim() callers that were possibly waiting */
+ os_atomic_rmw_loop(&zone_replenish_active, o_active, n_active, relaxed, {
+ if (os_sub_overflow(o_active, ZONE_REPLENISH_ACTIVE_INC, &n_active)) {
+ panic("zone_replenish_active corrupt: %d", o_active);
+ }
+ if ((n_active & ZONE_REPLENISH_ACTIVE_MASK) == 0) {
+ n_active = ZONE_REPLENISH_ACTIVE_NONE;
+ }
+ });
- /* Double-checked locking: check to find out if we're interested, lock, check to make
- * sure it hasn't changed, then modify it, and release the lock.
- */
+ if (n_active == ZONE_REPLENISH_ACTIVE_NONE &&
+ (o_active & ZONE_REPLENISH_ACTIVE_WAITER_BIT)) {
+ thread_wakeup(&zone_replenish_active);
+ }
- if (allocation->za_element == addr && allocation->za_trace_index < zleak_trace_buckets) {
- /* if the allocation was the one, grab the lock, check again, then delete it */
- lck_spin_lock(&zleak_lock);
+ z->z_async_refilling = false;
+ assert_wait(&z->z_elems_rsv, THREAD_UNINT);
- if (allocation->za_element == addr && allocation->za_trace_index < zleak_trace_buckets) {
- struct ztrace *trace;
+ zone_unlock(z);
- /* allocation_size had better match what was passed into zleak_log - otherwise someone is freeing into the wrong zone! */
- if (allocation->za_size != allocation_size) {
- panic("Freeing as size %lu memory that was allocated with size %lu\n",
- (uintptr_t)allocation_size, (uintptr_t)allocation->za_size);
- }
+ thread_block_parameter(zone_replenish_thread, z);
+ __builtin_unreachable();
+}
- trace = &ztraces[allocation->za_trace_index];
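+/*!
+ * @function zone_replenish_configure
+ *
+ * @brief
+ * Turns on asynchronous replenishment for a zone: sizes its reserve from
+ * @c ZONE_REPLENISH_TARGET and spawns its dedicated high-priority replenish
+ * thread (VM and zone privileged).
+ */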
+void
+zone_replenish_configure(zone_t z)
+{
+ thread_t th;
+ kern_return_t kr;
+ char name[MAXTHREADNAMESIZE];
- /* size of 0 indicates trace bucket is unused */
- if (trace->zt_size > 0) {
- trace->zt_size -= allocation_size;
- }
+ zone_lock(z);
+ assert(!z->z_replenishes && !z->z_destructible);
+ z->z_elems_rsv = (uint16_t)(ZONE_REPLENISH_TARGET / zone_elem_size(z));
+ z->z_replenishes = true;
+ os_atomic_add(&zone_replenish_active, ZONE_REPLENISH_ACTIVE_INC, relaxed);
+ z->z_async_refilling = true;
+ zone_unlock(z);
- /* A NULL element means the allocation bucket is unused */
- allocation->za_element = 0;
- }
- lck_spin_unlock(&zleak_lock);
+ kr = kernel_thread_create(zone_replenish_thread, z, MAXPRI_KERNEL, &th);
+ if (kr != KERN_SUCCESS) {
+ panic("zone_replenish_configure, thread create: 0x%x", kr);
}
+ /* make sure this thread can't lose its stack */
+ assert(th->reserved_stack == th->kernel_stack);
+
+ snprintf(name, sizeof(name), "z_replenish(%s)", zone_name(z));
+ thread_set_thread_name(th, name);
+
+ thread_mtx_lock(th);
+ th->options |= TH_OPT_VMPRIV | TH_OPT_ZONE_PRIV;
+ thread_start(th);
+ thread_mtx_unlock(th);
+
+ thread_deallocate(th);
}
-#endif /* CONFIG_ZLEAKS */
+/*! @} */
+#endif /* !ZALLOC_TEST */
+#pragma mark zone jetsam integration
+#if !ZALLOC_TEST
-/* These functions outside of CONFIG_ZLEAKS because they are also used in
- * mbuf.c for mbuf leak-detection. This is why they lack the z_ prefix.
+/*
+ * We're being very conservative here and picking a value of 95%. We might need to lower this if
+ * we find that we're not catching the problem and are still hitting zone map exhaustion panics.
*/
+#define ZONE_MAP_JETSAM_LIMIT_DEFAULT 95
-/* "Thomas Wang's 32/64 bit mix functions." http://www.concentric.net/~Ttwang/tech/inthash.htm */
-uintptr_t
-hash_mix(uintptr_t x)
+/*
+ * Trigger zone-map-exhaustion jetsams if the zone map is X% full, where X=zone_map_jetsam_limit.
+ * Can be set via boot-arg "zone_map_jetsam_limit". Set to 95% by default.
+ */
+TUNABLE_WRITEABLE(unsigned int, zone_map_jetsam_limit, "zone_map_jetsam_limit",
+ ZONE_MAP_JETSAM_LIMIT_DEFAULT);
+
+void
+get_zone_map_size(uint64_t *current_size, uint64_t *capacity)
{
-#ifndef __LP64__
- x += ~(x << 15);
- x ^= (x >> 10);
- x += (x << 3);
- x ^= (x >> 6);
- x += ~(x << 11);
- x ^= (x >> 16);
-#else
- x += ~(x << 32);
- x ^= (x >> 22);
- x += ~(x << 13);
- x ^= (x >> 8);
- x += (x << 3);
- x ^= (x >> 15);
- x += ~(x << 27);
- x ^= (x >> 31);
-#endif
- return x;
+ vm_offset_t phys_pages = os_atomic_load(&zones_phys_page_mapped_count, relaxed);
+ *current_size = ptoa_64(phys_pages);
+ *capacity = ptoa_64(zone_phys_mapped_max_pages);
}
-uint32_t
-hashbacktrace(uintptr_t* bt, uint32_t depth, uint32_t max_size)
+void
+get_largest_zone_info(char *zone_name, size_t zone_name_len, uint64_t *zone_size)
{
- uintptr_t hash = 0;
- uintptr_t mask = max_size - 1;
+ zone_t largest_zone = zone_find_largest();
- while (depth) {
- hash += bt[--depth];
+ /*
+ * Append kalloc heap name to zone name (if zone is used by kalloc)
+ */
+ snprintf(zone_name, zone_name_len, "%s%s",
+ zone_heap_name(largest_zone), largest_zone->z_name);
+
+ *zone_size = zone_size_wired(largest_zone);
+}
+
+bool
+zone_map_nearing_exhaustion(void)
+{
+ uint64_t phys_pages = os_atomic_load(&zones_phys_page_mapped_count, relaxed);
+ return phys_pages * 100 > zone_phys_mapped_max_pages * zone_map_jetsam_limit;
+}
+
+
+#define VMENTRY_TO_VMOBJECT_COMPARISON_RATIO 98
+
+/*
+ * Tries to kill a single process if it can attribute one to the largest zone. If not, wakes up the memorystatus thread
+ * to walk through the jetsam priority bands and kill processes.
+ */
+static void
+kill_process_in_largest_zone(void)
+{
+ pid_t pid = -1;
+ zone_t largest_zone = zone_find_largest();
+
+ printf("zone_map_exhaustion: Zone mapped %lld of %lld, used %lld, capacity %lld [jetsam limit %d%%]\n",
+ ptoa_64(os_atomic_load(&zones_phys_page_mapped_count, relaxed)),
+ ptoa_64(zone_phys_mapped_max_pages),
+ (uint64_t)zone_submaps_approx_size(),
+ (uint64_t)(zone_foreign_size() + zone_native_size()),
+ zone_map_jetsam_limit);
+ printf("zone_map_exhaustion: Largest zone %s%s, size %lu\n", zone_heap_name(largest_zone),
+ largest_zone->z_name, (uintptr_t)zone_size_wired(largest_zone));
+
+ /*
+ * We want to make sure we don't call this function from userspace,
+ * or we could end up trying to synchronously kill the process
+ * whose context we're in, causing the system to hang.
+ */
+ assert(current_task() == kernel_task);
+
+ /*
+ * If vm_object_zone is the largest, check to see if the number of
+ * elements in vm_map_entry_zone is comparable.
+ *
+ * If so, consider vm_map_entry_zone as the largest. This lets us target
+ * a specific process to jetsam to quickly recover from the zone map
+ * bloat.
+ */
+ if (largest_zone == vm_object_zone) {
+ unsigned int vm_object_zone_count = zone_count_allocated(vm_object_zone);
+ unsigned int vm_map_entry_zone_count = zone_count_allocated(vm_map_entry_zone);
+ /* Is the VM map entries zone count >= 98% of the VM objects zone count? */
+ if (vm_map_entry_zone_count >= ((vm_object_zone_count * VMENTRY_TO_VMOBJECT_COMPARISON_RATIO) / 100)) {
+ largest_zone = vm_map_entry_zone;
+ printf("zone_map_exhaustion: Picking VM map entries as the zone to target, size %lu\n",
+ (uintptr_t)zone_size_wired(largest_zone));
+ }
+ }
+
+ /* TODO: Extend this to check for the largest process in other zones as well. */
+ if (largest_zone == vm_map_entry_zone) {
+ pid = find_largest_process_vm_map_entries();
+ } else {
+ printf("zone_map_exhaustion: Nothing to do for the largest zone [%s%s]. "
+ "Waking up memorystatus thread.\n", zone_heap_name(largest_zone),
+ largest_zone->z_name);
}
+ if (!memorystatus_kill_on_zone_map_exhaustion(pid)) {
+ printf("zone_map_exhaustion: Call to memorystatus failed, victim pid: %d\n", pid);
+ }
+}
- hash = hash_mix(hash) & mask;
-
- assert(hash < max_size);
+#endif /* !ZALLOC_TEST */
+#pragma mark zfree
+#if !ZALLOC_TEST
+#if KASAN_ZALLOC
- return (uint32_t) hash;
-}
+/*!
+ * @defgroup zfree
+ * @{
+ *
+ * @brief
+ * The codepath for zone frees.
+ *
+ * @discussion
+ * There are 4 major ways to free memory that ends up in the zone allocator:
+ * - @c zfree()
+ * - @c zfree_percpu()
+ * - @c kfree*()
+ * - @c zfree_permanent()
+ *
+ * While permanent zones have their own allocation scheme, all other codepaths
+ * will eventually go through the @c zfree_ext() choking point.
+ *
+ * Ignoring the @c gzalloc_free() codepath, the decision tree looks like this:
+ * <code>
+ * zfree_ext()
+ * ├───> zfree_cached() ────────────────╮
+ * │ │ │
+ * │ │ │
+ * │ ├───> zfree_cached_slow() ───┤
+ * │ │ │ │
+ * │ │ v │
+ * ╰───────┴───> zfree_item() ──────────┴───>
+ * </code>
+ *
+ * @c zfree_ext() takes care of all the generic work to perform on an element
+ * before it is freed (zeroing, logging, tagging, ...) then will hand it off to:
+ * - @c zfree_item() if zone caching is off
+ * - @c zfree_cached() if zone caching is on.
+ *
+ * @c zfree_cached can take a number of decisions:
+ * - a fast path if the (f) or (a) magazines have space (preemption disabled),
+ * - using the CPU-local or recirculation depot by calling @c zfree_cached_slow(),
+ * - falling back to @c zfree_item() when CPU caching has been disabled.
+ */
/*
- * TODO: Determine how well distributed this is
- * max_size must be a power of 2. i.e 0x10000 because 0x10000-1 is 0x0FFFF which is a great bitmask
+ * Called from zfree() to add the element being freed to the KASan quarantine.
+ *
+ * Returns true if the newly-freed element made it into the quarantine without
+ * displacing another, false otherwise. In the latter case, addrp points to the
+ * address of the displaced element, which will be freed by the zone.
*/
-uint32_t
-hashaddr(uintptr_t pt, uint32_t max_size)
+static bool
+kasan_quarantine_freed_element(
+ zone_t *zonep, /* the zone the element is being freed to */
+ void **addrp) /* address of the element being freed */
{
- uintptr_t hash = 0;
- uintptr_t mask = max_size - 1;
-
- hash = hash_mix(pt) & mask;
+ zone_t zone = *zonep;
+ void *addr = *addrp;
- assert(hash < max_size);
+ /*
+ * Resize back to the real allocation size and hand off to the KASan
+ * quarantine. `addr` may then point to a different allocation, if the
+ * current element replaced another in the quarantine. The zone then
+ * takes ownership of the swapped out free element.
+ */
+ vm_size_t usersz = zone_elem_size(zone) - 2 * zone->z_kasan_redzone;
+ vm_size_t sz = usersz;
- return (uint32_t) hash;
+ if (addr && zone->z_kasan_redzone) {
+ kasan_check_free((vm_address_t)addr, usersz, KASAN_HEAP_ZALLOC);
+ addr = (void *)kasan_dealloc((vm_address_t)addr, &sz);
+ assert(sz == zone_elem_size(zone));
+ }
+ if (addr && !zone->kasan_noquarantine) {
+ kasan_free(&addr, &sz, KASAN_HEAP_ZALLOC, zonep, usersz, true);
+ if (!addr) {
+ return TRUE;
+ }
+ }
+ if (addr && zone->kasan_noquarantine) {
+ kasan_unpoison(addr, zone_elem_size(zone));
+ }
+ *addrp = addr;
+ return FALSE;
}
-/* End of all leak-detection code */
-#pragma mark zone creation, configuration, destruction
+#endif /* KASAN_ZALLOC */
-static zone_t
-zone_init_defaults(zone_id_t zid)
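+/*!
+ * @function zfree_drop
+ *
+ * @brief
+ * Marks an element as free in its chunk metadata (panicking on double free)
+ * and requeues the chunk onto the empty or partial page queue when its
+ * allocated size crosses the relevant threshold.
+ */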
+__header_always_inline void
+zfree_drop(zone_t zone, struct zone_page_metadata *meta, zone_element_t ze,
+ bool recirc)
{
- zone_t z = &zone_array[zid];
-
- z->page_count_max = ~0u;
- z->collectable = true;
- z->expandable = true;
- z->submap_idx = Z_SUBMAP_IDX_GENERAL_MAP;
+ vm_offset_t esize = zone_elem_size(zone);
- simple_lock_init(&z->lock, 0);
+ if (zone_meta_mark_free(meta, ze) == recirc) {
+ zone_meta_double_free_panic(zone, ze, __func__);
+ }
- return z;
-}
+ vm_offset_t old_size = meta->zm_alloc_size;
+ vm_offset_t max_size = ptoa(meta->zm_chunk_len) + ZM_ALLOC_SIZE_LOCK;
+ vm_offset_t new_size = zone_meta_alloc_size_sub(zone, meta, esize);
-static bool
-zone_is_initializing(zone_t z)
-{
- return !z->z_self && !z->destroyed;
+ if (new_size == 0) {
+ /* whether the page was on the intermediate or all_used queue, move it to free */
+ zone_meta_requeue(zone, &zone->z_pageq_empty, meta);
+ zone->z_wired_empty += meta->zm_chunk_len;
+ } else if (old_size + esize > max_size) {
+ /* first free element on page, move from all_used */
+ zone_meta_requeue(zone, &zone->z_pageq_partial, meta);
+ }
}
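+/*!
+ * @function zfree_item
+ *
+ * @brief
+ * Frees an element directly to the zone, used when zone caching is off or
+ * unavailable; converts the preemption disabled by @c zfree_ext() into
+ * holding the zone lock.
+ */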
static void
-zone_set_max(zone_t z, vm_size_t max)
+zfree_item(zone_t zone, struct zone_page_metadata *meta, zone_element_t ze)
{
-#if KASAN_ZALLOC
- if (z->kasan_redzone) {
- /*
- * Adjust the max memory for the kasan redzones
- */
- max += (max / z->pcpu_elem_size) * z->kasan_redzone * 2;
- }
-#endif
- if (max < z->percpu ? 1 : z->alloc_pages) {
- max = z->percpu ? 1 : z->alloc_pages;
- } else {
- max = atop(round_page(max));
- }
- z->page_count_max = max;
-}
+ /* transfer preemption count to lock */
+ zone_lock_nopreempt_check_contention(zone, NULL);
-void
-zone_set_submap_idx(zone_t zone, unsigned int sub_map_idx)
-{
- if (!zone_is_initializing(zone)) {
- panic("%s: called after zone_create()", __func__);
- }
- if (sub_map_idx > zone_last_submap_idx) {
- panic("zone_set_submap_idx(%d) > %d", sub_map_idx, zone_last_submap_idx);
- }
- zone->submap_idx = sub_map_idx;
+ zfree_drop(zone, meta, ze, false);
+ zone_elems_free_add(zone, 1);
+
+ zone_unlock(zone);
}
-void
-zone_set_noexpand(
- zone_t zone,
- vm_size_t max)
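+/*!
+ * @function zfree_cached_slow
+ *
+ * @brief
+ * Handles frees when the per-CPU magazines are full: stashes the full
+ * magazine in the CPU-local depot when it is enabled and has room, otherwise
+ * migrates one or more magazines to the zone's recirculation depot, falling
+ * back to @c zfree_item() if no magazine can be allocated.
+ */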
+__attribute__((noinline))
+static void
+zfree_cached_slow(zone_t zone, struct zone_page_metadata *meta,
+ zone_element_t ze, zone_cache_t cache)
{
- if (!zone_is_initializing(zone)) {
- panic("%s: called after zone_create()", __func__);
+ struct zone_depot mags = STAILQ_HEAD_INITIALIZER(mags);
+ zone_magazine_t mag = NULL;
+ uint16_t n = 0;
+
+ if (zone_meta_is_free(meta, ze)) {
+ zone_meta_double_free_panic(zone, ze, __func__);
}
- zone->expandable = false;
- zone_set_max(zone, max);
-}
-void
-zone_set_exhaustible(
- zone_t zone,
- vm_size_t max)
-{
- if (!zone_is_initializing(zone)) {
- panic("%s: called after zone_create()", __func__);
+ if (zone == zc_magazine_zone) {
+ mag = (zone_magazine_t)zone_element_addr(ze,
+ zone_elem_size(zone));
+#if KASAN_ZALLOC
+ kasan_poison_range((vm_offset_t)mag, zone_elem_size(zone),
+ ASAN_VALID);
+#endif
+ } else {
+ mag = zone_magazine_alloc(Z_NOWAIT);
+ if (__improbable(mag == NULL)) {
+ return zfree_item(zone, meta, ze);
+ }
+ mag->zm_cur = 1;
+ mag->zm_elems[0] = ze;
}
- zone->expandable = false;
- zone->exhaustible = true;
- zone_set_max(zone, max);
-}
-/**
- * @function zone_create_find
- *
- * @abstract
- * Finds an unused zone for the given name and element size.
- *
- * @param name the zone name
- * @param size the element size (including redzones, ...)
- * @param flags the flags passed to @c zone_create*
- * @param zid the desired zone ID or ZONE_ID_ANY
- *
- * @returns a zone to initialize further.
- */
-static zone_t
-zone_create_find(
- const char *name,
- vm_size_t size,
- zone_create_flags_t flags,
- zone_id_t zid)
-{
- zone_id_t nzones;
- zone_t z;
+ mag = zone_magazine_replace(&cache->zc_free_cur,
+ &cache->zc_free_elems, mag);
- simple_lock(&all_zones_lock, &zone_locks_grp);
+ z_debug_assert(cache->zc_free_cur <= 1);
+ z_debug_assert(mag->zm_cur == zc_mag_size());
- nzones = (zone_id_t)os_atomic_load(&num_zones, relaxed);
- assert(num_zones_in_use <= nzones && nzones < MAX_ZONES);
+ STAILQ_INSERT_HEAD(&mags, mag, zm_link);
+ n = 1;
- if (__improbable(nzones < ZONE_ID__FIRST_DYNAMIC)) {
+ if (cache->zc_depot_max >= 2 * zc_mag_size()) {
/*
- * The first time around, make sure the reserved zone IDs
- * have an initialized lock as zone_index_foreach() will
- * enumerate them.
+ * If we can use the local depot (zc_depot_max allows for
+ * 2 magazines worth of elements) then:
+ *
+ * 1. if we have space for an extra depot locally,
+ * push it, and leave.
+ *
+ * 2. if we overflow, then take (1 / zc_recirc_denom)
+ * of the depot out, in order to migrate it to the
+ * recirculation depot.
*/
- while (nzones < ZONE_ID__FIRST_DYNAMIC) {
- zone_init_defaults(nzones++);
- }
-
- os_atomic_store(&num_zones, nzones, release);
- }
+ zone_depot_lock_nopreempt(cache);
- if (zid != ZONE_ID_ANY) {
- if (zid >= ZONE_ID__FIRST_DYNAMIC) {
- panic("zone_create: invalid desired zone ID %d for %s",
- zid, name);
+ if ((cache->zc_depot_cur + 2) * zc_mag_size() <=
+ cache->zc_depot_max) {
+ cache->zc_depot_cur++;
+ STAILQ_INSERT_TAIL(&cache->zc_depot, mag, zm_link);
+ return zone_depot_unlock(cache);
}
- if (flags & ZC_DESTRUCTIBLE) {
- panic("zone_create: ID %d (%s) must be permanent", zid, name);
- }
- if (zone_array[zid].z_self) {
- panic("zone_create: creating zone ID %d (%s) twice", zid, name);
+
+ while (zc_recirc_denom * cache->zc_depot_cur * zc_mag_size() >=
+ (zc_recirc_denom - 1) * cache->zc_depot_max) {
+ mag = STAILQ_FIRST(&cache->zc_depot);
+ STAILQ_REMOVE_HEAD(&cache->zc_depot, zm_link);
+ STAILQ_INSERT_TAIL(&mags, mag, zm_link);
+ cache->zc_depot_cur--;
+ n++;
}
- z = &zone_array[zid];
+
+ zone_depot_unlock(cache);
} else {
- if (flags & ZC_DESTRUCTIBLE) {
- /*
- * If possible, find a previously zdestroy'ed zone in the
- * zone_array that we can reuse.
- */
- for (int i = bitmap_first(zone_destroyed_bitmap, MAX_ZONES);
- i >= 0; i = bitmap_next(zone_destroyed_bitmap, i)) {
- z = &zone_array[i];
+ enable_preemption();
+ }
- /*
- * If the zone name and the element size are the
- * same, we can just reuse the old zone struct.
- */
- if (strcmp(z->z_name, name) || zone_elem_size(z) != size) {
- continue;
- }
- bitmap_clear(zone_destroyed_bitmap, i);
- z->destroyed = false;
- z->z_self = z;
- zid = (zone_id_t)i;
- goto out;
- }
+ /*
+ * Preflight validity of all the elements before we touch the zone
+ * metadata, and then insert them into the recirculation depot.
+ */
+ STAILQ_FOREACH(mag, &mags, zm_link) {
+ for (uint16_t i = 0; i < zc_mag_size(); i++) {
+ zone_element_validate(zone, mag->zm_elems[i]);
}
+ }
- zid = nzones++;
- z = zone_init_defaults(zid);
+ zone_lock_check_contention(zone, cache);
- /*
- * The release barrier pairs with the acquire in
- * zone_index_foreach() and makes sure that enumeration loops
- * always see an initialized zone lock.
- */
- os_atomic_store(&num_zones, nzones, release);
+ STAILQ_FOREACH(mag, &mags, zm_link) {
+ for (uint16_t i = 0; i < zc_mag_size(); i++) {
+ zone_element_t e = mag->zm_elems[i];
+
+ if (!zone_meta_mark_free(zone_meta_from_element(e), e)) {
+ zone_meta_double_free_panic(zone, e, __func__);
+ }
+ }
}
+ STAILQ_CONCAT(&zone->z_recirc, &mags);
+ zone->z_recirc_cur += n;
-out:
- num_zones_in_use++;
- simple_unlock(&all_zones_lock);
+ zone_elems_free_add(zone, n * zc_mag_size());
- return z;
+ zone_unlock(zone);
}
-__abortlike
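+/*!
+ * @function zfree_cached
+ *
+ * @brief
+ * Frees an element into the per-CPU cache (preemption disabled by the
+ * caller), swapping magazines or punting to @c zfree_cached_slow() when both
+ * magazines are full, and falling back to @c zfree_item() if the cache has
+ * been torn down.
+ */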
static void
-zone_create_panic(const char *name, const char *f1, const char *f2)
+zfree_cached(zone_t zone, struct zone_page_metadata *meta, zone_element_t ze)
{
- panic("zone_create: creating zone %s: flag %s and %s are incompatible",
- name, f1, f2);
-}
-#define zone_create_assert_not_both(name, flags, current_flag, forbidden_flag) \
- if ((flags) & forbidden_flag) { \
- zone_create_panic(name, #current_flag, #forbidden_flag); \
+ zone_cache_t cache = zpercpu_get(zone->z_pcpu_cache);
+
+ if (cache->zc_free_cur >= zc_mag_size()) {
+ if (cache->zc_alloc_cur >= zc_mag_size()) {
+ return zfree_cached_slow(zone, meta, ze, cache);
+ }
+ zone_cache_swap_magazines(cache);
+ }
+
+ if (__improbable(cache->zc_alloc_elems == NULL)) {
+ return zfree_item(zone, meta, ze);
+ }
+
+ if (zone_meta_is_free(meta, ze)) {
+ zone_meta_double_free_panic(zone, ze, __func__);
+ }
+
+ uint16_t idx = cache->zc_free_cur++;
+ if (idx >= zc_mag_size()) {
+ zone_accounting_panic(zone, "zc_free_cur overflow");
}
+ cache->zc_free_elems[idx] = ze;
+
+ enable_preemption();
+}
/*
- * Adjusts the size of the element based on minimum size, alignment
- * and kasan redzones
+ * The function is noinline when zlog can be used so that backtracing can
+ * reliably skip the uninteresting zfree_ext() and zfree_log_trace() frames.
*/
-static vm_size_t
-zone_elem_adjust_size(
- const char *name __unused,
- vm_size_t elem_size,
- zone_create_flags_t flags,
- vm_size_t *redzone __unused)
+#if ZONE_ENABLE_LOGGING
+__attribute__((noinline))
+#endif /* ZONE_ENABLE_LOGGING */
+void
+zfree_ext(zone_t zone, zone_stats_t zstats, void *addr)
{
- vm_size_t size;
+ struct zone_page_metadata *page_meta;
+ vm_offset_t elem = (vm_offset_t)addr;
+ vm_size_t elem_size = zone_elem_size(zone);
+ zone_element_t ze;
+
+ DTRACE_VM2(zfree, zone_t, zone, void*, addr);
+ TRACE_MACHLEAKS(ZFREE_CODE, ZFREE_CODE_2, elem_size, elem);
+#if VM_MAX_TAG_ZONES
+ if (__improbable(zone->tags)) {
+ vm_tag_t tag = *ztSlot(zone, elem) >> 1;
+ // set the tag with b0 clear so the block remains inuse
+ *ztSlot(zone, elem) = 0xFFFE;
+ vm_tag_update_zone_size(tag, zone->tag_zone_index,
+ -(long)elem_size);
+ }
+#endif /* VM_MAX_TAG_ZONES */
+
+#if KASAN_ZALLOC
+ if (kasan_quarantine_freed_element(&zone, &addr)) {
+ return;
+ }
+ /*
+ * kasan_quarantine_freed_element() might return a different
+ * {zone, addr} than the one being freed for kalloc heaps.
+ *
+ * Make sure we reload everything.
+ */
+ elem = (vm_offset_t)addr;
+ elem_size = zone_elem_size(zone);
+#endif
+#if CONFIG_ZLEAKS
/*
- * Adjust element size for minimum size and pointer alignment
+ * Zone leak detection: un-track the allocation
*/
- size = (elem_size + sizeof(vm_offset_t) - 1) & -sizeof(vm_offset_t);
- if (((flags & ZC_PERCPU) == 0) && size < ZONE_MIN_ELEM_SIZE) {
- size = ZONE_MIN_ELEM_SIZE;
+ if (__improbable(zone->zleak_on)) {
+ zleak_free(elem, elem_size);
+ }
+#endif /* CONFIG_ZLEAKS */
+#if ZONE_ENABLE_LOGGING
+ if (__improbable(DO_LOGGING(zone))) {
+ zfree_log_trace(zone, elem, __builtin_frame_address(0));
+ }
+#endif /* ZONE_ENABLE_LOGGING */
+#if CONFIG_GZALLOC
+ if (__improbable(zone->gzalloc_tracked)) {
+ return gzalloc_free(zone, zstats, addr);
}
+#endif /* CONFIG_GZALLOC */
+ page_meta = zone_element_resolve(zone, elem, elem_size, &ze);
+ ze.ze_value |= zfree_clear_or_poison(zone, elem, elem_size);
#if KASAN_ZALLOC
- /*
- * Expand the zone allocation size to include the redzones.
- *
- * For page-multiple zones add a full guard page because they
- * likely require alignment.
- */
- vm_size_t redzone_tmp;
- if (flags & (ZC_KASAN_NOREDZONE | ZC_PERCPU)) {
- redzone_tmp = 0;
- } else if ((size & PAGE_MASK) == 0) {
- if (size != PAGE_SIZE && (flags & ZC_ALIGNMENT_REQUIRED)) {
- panic("zone_create: zone %s can't provide more than PAGE_SIZE"
- "alignment", name);
+ if (zone->z_percpu) {
+ zpercpu_foreach_cpu(i) {
+ kasan_poison_range(elem + ptoa(i), elem_size,
+ ASAN_HEAP_FREED);
}
- redzone_tmp = PAGE_SIZE;
- } else if (flags & ZC_ALIGNMENT_REQUIRED) {
- redzone_tmp = 0;
} else {
- redzone_tmp = KASAN_GUARD_SIZE;
- }
- size += redzone_tmp * 2;
- if (redzone) {
- *redzone = redzone_tmp;
+ kasan_poison_range(elem, elem_size, ASAN_HEAP_FREED);
}
#endif
- return size;
+
+ disable_preemption();
+ zpercpu_get(zstats)->zs_mem_freed += elem_size;
+
+ if (zone->z_pcpu_cache) {
+ return zfree_cached(zone, page_meta, ze);
+ }
+
+ return zfree_item(zone, page_meta, ze);
}
-/*
- * Returns the allocation chunk size that has least framentation
- */
-static vm_size_t
-zone_get_min_alloc_granule(
- vm_size_t elem_size,
- zone_create_flags_t flags)
+void
+(zfree)(union zone_or_view zov, void *addr)
{
- vm_size_t alloc_granule = PAGE_SIZE;
- if (flags & ZC_PERCPU) {
- alloc_granule = PAGE_SIZE * zpercpu_count();
- if (PAGE_SIZE % elem_size > 256) {
- panic("zone_create: per-cpu zone has too much fragmentation");
- }
- } else if ((elem_size & PAGE_MASK) == 0) {
- /* zero fragmentation by definition */
- alloc_granule = elem_size;
- } else if (alloc_granule % elem_size == 0) {
- /* zero fragmentation by definition */
- } else {
- vm_size_t frag = (alloc_granule % elem_size) * 100 / alloc_granule;
- vm_size_t alloc_tmp = PAGE_SIZE;
- while ((alloc_tmp += PAGE_SIZE) <= ZONE_MAX_ALLOC_SIZE) {
- vm_size_t frag_tmp = (alloc_tmp % elem_size) * 100 / alloc_tmp;
- if (frag_tmp < frag) {
- frag = frag_tmp;
- alloc_granule = alloc_tmp;
- }
- }
- }
- return alloc_granule;
+ zone_t zone = zov.zov_view->zv_zone;
+ zone_stats_t zstats = zov.zov_view->zv_stats;
+ assert(!zone->z_percpu);
+ zfree_ext(zone, zstats, addr);
}
-vm_size_t
-zone_get_foreign_alloc_size(
- const char *name __unused,
- vm_size_t elem_size,
- zone_create_flags_t flags,
- uint16_t min_pages)
+void
+zfree_percpu(union zone_or_view zov, void *addr)
{
- vm_size_t adjusted_size = zone_elem_adjust_size(name, elem_size, flags,
- NULL);
- vm_size_t alloc_granule = zone_get_min_alloc_granule(adjusted_size,
- flags);
- vm_size_t min_size = min_pages * PAGE_SIZE;
- /*
- * Round up min_size to a multiple of alloc_granule
- */
- return ((min_size + alloc_granule - 1) / alloc_granule)
- * alloc_granule;
+ zone_t zone = zov.zov_view->zv_zone;
+ zone_stats_t zstats = zov.zov_view->zv_stats;
+ assert(zone->z_percpu);
+ zfree_ext(zone, zstats, (void *)__zpcpu_demangle(addr));
}
-zone_t
-zone_create_ext(
- const char *name,
- vm_size_t size,
- zone_create_flags_t flags,
- zone_id_t desired_zid,
- void (^extra_setup)(zone_t))
+/*! @} */
+#endif /* !ZALLOC_TEST */
+#pragma mark zalloc
+#if !ZALLOC_TEST
+
+/*!
+ * @defgroup zalloc
+ * @{
+ *
+ * @brief
+ * The codepath for zone allocations.
+ *
+ * @discussion
+ * There are 4 major ways to allocate memory that end up in the zone allocator:
+ * - @c zalloc(), @c zalloc_flags(), ...
+ * - @c zalloc_percpu()
+ * - @c kalloc*()
+ * - @c zalloc_permanent()
+ *
+ * While permanent zones have their own allocation scheme, all other codepaths
+ * will eventually go through the @c zalloc_ext() choking point.
+ *
+ * Ignoring the @c zalloc_gz() codepath, the decision tree looks like this:
+ * <code>
+ * zalloc_ext()
+ * │
+ * ├───> zalloc_cached() ──────> zalloc_cached_fast() ───╮
+ * │ │ ^ │
+ * │ │ │ │
+ * │ ╰───> zalloc_cached_slow() ───╯ │
+ * │ │ │
+ * │<─────────────────╮ ├─────────────╮ │
+ * │ │ │ │ │
+ * │ │ v │ │
+ * │<───────╮ ╭──> zalloc_item_slow() ────┤ │
+ * │ │ │ │ │
+ * │ │ │ v │
+ * ╰───> zalloc_item() ──────────> zalloc_item_fast() ───┤
+ * │
+ * v
+ * zalloc_return()
+ * </code>
+ *
+ *
+ * The @c zalloc_item() track is used when zone caching is off:
+ * - @c zalloc_item_fast() is used when there are enough elements available,
+ * - @c zalloc_item_slow() is used when a refill is needed, which can cause
+ * the zone to grow. This is the only codepath that refills.
+ *
+ * This track uses the zone lock for serialization:
+ * - taken in @c zalloc_item(),
+ * - maintained during @c zalloc_item_slow() (possibly dropped and re-taken),
+ * - dropped in @c zalloc_item_fast().
+ *
+ *
+ * The @c zalloc_cached() track is used when zone caching is on:
+ * - @c zalloc_cached_fast() is taken when the cache has elements,
+ * - @c zalloc_cached_slow() is taken if a cache refill is needed.
+ * It can choose among several strategies:
+ * ~ @c zalloc_cached_from_depot() to try to reuse cpu stashed magazines,
+ * ~ using the global recirculation depot @c z_recirc,
+ * ~ using zalloc_import() if the zone has enough elements,
+ * ~ falling back to the @c zalloc_item() track if zone caching is disabled
+ * due to VM pressure or the zone has no available elements.
+ *
+ * This track disables preemption for serialization:
+ * - preemption is disabled in @c zalloc_cached(),
+ * - kept disabled during @c zalloc_cached_slow(), converted into a zone lock
+ * if switching to @c zalloc_item_slow(),
+ * - preemption is reenabled in @c zalloc_cached_fast().
+ *
+ * @c zalloc_cached_from_depot() also takes depot locks (taken by the caller,
+ * released by @c zalloc_cached_from_depot()).
+ *
+ * In general the @c zalloc_*_slow() codepaths deal with refilling and will
+ * tail call into the @c zalloc_*_fast() code to perform the actual allocation.
+ *
+ * @c zalloc_return() is the final function everyone tail calls into,
+ * which prepares the element for consumption by the caller and deals with
+ * common treatment (zone logging, tags, kasan, validation, ...).
+ */
+
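+/*
+ * Illustrative sketch only (thing_zone and struct thing are hypothetical):
+ * most callers reach this code through the public wrappers, e.g.
+ *
+ *     ZONE_DECLARE(thing_zone, "example.thing", sizeof(struct thing),
+ *         ZC_ZFREE_CLEARMEM);
+ *
+ *     struct thing *t = zalloc_flags(thing_zone, Z_WAITOK | Z_ZERO);
+ *     ...
+ *     zfree(thing_zone, t);
+ *
+ * Both ends funnel into the zalloc_ext() and zfree_ext() paths described
+ * in this file.
+ */
+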
+/*!
+ * @function zalloc_import
+ *
+ * @brief
+ * Import @c n elements in the specified array, opposite of @c zfree_drop().
+ *
+ * @param zone The zone to import elements from
+ * @param elems The array to import into
+ * @param n The number of elements to import. Must be non-zero,
+ * and smaller than @c zone->z_elems_free.
+ */
+__header_always_inline void
+zalloc_import(zone_t zone, zone_element_t *elems, uint32_t n)
{
- vm_size_t alloc;
- vm_size_t redzone;
- zone_t z;
+ vm_size_t esize = zone_elem_size(zone);
+ uint32_t i = 0;
- if (size > ZONE_MAX_ALLOC_SIZE) {
- panic("zone_create: element size too large: %zd", (size_t)size);
- }
+ assertf(STAILQ_EMPTY(&zone->z_recirc),
+ "Trying to import from zone %p [%s%s] with non empty recirc",
+ zone, zone_heap_name(zone), zone_name(zone));
- size = zone_elem_adjust_size(name, size, flags, &redzone);
- /*
- * Allocate the zone slot, return early if we found an older match.
- */
- z = zone_create_find(name, size, flags, desired_zid);
- if (__improbable(z->z_self)) {
- /* We found a zone to reuse */
- return z;
- }
+ do {
+ vm_offset_t page, eidx, size = 0;
+ struct zone_page_metadata *meta;
+
+ if (!zone_pva_is_null(zone->z_pageq_partial)) {
+ meta = zone_pva_to_meta(zone->z_pageq_partial);
+ page = zone_pva_to_addr(zone->z_pageq_partial);
+ } else if (!zone_pva_is_null(zone->z_pageq_empty)) {
+ meta = zone_pva_to_meta(zone->z_pageq_empty);
+ page = zone_pva_to_addr(zone->z_pageq_empty);
+ zone_counter_sub(zone, z_wired_empty, meta->zm_chunk_len);
+ } else {
+ zone_accounting_panic(zone, "z_elems_free corruption");
+ }
- /*
- * Initialize the zone properly.
- */
+ if (!zone_has_index(zone, meta->zm_index)) {
+ zone_page_metadata_index_confusion_panic(zone, page, meta);
+ }
- /*
- * If the kernel is post lockdown, copy the zone name passed in.
- * Else simply maintain a pointer to the name string as it can only
- * be a core XNU zone (no unloadable kext exists before lockdown).
- */
- if (startup_phase >= STARTUP_SUB_LOCKDOWN) {
- size_t nsz = MIN(strlen(name) + 1, MACH_ZONE_NAME_MAX_LEN);
- char *buf = zalloc_permanent(nsz, ZALIGN_NONE);
- strlcpy(buf, name, nsz);
- z->z_name = buf;
+ vm_offset_t old_size = meta->zm_alloc_size;
+ vm_offset_t max_size = ptoa(meta->zm_chunk_len) + ZM_ALLOC_SIZE_LOCK;
+
+ do {
+ eidx = zone_meta_find_and_clear_bit(zone, meta);
+ elems[i++] = zone_element_encode(page, eidx, ZPM_AUTO);
+ size += esize;
+ } while (i < n && old_size + size + esize <= max_size);
+
+ vm_offset_t new_size = zone_meta_alloc_size_add(zone, meta, size);
+
+ if (new_size + esize > max_size) {
+ zone_meta_requeue(zone, &zone->z_pageq_full, meta);
+ } else if (old_size == 0) {
+ /* remove from free, move to intermediate */
+ zone_meta_requeue(zone, &zone->z_pageq_partial, meta);
+ }
+ } while (i < n);
+}
+
+/*!
+ * @function zalloc_return
+ *
+ * @brief
+ * Performs the tail-end of the work required on allocations before the caller
+ * uses them.
+ *
+ * @discussion
+ * This function is called without any zone lock held,
+ * and with preemption back to the state it had when @c zalloc_ext() was called.
+ *
+ * @param zone The zone we're allocating from.
+ * @param ze The encoded element we just allocated.
+ * @param flags The flags passed to @c zalloc_ext() (for Z_ZERO).
+ * @param elem_size The element size for this zone.
+ * @param freemag An optional magazine that needs to be freed.
+ */
+__attribute__((noinline))
+static void *
+zalloc_return(zone_t zone, zone_element_t ze, zalloc_flags_t flags,
+ vm_offset_t elem_size, zone_magazine_t freemag)
+{
+ vm_offset_t addr = zone_element_addr(ze, elem_size);
+
+#if KASAN_ZALLOC
+ if (zone->z_percpu) {
+ zpercpu_foreach_cpu(i) {
+ kasan_poison_range(addr + ptoa(i), elem_size,
+ ASAN_VALID);
+ }
} else {
- z->z_name = name;
+ kasan_poison_range(addr, elem_size, ASAN_VALID);
+ }
+#endif
+#if ZALLOC_ENABLE_POISONING
+ zalloc_validate_element(zone, addr, elem_size, zone_element_prot(ze));
+#endif /* ZALLOC_ENABLE_POISONING */
+#if ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS
+ if (__improbable(zalloc_should_log_or_trace_leaks(zone, elem_size))) {
+ zalloc_log_or_trace_leaks(zone, addr, __builtin_frame_address(0));
+ }
+#endif /* ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS */
+#if KASAN_ZALLOC
+ if (zone->z_kasan_redzone) {
+ addr = kasan_alloc(addr, elem_size,
+ elem_size - 2 * zone->z_kasan_redzone,
+ zone->z_kasan_redzone);
+ elem_size -= 2 * zone->z_kasan_redzone;
}
/*
- * If zone_init() hasn't run yet, the permanent zones do not exist.
- * We can limp along without properly initialized stats for a while,
- * zone_init() will rebuild the missing stats when it runs.
+ * Initialize buffer with unique pattern only if memory
+ * wasn't expected to be zeroed.
*/
- if (__probable(zone_array[ZONE_ID_PERCPU_PERMANENT].z_self)) {
- z->z_stats = zalloc_percpu_permanent_type(struct zone_stats);
+ if (!zone->z_free_zeroes && !(flags & Z_ZERO)) {
+ kasan_leak_init(addr, elem_size);
+ }
+#endif /* KASAN_ZALLOC */
+ if ((flags & Z_ZERO) && !zone->z_free_zeroes) {
+ bzero((void *)addr, elem_size);
}
- alloc = zone_get_min_alloc_granule(size, flags);
-
- if (flags & ZC_KALLOC_HEAP) {
- size_t rem = (alloc % size) / (alloc / size);
+#if VM_MAX_TAG_ZONES
+ if (__improbable(zone->tags)) {
+ vm_tag_t tag = zalloc_flags_get_tag(flags);
+ if (tag == VM_KERN_MEMORY_NONE) {
+ tag = VM_KERN_MEMORY_KALLOC;
+ }
+ // set the tag with b0 clear so the block remains inuse
+ *ztSlot(zone, addr) = (vm_tag_t)(tag << 1);
+ vm_tag_update_zone_size(tag, zone->tag_zone_index,
+ (long)elem_size);
+ }
+#endif /* VM_MAX_TAG_ZONES */
- /*
- * Try to grow the elements size and spread them more if the remaining
- * space is large enough.
- */
- size += rem & ~(KALLOC_MINALIGN - 1);
+ TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, elem_size, addr);
+ DTRACE_VM2(zalloc, zone_t, zone, void*, addr);
+ if (freemag) {
+ zone_magazine_free(freemag);
}
+ return (void *)addr;
+}
- z->pcpu_elem_size = z->z_elem_size = (uint16_t)size;
- z->alloc_pages = (uint16_t)atop(alloc);
-#if KASAN_ZALLOC
- z->kasan_redzone = redzone;
- if (strncmp(name, "fakestack.", sizeof("fakestack.") - 1) == 0) {
- z->kasan_fakestacks = true;
+#if CONFIG_GZALLOC
+/*!
+ * @function zalloc_gz
+ *
+ * @brief
+ * Performs allocations for zones using gzalloc.
+ *
+ * @discussion
+ * This function is noinline so that it doesn't affect the codegen
+ * of the fastpath.
+ */
+__attribute__((noinline))
+static void *
+zalloc_gz(zone_t zone, zone_stats_t zstats, zalloc_flags_t flags)
+{
+ vm_offset_t addr = gzalloc_alloc(zone, zstats, flags);
+ return zalloc_return(zone, zone_element_encode(addr, 0, ZPM_AUTO),
+ flags, zone_elem_size(zone), NULL);
+}
+#endif /* CONFIG_GZALLOC */
+
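+/*!
+ * @function zalloc_item_fast
+ *
+ * @brief
+ * Imports a single element with the zone lock held, updates the free count
+ * and per-CPU stats, then drops the lock and hands the element to
+ * @c zalloc_return().
+ */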
+static void *
+zalloc_item_fast(zone_t zone, zone_stats_t zstats, zalloc_flags_t flags)
+{
+ vm_size_t esize = zone_elem_size(zone);
+ zone_element_t ze;
+
+ zalloc_import(zone, &ze, 1);
+ zone_elems_free_sub(zone, 1);
+ zpercpu_get(zstats)->zs_mem_allocated += esize;
+ zone_unlock(zone);
+
+ return zalloc_return(zone, ze, flags, esize, NULL);
+}
+
+/*!
+ * @function zalloc_item_slow
+ *
+ * @brief
+ * Performs allocations when the zone is out of elements.
+ *
+ * @discussion
+ * This function might drop the lock and reenable preemption,
+ * which means the per-CPU caching layer or recirculation depot
+ * might have received elements.
+ */
+__attribute__((noinline))
+static void *
+zalloc_item_slow(zone_t zone, zone_stats_t zstats, zalloc_flags_t flags)
+{
+ if (zone->z_replenishes) {
+ zone_replenish_locked(zone);
+ } else {
+ if ((flags & Z_NOWAIT) == 0) {
+ zone_expand_locked(zone, flags, zalloc_needs_refill);
+ }
+ if (flags & (Z_NOWAIT | Z_NOPAGEWAIT)) {
+ zone_expand_async_schedule_if_needed(zone);
+ }
+ if (__improbable(zone->z_elems_free == 0)) {
+ zone_unlock(zone);
+ if (__improbable(flags & Z_NOFAIL)) {
+ zone_nofail_panic(zone);
+ }
+ DTRACE_VM2(zalloc, zone_t, zone, void*, NULL);
+ return NULL;
+ }
}
-#endif
/*
- * Handle KPI flags
+ * We might have changed cores or been preempted/blocked while expanding
+ * the zone. Allocating from the zone while the recirculation depot
+ * is not empty is not allowed.
+ *
+ * It is rare but possible for the depot to have refilled while we were
+ * waiting for pages. If that happens we need to start over.
*/
-#if __LP64__
- if (flags & ZC_SEQUESTER) {
- z->va_sequester = true;
+ if (!STAILQ_EMPTY(&zone->z_recirc)) {
+ zone_unlock(zone);
+ return zalloc_ext(zone, zstats, flags);
}
-#endif
- /* ZC_CACHING applied after all configuration is done */
- if (flags & ZC_PERCPU) {
- /*
- * ZC_CACHING is disallowed because it uses per-cpu zones for its
- * implementation and it would be circular. These allocations are
- * also quite expensive, so caching feels dangerous memory wise too.
- *
- * ZC_ZFREE_CLEARMEM is forced because per-cpu zones allow for
- * pointer-sized allocations which poisoning doesn't support.
- */
- zone_create_assert_not_both(name, flags, ZC_PERCPU, ZC_CACHING);
- zone_create_assert_not_both(name, flags, ZC_PERCPU, ZC_ALLOW_FOREIGN);
- z->percpu = true;
- z->gzalloc_exempt = true;
- z->zfree_clear_mem = true;
- z->pcpu_elem_size *= zpercpu_count();
- }
- if (flags & ZC_ZFREE_CLEARMEM) {
- z->zfree_clear_mem = true;
- }
- if (flags & ZC_NOGC) {
- z->collectable = false;
- }
- if (flags & ZC_NOENCRYPT) {
- z->noencrypt = true;
- }
- if (flags & ZC_ALIGNMENT_REQUIRED) {
- z->alignment_required = true;
- }
- if (flags & ZC_NOGZALLOC) {
- z->gzalloc_exempt = true;
- }
- if (flags & ZC_NOCALLOUT) {
- z->no_callout = true;
- }
- if (flags & ZC_DESTRUCTIBLE) {
- zone_create_assert_not_both(name, flags, ZC_DESTRUCTIBLE, ZC_CACHING);
- zone_create_assert_not_both(name, flags, ZC_DESTRUCTIBLE, ZC_ALLOW_FOREIGN);
- z->destructible = true;
- }
+ return zalloc_item_fast(zone, zstats, flags);
+}
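+/*
+ * Editor's note (illustrative only, not part of this change): the restart
+ * above follows the classic "recheck after dropping the lock" pattern:
+ * anything observed before blocking may be stale once the lock is
+ * reacquired. A hedged user-space sketch with hypothetical names:
+ *
+ *     pthread_mutex_lock(&z->lock);
+ *     while (z->free_count == 0) {
+ *         pthread_mutex_unlock(&z->lock);
+ *         grow_zone(z);                  // may block, sleep, or be preempted
+ *         pthread_mutex_lock(&z->lock);
+ *         // loop: re-evaluate the condition with fresh state
+ *     }
+ *     take_element(z);
+ *     pthread_mutex_unlock(&z->lock);
+ */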
+
+/*!
+ * @function zalloc_item
+ *
+ * @brief
+ * Performs allocations when zone caching is off.
+ *
+ * @discussion
+ * This function calls @c zalloc_item_slow() when refilling the zone
+ * is needed, or @c zalloc_item_fast() if the zone has enough free elements.
+ */
+static void *
+zalloc_item(zone_t zone, zone_stats_t zstats, zalloc_flags_t flags)
+{
+ zone_lock_check_contention(zone, NULL);
/*
- * Handle Internal flags
+ * When we committed to the zalloc_item() path,
+ * zone caching might have been flipped/enabled.
+ *
+ * If we got preempted for long enough, the recirculation layer
+ * could have been populated, and allocating from the zone would be
+ * incorrect.
+ *
+ * So double-check for this extremely rare race here.
*/
- if (flags & ZC_ALLOW_FOREIGN) {
- z->allows_foreign = true;
+ if (__improbable(!STAILQ_EMPTY(&zone->z_recirc))) {
+ zone_unlock(zone);
+ return zalloc_ext(zone, zstats, flags);
}
- if ((ZSECURITY_OPTIONS_SUBMAP_USER_DATA & zsecurity_options) &&
- (flags & ZC_DATA_BUFFERS)) {
- z->submap_idx = Z_SUBMAP_IDX_BAG_OF_BYTES_MAP;
+
+ if (__improbable(zone->z_elems_free <= zone->z_elems_rsv)) {
+ return zalloc_item_slow(zone, zstats, flags);
}
- if (flags & ZC_KASAN_NOQUARANTINE) {
- z->kasan_noquarantine = true;
+
+ return zalloc_item_fast(zone, zstats, flags);
+}
+
+static void *
+zalloc_cached_fast(zone_t zone, zone_stats_t zstats, zalloc_flags_t flags,
+ zone_cache_t cache, zone_magazine_t freemag)
+{
+ vm_offset_t esize = zone_elem_size(zone);
+ zone_element_t ze;
+ uint32_t index;
+
+ index = --cache->zc_alloc_cur;
+ if (index >= zc_mag_size()) {
+ zone_accounting_panic(zone, "zc_alloc_cur wrap around");
}
- /* ZC_KASAN_NOREDZONE already handled */
+ ze = cache->zc_alloc_elems[index];
+ cache->zc_alloc_elems[index].ze_value = 0;
- /*
- * Then if there's extra tuning, do it
- */
- if (extra_setup) {
- extra_setup(z);
+ zpercpu_get(zstats)->zs_mem_allocated += esize;
+ enable_preemption();
+
+ if (zone_meta_is_free(zone_meta_from_element(ze), ze)) {
+ zone_meta_double_free_panic(zone, ze, __func__);
}
- /*
- * Configure debugging features
- */
-#if CONFIG_GZALLOC
- gzalloc_zone_init(z); /* might set z->gzalloc_tracked */
-#endif
-#if ZONE_ENABLE_LOGGING
- if (!z->gzalloc_tracked && num_zones_logged < max_num_zones_to_log) {
- /*
- * Check for and set up zone leak detection if requested via boot-args.
- * might set z->zone_logging
- */
- zone_setup_logging(z);
+ return zalloc_return(zone, ze, flags, esize, freemag);
+}
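+/*
+ * Editor's note (illustrative only, not part of this change): a magazine is
+ * just a fixed-size array of elements plus a cursor. A user-space sketch of
+ * the pop performed in zalloc_cached_fast() above, with hypothetical names;
+ * the unsigned wrap check mirrors the "zc_alloc_cur wrap around" panic:
+ *
+ *     #include <stdint.h>
+ *     #include <stdlib.h>
+ *
+ *     #define MAG_SIZE 16u
+ *     struct magazine { uint32_t cur; void *elems[MAG_SIZE]; };
+ *
+ *     static void *mag_pop(struct magazine *m)
+ *     {
+ *         uint32_t idx = --m->cur;
+ *         if (idx >= MAG_SIZE) {   // 0 - 1 wrapped to UINT32_MAX
+ *             abort();             // caller must ensure the magazine is non-empty
+ *         }
+ *         void *e = m->elems[idx];
+ *         m->elems[idx] = NULL;    // scrub the slot, like ze_value = 0 above
+ *         return e;
+ *     }
+ */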
+
+static void *
+zalloc_cached_from_depot(zone_t zone, zone_stats_t zstats, zalloc_flags_t flags,
+ zone_cache_t cache, zone_cache_t depot, zone_magazine_t mag)
+{
+ STAILQ_REMOVE_HEAD(&depot->zc_depot, zm_link);
+ if (depot->zc_depot_cur-- == 0) {
+ zone_accounting_panic(zone, "zc_depot_cur wrap-around");
}
-#endif /* ZONE_ENABLE_LOGGING */
-#if VM_MAX_TAG_ZONES
- if (!z->gzalloc_tracked && z->kalloc_heap && zone_tagging_on) {
- static int tag_zone_index;
- vm_offset_t esize = zone_elem_size(z);
- z->tags = true;
- z->tags_inline = (((page_size + esize - 1) / esize) <=
- (sizeof(uint32_t) / sizeof(uint16_t)));
- z->tag_zone_index = os_atomic_inc_orig(&tag_zone_index, relaxed);
- assert(z->tag_zone_index < VM_MAX_TAG_ZONES);
+ zone_depot_unlock_nopreempt(depot);
+
+ mag = zone_magazine_replace(&cache->zc_alloc_cur,
+ &cache->zc_alloc_elems, mag);
+
+ z_debug_assert(cache->zc_alloc_cur == zc_mag_size());
+ z_debug_assert(mag->zm_cur == 0);
+
+ if (zone == zc_magazine_zone) {
+ enable_preemption();
+ bzero(mag, zone_elem_size(zone));
+ return mag;
}
-#endif
+
+ return zalloc_cached_fast(zone, zstats, flags, cache, mag);
+}
+
+__attribute__((noinline))
+static void *
+zalloc_cached_slow(zone_t zone, zone_stats_t zstats, zalloc_flags_t flags,
+ zone_cache_t cache)
+{
+ zone_magazine_t mag = NULL;
+ struct zone_depot mags = STAILQ_HEAD_INITIALIZER(mags);
/*
- * Finally, fixup properties based on security policies, boot-args, ...
+ * Try to allocate from our local depot, if there's one.
*/
- if ((ZSECURITY_OPTIONS_SUBMAP_USER_DATA & zsecurity_options) &&
- z->kalloc_heap == KHEAP_ID_DATA_BUFFERS) {
- z->submap_idx = Z_SUBMAP_IDX_BAG_OF_BYTES_MAP;
- }
-#if __LP64__
- if ((ZSECURITY_OPTIONS_SEQUESTER & zsecurity_options) &&
- (flags & ZC_NOSEQUESTER) == 0 &&
- z->submap_idx == Z_SUBMAP_IDX_GENERAL_MAP) {
- z->va_sequester = true;
+ if (STAILQ_FIRST(&cache->zc_depot)) {
+ zone_depot_lock_nopreempt(cache);
+
+ if ((mag = STAILQ_FIRST(&cache->zc_depot)) != NULL) {
+ return zalloc_cached_from_depot(zone, zstats, flags,
+ cache, cache, mag);
+ }
+
+ zone_depot_unlock_nopreempt(cache);
}
-#endif
+
+ zone_lock_nopreempt_check_contention(zone, cache);
+
/*
- * Always clear zone elements smaller than a cacheline,
- * because it's pretty close to free.
+ * If the recirculation depot is empty, we'll need to import.
+ * The system is tuned for this to be extremely rare.
*/
- if (size <= zp_min_size) {
- z->zfree_clear_mem = true;
- }
- if (zp_factor != 0 && !z->zfree_clear_mem) {
- z->zp_count = zone_poison_count_init(z);
- }
+ if (__improbable(STAILQ_EMPTY(&zone->z_recirc))) {
+ uint16_t n_elems = zc_mag_size();
-#if CONFIG_ZCACHE
- if ((flags & ZC_NOCACHING) == 0) {
- /*
- * Append kalloc heap name to zone name (if zone is used by kalloc)
- */
- char temp_zone_name[MAX_ZONE_NAME] = "";
- snprintf(temp_zone_name, MAX_ZONE_NAME, "%s%s", zone_heap_name(z), z->z_name);
+ if (zone->z_elems_free < n_elems + zone->z_elems_rsv / 2 &&
+ os_sub_overflow(zone->z_elems_free,
+ zone->z_elems_rsv / 2, &n_elems)) {
+ n_elems = 0;
+ }
- /* Check if boot-arg specified it should have a cache */
- if (track_this_zone(temp_zone_name, cache_zone_name)) {
- flags |= ZC_CACHING;
- } else if (zcc_kalloc && z->kalloc_heap) {
- flags |= ZC_CACHING;
+ z_debug_assert(n_elems <= zc_mag_size());
+
+ if (__improbable(n_elems == 0)) {
+ /*
+ * If importing elements would deplete the zone,
+ * call zalloc_item_slow()
+ */
+ return zalloc_item_slow(zone, zstats, flags);
}
+
+ if (__improbable(zone_caching_disabled)) {
+ if (__improbable(zone_caching_disabled < 0)) {
+ /*
+ * In the first 10s after boot, mess with
+ * the scan position in order to make early
+ * allocation patterns less predictable.
+ */
+ zone_early_scramble_rr(zone, zstats);
+ }
+ return zalloc_item_fast(zone, zstats, flags);
+ }
+
+ zalloc_import(zone, cache->zc_alloc_elems, n_elems);
+
+ cache->zc_alloc_cur = n_elems;
+ zone_elems_free_sub(zone, n_elems);
+
+ zone_unlock_nopreempt(zone);
+
+ return zalloc_cached_fast(zone, zstats, flags, cache, NULL);
}
- if ((flags & ZC_CACHING) &&
- !z->tags && !z->zone_logging && !z->gzalloc_tracked) {
- zcache_init(z);
- }
-#endif /* CONFIG_ZCACHE */
- lock_zone(z);
- z->z_self = z;
- unlock_zone(z);
+ uint16_t n_mags = 0;
- return z;
+ /*
+ * If the recirculation depot has elements, then try to fill
+ * the local per-cpu depot up to (1 / zc_recirc_denom) of its maximum size.
+ */
+ do {
+ mag = STAILQ_FIRST(&zone->z_recirc);
+ STAILQ_REMOVE_HEAD(&zone->z_recirc, zm_link);
+ STAILQ_INSERT_TAIL(&mags, mag, zm_link);
+ n_mags++;
+
+ for (uint16_t i = 0; i < zc_mag_size(); i++) {
+ zone_element_t e = mag->zm_elems[i];
+
+ if (!zone_meta_mark_used(zone_meta_from_element(e), e)) {
+ zone_meta_double_free_panic(zone, e, __func__);
+ }
+ }
+ } while (!STAILQ_EMPTY(&zone->z_recirc) &&
+ zc_recirc_denom * n_mags * zc_mag_size() <= cache->zc_depot_max);
+
+ zone_elems_free_sub(zone, n_mags * zc_mag_size());
+ zone_counter_sub(zone, z_recirc_cur, n_mags);
+
+ zone_unlock_nopreempt(zone);
+
+ /*
+ * And then incorporate everything into our per-cpu layer.
+ */
+ mag = STAILQ_FIRST(&mags);
+ STAILQ_REMOVE_HEAD(&mags, zm_link);
+ mag = zone_magazine_replace(&cache->zc_alloc_cur,
+ &cache->zc_alloc_elems, mag);
+ z_debug_assert(cache->zc_alloc_cur == zc_mag_size());
+ z_debug_assert(mag->zm_cur == 0);
+
+ if (--n_mags > 0) {
+ zone_depot_lock_nopreempt(cache);
+ cache->zc_depot_cur += n_mags;
+ STAILQ_CONCAT(&cache->zc_depot, &mags);
+ zone_depot_unlock_nopreempt(cache);
+ }
+
+ return zalloc_cached_fast(zone, zstats, flags, cache, mag);
}
-__startup_func
-void
-zone_create_startup(struct zone_create_startup_spec *spec)
+/*!
+ * @function zalloc_cached
+ *
+ * @brief
+ * Performs allocations when zone caching is on.
+ *
+ * @discussion
+ * This function calls @c zalloc_cached_fast() when the caches have elements
+ * ready.
+ *
+ * Else it will call @c zalloc_cached_slow() so that the cache is refilled,
+ * which might switch to the @c zalloc_item_slow() track when the backing zone
+ * needs to be refilled.
+ */
+static void *
+zalloc_cached(zone_t zone, zone_stats_t zstats, zalloc_flags_t flags)
{
- *spec->z_var = zone_create_ext(spec->z_name, spec->z_size,
- spec->z_flags, spec->z_zid, spec->z_setup);
+ zone_cache_t cache;
+
+ disable_preemption();
+ cache = zpercpu_get(zone->z_pcpu_cache);
+
+ if (cache->zc_alloc_cur == 0) {
+ if (__improbable(cache->zc_free_cur == 0)) {
+ return zalloc_cached_slow(zone, zstats, flags, cache);
+ }
+ zone_cache_swap_magazines(cache);
+ }
+
+ return zalloc_cached_fast(zone, zstats, flags, cache, NULL);
}
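+/*
+ * Editor's note (illustrative only, not part of this change): the per-CPU
+ * cache keeps one magazine drained by allocations and one filled by frees;
+ * zone_cache_swap_magazines() simply exchanges their roles when the alloc
+ * side runs dry, as zalloc_cached() does above. A hedged sketch with
+ * hypothetical names:
+ *
+ *     struct cpu_cache {
+ *         uint32_t  alloc_cur, free_cur;
+ *         void    **alloc_elems, **free_elems;
+ *     };
+ *
+ *     static void cache_swap(struct cpu_cache *c)
+ *     {
+ *         uint32_t  cur   = c->alloc_cur;
+ *         void    **elems = c->alloc_elems;
+ *
+ *         c->alloc_cur   = c->free_cur;
+ *         c->alloc_elems = c->free_elems;
+ *         c->free_cur    = cur;
+ *         c->free_elems  = elems;
+ *     }
+ */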
-/*
- * The 4 first field of a zone_view and a zone alias, so that the zone_or_view_t
- * union works. trust but verify.
+/*!
+ * @function zalloc_ext
+ *
+ * @brief
+ * The core implementation of @c zalloc(), @c zalloc_flags(), @c zalloc_percpu().
*/
-#define zalloc_check_zov_alias(f1, f2) \
- static_assert(offsetof(struct zone, f1) == offsetof(struct zone_view, f2))
-zalloc_check_zov_alias(z_self, zv_zone);
-zalloc_check_zov_alias(z_stats, zv_stats);
-zalloc_check_zov_alias(z_name, zv_name);
-zalloc_check_zov_alias(z_views, zv_next);
-#undef zalloc_check_zov_alias
-
-__startup_func
-void
-zone_view_startup_init(struct zone_view_startup_spec *spec)
+void *
+zalloc_ext(zone_t zone, zone_stats_t zstats, zalloc_flags_t flags)
{
- struct kalloc_heap *heap = NULL;
- zone_view_t zv = spec->zv_view;
- zone_t z;
+ /*
+ * KASan uses zalloc() for fakestack, which can be called anywhere.
+ * However, we make sure these calls can never block.
+ */
+ assert(zone->kasan_fakestacks ||
+ ml_get_interrupts_enabled() ||
+ ml_is_quiescing() ||
+ debug_mode_active() ||
+ startup_phase < STARTUP_SUB_EARLY_BOOT);
- switch (spec->zv_heapid) {
- case KHEAP_ID_DEFAULT:
- heap = KHEAP_DEFAULT;
- break;
- case KHEAP_ID_DATA_BUFFERS:
- heap = KHEAP_DATA_BUFFERS;
- break;
- case KHEAP_ID_KEXT:
- heap = KHEAP_KEXT;
- break;
- default:
- heap = NULL;
+ /*
+ * Make sure Z_NOFAIL was not obviously misused
+ */
+ if (zone->z_replenishes) {
+ assert((flags & (Z_NOWAIT | Z_NOPAGEWAIT)) == 0);
+ } else if (flags & Z_NOFAIL) {
+ assert(!zone->exhaustible &&
+ (flags & (Z_NOWAIT | Z_NOPAGEWAIT)) == 0);
}
- if (heap) {
- z = kalloc_heap_zone_for_size(heap, spec->zv_size);
- assert(z);
- } else {
- z = spec->zv_zone;
- assert(spec->zv_size <= zone_elem_size(z));
+#if CONFIG_GZALLOC
+ if (__improbable(zone->gzalloc_tracked)) {
+ return zalloc_gz(zone, zstats, flags);
}
+#endif /* CONFIG_GZALLOC */
- zv->zv_zone = z;
- zv->zv_stats = zalloc_percpu_permanent_type(struct zone_stats);
- zv->zv_next = z->z_views;
- if (z->z_views == NULL && z->kalloc_heap == KHEAP_ID_NONE) {
- /*
- * count the raw view for zones not in a heap,
- * kalloc_heap_init() already counts it for its members.
- */
- zone_view_count += 2;
- } else {
- zone_view_count += 1;
+ if (zone->z_pcpu_cache) {
+ return zalloc_cached(zone, zstats, flags);
}
- z->z_views = zv;
+
+ return zalloc_item(zone, zstats, flags);
}
-zone_t
-zone_create(
- const char *name,
- vm_size_t size,
- zone_create_flags_t flags)
+void *
+zalloc(union zone_or_view zov)
{
- return zone_create_ext(name, size, flags, ZONE_ID_ANY, NULL);
+ return zalloc_flags(zov, Z_WAITOK);
}
-zone_t
-zinit(
- vm_size_t size, /* the size of an element */
- vm_size_t max, /* maximum memory to use */
- vm_size_t alloc __unused, /* allocation size */
- const char *name) /* a name for the zone */
+void *
+zalloc_noblock(union zone_or_view zov)
+{
+ return zalloc_flags(zov, Z_NOWAIT);
+}
+
+void *
+zalloc_flags(union zone_or_view zov, zalloc_flags_t flags)
{
- zone_t z = zone_create(name, size, ZC_DESTRUCTIBLE);
- zone_set_max(z, max);
- return z;
+ zone_t zone = zov.zov_view->zv_zone;
+ zone_stats_t zstats = zov.zov_view->zv_stats;
+ assert(!zone->z_percpu);
+ return zalloc_ext(zone, zstats, flags);
}
-void
-zdestroy(zone_t z)
+void *
+zalloc_percpu(union zone_or_view zov, zalloc_flags_t flags)
{
- unsigned int zindex = zone_index(z);
+ zone_t zone = zov.zov_view->zv_zone;
+ zone_stats_t zstats = zov.zov_view->zv_stats;
+ assert(zone->z_percpu);
+ return (void *)__zpcpu_mangle(zalloc_ext(zone, zstats, flags));
+}
- lock_zone(z);
+static void *
+_zalloc_permanent(zone_t zone, vm_size_t size, vm_offset_t mask)
+{
+ struct zone_page_metadata *page_meta;
+ vm_offset_t offs, addr;
+ zone_pva_t pva;
- if (!z->destructible || zone_caching_enabled(z) || z->allows_foreign) {
- panic("zdestroy: Zone %s%s isn't destructible",
- zone_heap_name(z), z->z_name);
- }
+ assert(ml_get_interrupts_enabled() ||
+ ml_is_quiescing() ||
+ debug_mode_active() ||
+ startup_phase < STARTUP_SUB_EARLY_BOOT);
- if (!z->z_self || z->expanding_no_vm_priv || z->expanding_vm_priv ||
- z->async_pending || z->waiting) {
- panic("zdestroy: Zone %s%s in an invalid state for destruction",
- zone_heap_name(z), z->z_name);
- }
+ size = (size + mask) & ~mask;
+ assert(size <= PAGE_SIZE);
-#if !KASAN_ZALLOC
- /*
- * Unset the valid bit. We'll hit an assert failure on further operations
- * on this zone, until zinit() is called again.
- *
- * Leave the zone valid for KASan as we will see zfree's on quarantined free
- * elements even after the zone is destroyed.
- */
- z->z_self = NULL;
-#endif
- z->destroyed = true;
- unlock_zone(z);
+ zone_lock(zone);
+ assert(zone->z_self == zone);
- /* Dump all the free elements */
- zone_drop_free_elements(z);
+ for (;;) {
+ pva = zone->z_pageq_partial;
+ while (!zone_pva_is_null(pva)) {
+ page_meta = zone_pva_to_meta(pva);
+ if (page_meta->zm_bump + size <= PAGE_SIZE) {
+ goto found;
+ }
+ pva = page_meta->zm_page_next;
+ }
-#if CONFIG_GZALLOC
- if (__improbable(z->gzalloc_tracked)) {
- /* If the zone is gzalloc managed dump all the elements in the free cache */
- gzalloc_empty_free_cache(z);
+ zone_expand_locked(zone, Z_WAITOK, NULL);
}
-#endif
-
- lock_zone(z);
- while (!zone_pva_is_null(z->pages_sequester)) {
- struct zone_page_metadata *page_meta;
- vm_offset_t free_addr;
-
- page_meta = zone_sequestered_page_get(z, &free_addr);
- unlock_zone(z);
- kmem_free(submap_for_zone(z), free_addr, ptoa(z->alloc_pages));
- lock_zone(z);
- }
+found:
+ offs = (uint16_t)((page_meta->zm_bump + mask) & ~mask);
+ page_meta->zm_bump = (uint16_t)(offs + size);
+ page_meta->zm_alloc_size += size;
+ zone->z_elems_free -= size;
+ zpercpu_get(zone->z_stats)->zs_mem_allocated += size;
-#if !KASAN_ZALLOC
- /* Assert that all counts are zero */
- if (z->countavail || z->countfree || zone_size_wired(z) ||
- z->allfree_page_count || z->sequester_page_count) {
- panic("zdestroy: Zone %s%s isn't empty at zdestroy() time",
- zone_heap_name(z), z->z_name);
+ if (page_meta->zm_alloc_size >= PAGE_SIZE - sizeof(vm_offset_t)) {
+ zone_meta_requeue(zone, &zone->z_pageq_full, page_meta);
}
- /* consistency check: make sure everything is indeed empty */
- assert(zone_pva_is_null(z->pages_any_free_foreign));
- assert(zone_pva_is_null(z->pages_all_used_foreign));
- assert(zone_pva_is_null(z->pages_all_free));
- assert(zone_pva_is_null(z->pages_intermediate));
- assert(zone_pva_is_null(z->pages_all_used));
- assert(zone_pva_is_null(z->pages_sequester));
-#endif
-
- unlock_zone(z);
-
- simple_lock(&all_zones_lock, &zone_locks_grp);
+ zone_unlock(zone);
- assert(!bitmap_test(zone_destroyed_bitmap, zindex));
- /* Mark the zone as empty in the bitmap */
- bitmap_set(zone_destroyed_bitmap, zindex);
- num_zones_in_use--;
- assert(num_zones_in_use > 0);
+ addr = offs + zone_pva_to_addr(pva);
- simple_unlock(&all_zones_lock);
+ DTRACE_VM2(zalloc, zone_t, zone, void*, addr);
+ return (void *)addr;
}
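+/*
+ * Editor's note (illustrative only, not part of this change):
+ * _zalloc_permanent() above is a simple intra-page bump allocator: align the
+ * cursor up with the request's mask, then advance it. A self-contained
+ * user-space sketch of the same arithmetic, with hypothetical names:
+ *
+ *     #include <stdint.h>
+ *     #include <stddef.h>
+ *
+ *     struct bump_page { uint16_t bump; uint8_t bytes[4096]; };
+ *
+ *     static void *bump_alloc(struct bump_page *p, size_t size, size_t mask)
+ *     {
+ *         size_t offs = (p->bump + mask) & ~mask;  // align up, mask = align - 1
+ *         if (offs + size > sizeof(p->bytes)) {
+ *             return NULL;                         // page full, caller grabs a new one
+ *         }
+ *         p->bump = (uint16_t)(offs + size);
+ *         return &p->bytes[offs];
+ *     }
+ *
+ * E.g. with bump = 13, mask = 7 (8-byte alignment) and size = 24, the
+ * element lands at offset 16 and the cursor moves to 40.
+ */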
-#pragma mark zone (re)fill, jetsam
-
-/*
- * Dealing with zone allocations from the mach VM code.
- *
- * The implementation of the mach VM itself uses the zone allocator
- * for things like the vm_map_entry data structure. In order to prevent
- * an infinite recursion problem when adding more pages to a zone, zalloc
- * uses a replenish thread to refill the VM layer's zones before they have
- * too few remaining free entries. The reserved remaining free entries
- * guarantee that the VM routines can get entries from already mapped pages.
- *
- * In order for that to work, the amount of allocations in the nested
- * case have to be bounded. There are currently 2 replenish zones, and
- * if each needs 1 element of each zone to add a new page to itself, that
- * gives us a minumum reserve of 2 elements.
- *
- * There is also a deadlock issue with the zone garbage collection thread,
- * or any thread that is trying to free zone pages. While holding
- * the kernel's map lock they may need to allocate new VM map entries, hence
- * we need enough reserve to allow them to get past the point of holding the
- * map lock. After freeing that page, the GC thread will wait in drop_free_elements()
- * until the replenish threads can finish. Since there's only 1 GC thread at a time,
- * that adds a minimum of 1 to the reserve size.
- *
- * Since the minumum amount you can add to a zone is 1 page, we'll use 16K (from ARM)
- * as the refill size on all platforms.
- *
- * When a refill zone drops to half that available, i.e. REFILL_SIZE / 2,
- * zalloc_ext() will wake the replenish thread. The replenish thread runs
- * until at least REFILL_SIZE worth of free elements exist, before sleeping again.
- * In the meantime threads may continue to use the reserve until there are only REFILL_SIZE / 4
- * elements left. Below that point only the replenish threads themselves and the GC
- * thread may continue to use from the reserve.
- */
-static unsigned zone_replenish_loops;
-static unsigned zone_replenish_wakeups;
-static unsigned zone_replenish_wakeups_initiated;
-static unsigned zone_replenish_throttle_count;
-
-#define ZONE_REPLENISH_TARGET (16 * 1024)
-static unsigned zone_replenish_active = 0; /* count of zones currently replenishing */
-static unsigned zone_replenish_max_threads = 0;
+static void *
+_zalloc_permanent_large(size_t size, vm_offset_t mask)
+{
+ kern_return_t kr;
+ vm_offset_t addr;
-LCK_GRP_DECLARE(zone_replenish_lock_grp, "zone_replenish_lock");
-LCK_SPIN_DECLARE(zone_replenish_lock, &zone_replenish_lock_grp);
+ kr = kernel_memory_allocate(kernel_map, &addr, size, mask,
+ KMA_KOBJECT | KMA_PERMANENT | KMA_ZERO,
+ VM_KERN_MEMORY_KALLOC);
+ if (kr != 0) {
+ panic("zalloc_permanent: unable to allocate %zd bytes (%d)",
+ size, kr);
+ }
+ return (void *)addr;
+}
-__abortlike
-static void
-zone_replenish_panic(zone_t zone, kern_return_t kr)
+void *
+zalloc_permanent(vm_size_t size, vm_offset_t mask)
{
- panic_include_zprint = TRUE;
-#if CONFIG_ZLEAKS
- if ((zleak_state & ZLEAK_STATE_ACTIVE)) {
- panic_include_ztrace = TRUE;
- }
-#endif /* CONFIG_ZLEAKS */
- if (kr == KERN_NO_SPACE) {
- zone_t zone_largest = zone_find_largest();
- panic("zalloc: zone map exhausted while allocating from zone %s%s, "
- "likely due to memory leak in zone %s%s "
- "(%lu total bytes, %d elements allocated)",
- zone_heap_name(zone), zone->z_name,
- zone_heap_name(zone_largest), zone_largest->z_name,
- (unsigned long)zone_size_wired(zone_largest),
- zone_count_allocated(zone_largest));
+ if (size <= PAGE_SIZE) {
+ zone_t zone = &zone_array[ZONE_ID_PERMANENT];
+ return _zalloc_permanent(zone, size, mask);
}
- panic("zalloc: %s%s (%d elements) retry fail %d",
- zone_heap_name(zone), zone->z_name,
- zone_count_allocated(zone), kr);
+ return _zalloc_permanent_large(size, mask);
}
-static void
-zone_replenish_locked(zone_t z, zalloc_flags_t flags, bool asynchronously)
+void *
+zalloc_percpu_permanent(vm_size_t size, vm_offset_t mask)
{
- int kmaflags = KMA_KOBJECT | KMA_ZERO;
- vm_offset_t space, alloc_size;
- uint32_t retry = 0;
- kern_return_t kr;
+ zone_t zone = &zone_array[ZONE_ID_PERCPU_PERMANENT];
+ return (void *)__zpcpu_mangle(_zalloc_permanent(zone, size, mask));
+}
- if (z->noencrypt) {
- kmaflags |= KMA_NOENCRYPT;
- }
- if (flags & Z_NOPAGEWAIT) {
- kmaflags |= KMA_NOPAGEWAIT;
- }
- if (z->permanent) {
- kmaflags |= KMA_PERMANENT;
- }
+/*! @} */
+#endif /* !ZALLOC_TEST */
+#pragma mark zone GC / trimming
+#if !ZALLOC_TEST
- for (;;) {
- struct zone_page_metadata *page_meta = NULL;
+static thread_call_data_t zone_defrag_callout;
- /*
- * Try to allocate our regular chunk of pages,
- * unless the system is under massive pressure
- * and we're looking for more than 2 pages.
- */
- if (!z->percpu && z->alloc_pages > 2 && (vm_pool_low() || retry > 0)) {
- alloc_size = round_page(zone_elem_size(z));
- } else {
- alloc_size = ptoa(z->alloc_pages);
- page_meta = zone_sequestered_page_get(z, &space);
- }
+static void
+zone_reclaim_chunk(zone_t z, struct zone_page_metadata *meta, uint32_t free_count)
+{
+ vm_address_t page_addr;
+ vm_size_t size_to_free;
+ uint32_t bitmap_ref;
+ uint32_t page_count;
+ bool sequester = z->z_va_sequester && !z->z_destroyed;
- unlock_zone(z);
+ zone_meta_queue_pop_native(z, &z->z_pageq_empty, &page_addr);
-#if CONFIG_ZLEAKS
- /*
- * Do the zone leak activation here because zleak_activate()
- * may block, and can't be done on the way out.
- */
- if (__improbable(zleak_state & ZLEAK_STATE_ENABLED)) {
- if (!(zleak_state & ZLEAK_STATE_ACTIVE) &&
- zone_submaps_approx_size() >= zleak_global_tracking_threshold) {
- kr = zleak_activate();
- if (kr != KERN_SUCCESS) {
- printf("Failed to activate live zone leak debugging (%d).\n", kr);
- }
- }
- }
-#endif /* CONFIG_ZLEAKS */
+ page_count = meta->zm_chunk_len;
- /*
- * Trigger jetsams via the vm_pageout_garbage_collect thread if
- * we're running out of zone memory
- */
- if (is_zone_map_nearing_exhaustion()) {
- thread_wakeup((event_t) &vm_pageout_garbage_collect);
+ if (meta->zm_alloc_size) {
+ zone_metadata_corruption(z, meta, "alloc_size");
+ }
+ if (z->z_percpu) {
+ if (page_count != 1) {
+ zone_metadata_corruption(z, meta, "page_count");
}
-
- if (page_meta) {
- kr = zone_sequestered_page_populate(z, page_meta, space,
- alloc_size, kmaflags);
- } else {
- if (z->submap_idx == Z_SUBMAP_IDX_GENERAL_MAP && z->kalloc_heap != KHEAP_ID_NONE) {
- kmaflags |= KMA_KHEAP;
- }
- kr = kernel_memory_allocate(submap_for_zone(z),
- &space, alloc_size, 0, kmaflags, VM_KERN_MEMORY_ZONE);
+ size_to_free = ptoa(z->z_chunk_pages);
+ os_atomic_sub(&zones_phys_page_mapped_count,
+ z->z_chunk_pages, relaxed);
+ } else {
+ if (page_count > z->z_chunk_pages) {
+ zone_metadata_corruption(z, meta, "page_count");
}
-
-#if !__LP64__
- if (kr == KERN_NO_SPACE && z->allows_foreign) {
- /*
- * For zones allowing foreign pages, fallback to the kernel map
- */
- kr = kernel_memory_allocate(kernel_map, &space,
- alloc_size, 0, kmaflags, VM_KERN_MEMORY_ZONE);
+ if (page_count < z->z_chunk_pages) {
+ /* Dequeue non populated VA from z_pageq_va */
+ zone_meta_remqueue(z, meta + page_count);
}
-#endif
+ size_to_free = ptoa(page_count);
+ os_atomic_sub(&zones_phys_page_mapped_count, page_count, relaxed);
+ }
- if (kr == KERN_SUCCESS) {
- break;
- }
+ zone_counter_sub(z, z_elems_free, free_count);
+ zone_counter_sub(z, z_elems_avail, free_count);
+ zone_counter_sub(z, z_wired_empty, page_count);
+ zone_counter_sub(z, z_wired_cur, page_count);
+ if (z->z_elems_free_min < free_count) {
+ z->z_elems_free_min = 0;
+ } else {
+ z->z_elems_free_min -= free_count;
+ }
+ if (z->z_elems_free_max < free_count) {
+ z->z_elems_free_max = 0;
+ } else {
+ z->z_elems_free_max -= free_count;
+ }
- if (flags & Z_NOPAGEWAIT) {
- lock_zone(z);
- return;
+ bitmap_ref = 0;
+ if (sequester) {
+ if (meta->zm_inline_bitmap) {
+ for (int i = 0; i < meta->zm_chunk_len; i++) {
+ meta[i].zm_bitmap = 0;
+ }
+ } else {
+ bitmap_ref = meta->zm_bitmap;
+ meta->zm_bitmap = 0;
}
-
- if (asynchronously) {
- assert_wait_timeout(&z->prio_refill_count,
- THREAD_UNINT, 1, 100 * NSEC_PER_USEC);
- thread_block(THREAD_CONTINUE_NULL);
- } else if (++retry == 3) {
- zone_replenish_panic(z, kr);
+ meta->zm_chunk_len = 0;
+ } else {
+ if (!meta->zm_inline_bitmap) {
+ bitmap_ref = meta->zm_bitmap;
}
-
- lock_zone(z);
+ zone_counter_sub(z, z_va_cur, z->z_percpu ? 1 : z->z_chunk_pages);
+ bzero(meta, sizeof(*meta) * z->z_chunk_pages);
}
- zcram_and_lock(z, space, alloc_size);
+ zone_unlock(z);
-#if CONFIG_ZLEAKS
- if (__improbable(zleak_state & ZLEAK_STATE_ACTIVE)) {
- if (!z->zleak_on &&
- zone_size_wired(z) >= zleak_per_zone_tracking_threshold) {
- z->zleak_on = true;
- }
+ if (bitmap_ref) {
+ zone_bits_free(bitmap_ref);
}
-#endif /* CONFIG_ZLEAKS */
-}
-
-/*
- * High priority VM privileged thread used to asynchronously refill a given zone.
- * These are needed for data structures used by the lower level VM itself. The
- * replenish thread maintains a reserve of elements, so that the VM will never
- * block in the zone allocator.
- */
-__dead2
-static void
-zone_replenish_thread(void *_z, wait_result_t __unused wr)
-{
- zone_t z = _z;
- current_thread()->options |= (TH_OPT_VMPRIV | TH_OPT_ZONE_PRIV);
-
- for (;;) {
- lock_zone(z);
- assert(z->z_self == z);
- assert(z->zone_replenishing);
- assert(z->prio_refill_count != 0);
+ /* Free the pages for metadata and account for them */
+#if KASAN_ZALLOC
+ kasan_poison_range(page_addr, size_to_free, ASAN_VALID);
+#endif
+#if VM_MAX_TAG_ZONES
+ if (z->tags) {
+ ztMemoryRemove(z, page_addr, size_to_free);
+ }
+#endif /* VM_MAX_TAG_ZONES */
- while (z->countfree < z->prio_refill_count) {
- assert(!z->expanding_no_vm_priv);
- assert(!z->expanding_vm_priv);
+ if (sequester) {
+ kernel_memory_depopulate(zone_submap(z), page_addr,
+ size_to_free, KMA_KOBJECT, VM_KERN_MEMORY_ZONE);
+ } else {
+ kmem_free(zone_submap(z), page_addr, ptoa(z->z_chunk_pages));
+ }
- zone_replenish_locked(z, Z_WAITOK, true);
+ /*
+ * Freeing memory sometimes needs memory itself (for example vm map
+ * entries to represent holes).
+ *
+ * If there are any active replenish threads, we need to let them work
+ * while we hold no locks. However, only do so right after we just freed
+ * memory, to give them even more chances to find fresh pages.
+ */
+ zone_replenish_wait_if_needed();
- assert(z->z_self == z);
- zone_replenish_loops++;
- }
+ thread_yield_to_preemption();
- /* Wakeup any potentially throttled allocations. */
- thread_wakeup(z);
+ zone_lock(z);
- assert_wait(&z->prio_refill_count, THREAD_UNINT);
+ if (sequester) {
+ zone_meta_queue_push(z, &z->z_pageq_va, meta);
+ }
+}
- /*
- * We finished refilling the zone, so decrement the active count
- * and wake up any waiting GC threads.
- */
- lck_spin_lock(&zone_replenish_lock);
- assert(zone_replenish_active > 0);
- if (--zone_replenish_active == 0) {
- thread_wakeup((event_t)&zone_replenish_active);
- }
- lck_spin_unlock(&zone_replenish_lock);
+static uint16_t
+zone_reclaim_elements(zone_t z, uint16_t *count, zone_element_t *elems)
+{
+ uint16_t n = *count;
- z->zone_replenishing = false;
- unlock_zone(z);
+ z_debug_assert(n <= zc_mag_size());
- thread_block(THREAD_CONTINUE_NULL);
- zone_replenish_wakeups++;
+ for (uint16_t i = 0; i < n; i++) {
+ zone_element_t ze = elems[i];
+ elems[i].ze_value = 0;
+ zfree_drop(z, zone_element_validate(z, ze), ze, false);
}
+
+ *count = 0;
+ return n;
}
-void
-zone_prio_refill_configure(zone_t z)
+static uint16_t
+zone_reclaim_recirc_magazine(zone_t z, struct zone_depot *mags)
{
- thread_t th;
- kern_return_t tres;
+ zone_magazine_t mag = STAILQ_FIRST(&z->z_recirc);
- lock_zone(z);
- assert(!z->prio_refill_count && !z->destructible);
- z->prio_refill_count = (uint16_t)(ZONE_REPLENISH_TARGET / zone_elem_size(z));
- z->zone_replenishing = true;
- unlock_zone(z);
+ STAILQ_REMOVE_HEAD(&z->z_recirc, zm_link);
+ STAILQ_INSERT_TAIL(mags, mag, zm_link);
+ zone_counter_sub(z, z_recirc_cur, 1);
- lck_spin_lock(&zone_replenish_lock);
- ++zone_replenish_max_threads;
- ++zone_replenish_active;
- lck_spin_unlock(&zone_replenish_lock);
- OSMemoryBarrier();
+ z_debug_assert(mag->zm_cur == zc_mag_size());
- tres = kernel_thread_start_priority(zone_replenish_thread, z,
- MAXPRI_KERNEL, &th);
- if (tres != KERN_SUCCESS) {
- panic("zone_prio_refill_configure, thread create: 0x%x", tres);
+ for (uint16_t i = 0; i < zc_mag_size(); i++) {
+ zone_element_t ze = mag->zm_elems[i];
+ mag->zm_elems[i].ze_value = 0;
+ zfree_drop(z, zone_element_validate(z, ze), ze, true);
}
- thread_deallocate(th);
+ mag->zm_cur = 0;
+
+ return zc_mag_size();
}
static void
-zone_randomize_freelist(zone_t zone, struct zone_page_metadata *meta,
- vm_offset_t size, zone_addr_kind_t kind, unsigned int *entropy_buffer)
-{
- const vm_size_t elem_size = zone_elem_size(zone);
- vm_offset_t left, right, head, base;
- vm_offset_t element;
-
- left = ZONE_PAGE_FIRST_OFFSET(kind);
- right = size - ((size - left) % elem_size);
- head = 0;
- base = zone_meta_to_addr(meta, kind);
-
- while (left < right) {
- if (zone_leaks_scan_enable || __improbable(zone->tags) ||
- random_bool_gen_bits(&zone_bool_gen, entropy_buffer, MAX_ENTROPY_PER_ZCRAM, 1)) {
- element = base + left;
- left += elem_size;
- } else {
- right -= elem_size;
- element = base + right;
- }
+zone_depot_trim(zone_cache_t zc, struct zone_depot *head)
+{
+ zone_magazine_t mag;
+
+ if (zc->zc_depot_cur == 0 ||
+ 2 * (zc->zc_depot_cur + 1) * zc_mag_size() <= zc->zc_depot_max) {
+ return;
+ }
- vm_offset_t *primary = (vm_offset_t *)element;
- vm_offset_t *backup = get_backup_ptr(elem_size, primary);
+ zone_depot_lock(zc);
- *primary = *backup = head ^ zp_nopoison_cookie;
- head = element;
+ while (zc->zc_depot_cur &&
+ 2 * (zc->zc_depot_cur + 1) * zc_mag_size() > zc->zc_depot_max) {
+ mag = STAILQ_FIRST(&zc->zc_depot);
+ STAILQ_REMOVE_HEAD(&zc->zc_depot, zm_link);
+ STAILQ_INSERT_TAIL(head, mag, zm_link);
+ zc->zc_depot_cur--;
}
- meta->zm_freelist_offs = (uint16_t)(head - base);
+ zone_depot_unlock(zc);
}
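+/*
+ * Editor's note (illustrative only, not part of this change): a worked
+ * example of the trim condition above, with hypothetical numbers. Assume
+ * zc_mag_size() == 8 and a per-CPU zc_depot_max of 64 elements:
+ *
+ *     keep trimming while 2 * (zc_depot_cur + 1) * 8 > 64,
+ *     i.e. while zc_depot_cur >= 4.
+ *
+ * The loop therefore stops at 3 magazines (24 cached elements), leaving the
+ * depot at half of zc_depot_max minus one magazine of slack; depots already
+ * at or below that level are left alone by the early return.
+ */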
-/*
- * Cram the given memory into the specified zone. Update the zone page count accordingly.
+__enum_decl(zone_reclaim_mode_t, uint32_t, {
+ ZONE_RECLAIM_TRIM,
+ ZONE_RECLAIM_DRAIN,
+ ZONE_RECLAIM_DESTROY,
+});
+
+/*!
+ * @function zone_reclaim
+ *
+ * @brief
+ * Drains or trims the zone.
+ *
+ * @discussion
+ * Draining the zone frees all of its elements.
+ *
+ * Trimming the zone tries to respect the working set size, and avoids draining
+ * the depot when it's not necessary.
+ *
+ * @param z The zone to reclaim from
+ * @param mode The purpose of this reclaim.
*/
static void
-zcram_and_lock(zone_t zone, vm_offset_t newmem, vm_size_t size)
+zone_reclaim(zone_t z, zone_reclaim_mode_t mode)
{
- unsigned int entropy_buffer[MAX_ENTROPY_PER_ZCRAM] = { 0 };
- struct zone_page_metadata *meta;
- zone_addr_kind_t kind;
- uint32_t pg_count = (uint32_t)atop(size);
- uint32_t zindex = zone_index(zone);
- uint32_t free_count;
- uint16_t empty_freelist_offs = PAGE_METADATA_EMPTY_FREELIST;
-
- /* Basic sanity checks */
- assert(zone != ZONE_NULL && newmem != (vm_offset_t)0);
- assert((newmem & PAGE_MASK) == 0);
- assert((size & PAGE_MASK) == 0);
+ struct zone_depot mags = STAILQ_HEAD_INITIALIZER(mags);
+ zone_magazine_t mag, tmp;
- KDBG(MACHDBG_CODE(DBG_MACH_ZALLOC, ZALLOC_ZCRAM) | DBG_FUNC_START,
- zindex, size);
+ zone_lock(z);
- kind = zone_addr_kind(newmem, size);
-#if DEBUG || DEVELOPMENT
- if (zalloc_debug & ZALLOC_DEBUG_ZCRAM) {
- kprintf("zcram(%p[%s%s], 0x%lx%s, 0x%lx)\n", zone,
- zone_heap_name(zone), zone->z_name, (uintptr_t)newmem,
- kind == ZONE_ADDR_FOREIGN ? "[F]" : "", (uintptr_t)size);
- }
-#endif /* DEBUG || DEVELOPMENT */
+ if (mode == ZONE_RECLAIM_DESTROY) {
+ if (!z->z_destructible || z->z_pcpu_cache ||
+ z->z_elems_rsv || z->z_allows_foreign) {
+ panic("zdestroy: Zone %s%s isn't destructible",
+ zone_heap_name(z), z->z_name);
+ }
- /*
- * Initialize the metadata for all pages. We dont need the zone lock
- * here because we are not manipulating any zone related state yet.
- *
- * This includes randomizing the freelists as the metadata isn't
- * published yet.
- */
+ if (!z->z_self || z->z_expander || z->z_expander_vm_priv ||
+ z->z_async_refilling || z->z_expanding_wait) {
+ panic("zdestroy: Zone %s%s in an invalid state for destruction",
+ zone_heap_name(z), z->z_name);
+ }
- if (kind == ZONE_ADDR_NATIVE) {
+#if !KASAN_ZALLOC
/*
- * We're being called by zfill,
- * zone_replenish_thread or vm_page_more_fictitious,
+ * Unset the valid bit. We'll hit an assert failure on further
+ * operations on this zone, until zinit() is called again.
*
- * which will only either allocate a single page, or `alloc_pages`
- * worth.
+ * Leave the zone valid for KASan as we will see zfree's on
+ * quarantined free elements even after the zone is destroyed.
*/
- assert(pg_count <= zone->alloc_pages);
-
+ z->z_self = NULL;
+#endif
+ z->z_destroyed = true;
+ } else if (z->z_destroyed) {
+ return zone_unlock(z);
+ } else if (z->z_replenishes && z->z_async_refilling) {
/*
- * Make sure the range of metadata entries we're about to init
- * have proper physical backing, then initialize them.
+ * If the zone is replenishing, leave it alone.
*/
- meta = zone_meta_from_addr(newmem, kind);
- zone_meta_populate(meta, meta + pg_count);
+ return zone_unlock(z);
+ }
- if (zone->permanent) {
- empty_freelist_offs = 0;
+ if (z->z_pcpu_cache) {
+ if (mode != ZONE_RECLAIM_TRIM) {
+ zpercpu_foreach(zc, z->z_pcpu_cache) {
+ zc->zc_depot_max /= 2;
+ }
+ } else {
+ zpercpu_foreach(zc, z->z_pcpu_cache) {
+ if (zc->zc_depot_max > 0) {
+ zc->zc_depot_max--;
+ }
+ }
}
- meta[0] = (struct zone_page_metadata){
- .zm_index = zindex,
- .zm_page_count = pg_count,
- .zm_percpu = zone->percpu,
- .zm_freelist_offs = empty_freelist_offs,
- };
+ zone_unlock(z);
- for (uint32_t i = 1; i < pg_count; i++) {
- meta[i] = (struct zone_page_metadata){
- .zm_index = zindex,
- .zm_page_count = i,
- .zm_percpu = zone->percpu,
- .zm_secondary_page = true,
- .zm_freelist_offs = empty_freelist_offs,
- };
+ if (mode == ZONE_RECLAIM_TRIM) {
+ zpercpu_foreach(zc, z->z_pcpu_cache) {
+ zone_depot_trim(zc, &mags);
+ }
+ } else {
+ zpercpu_foreach(zc, z->z_pcpu_cache) {
+ zone_depot_lock(zc);
+ STAILQ_CONCAT(&mags, &zc->zc_depot);
+ zc->zc_depot_cur = 0;
+ zone_depot_unlock(zc);
+ }
}
- if (!zone->permanent) {
- zone_randomize_freelist(zone, meta,
- zone->percpu ? PAGE_SIZE : size, kind, entropy_buffer);
+ zone_lock(z);
+
+ uint32_t freed = 0;
+
+ STAILQ_FOREACH(mag, &mags, zm_link) {
+ freed += zone_reclaim_elements(z,
+ &mag->zm_cur, mag->zm_elems);
+
+ if (freed >= zc_free_batch_size) {
+ z->z_elems_free_min += freed;
+ z->z_elems_free_max += freed;
+ z->z_elems_free += freed;
+ zone_unlock(z);
+ thread_yield_to_preemption();
+ zone_lock(z);
+ freed = 0;
+ }
}
- } else {
- if (!zone->allows_foreign || !from_foreign_range(newmem, size)) {
- panic("zcram_and_lock: foreign memory [%lx] being crammed is "
- "outside of foreign range", (uintptr_t)newmem);
+
+ if (mode == ZONE_RECLAIM_DESTROY) {
+ zpercpu_foreach(zc, z->z_pcpu_cache) {
+ freed += zone_reclaim_elements(z,
+ &zc->zc_alloc_cur, zc->zc_alloc_elems);
+ freed += zone_reclaim_elements(z,
+ &zc->zc_free_cur, zc->zc_free_elems);
+ }
+
+ z->z_elems_free_wss = 0;
+ z->z_elems_free_min = 0;
+ z->z_elems_free_max = 0;
+ z->z_contention_cur = 0;
+ z->z_contention_wma = 0;
+ } else {
+ z->z_elems_free_min += freed;
+ z->z_elems_free_max += freed;
+ }
+ z->z_elems_free += freed;
+ }
+
+ for (;;) {
+ struct zone_page_metadata *meta;
+ uint32_t count, goal, freed = 0;
+
+ goal = z->z_elems_rsv;
+ if (mode == ZONE_RECLAIM_TRIM) {
+ /*
+ * When trimming, only free elements in excess
+ * of the working set estimate.
+ *
+ * However, if we are in a situation where the working
+ * set estimate is clearly growing, ignore the estimate
+ * as the next working set update will grow it and
+ * we want to avoid churn.
+ */
+ goal = MAX(goal, MAX(z->z_elems_free_wss,
+ z->z_elems_free - z->z_elems_free_min));
+
+ /*
+ * Add some slop to account for "the last partial chunk in flight"
+ * so that we do not deplete the recirculation depot too harshly.
+ */
+ goal += z->z_chunk_elems / 2;
+ }
+
+ if (z->z_elems_free <= goal) {
+ break;
}
/*
- * We cannot support elements larger than page size for foreign
- * memory because we put metadata on the page itself for each
- * page of foreign memory.
+ * If we're above target, but we have no free page, then drain
+ * the recirculation depot until we get a free chunk or exhaust
+ * the depot.
*
- * We need to do this in order to be able to reach the metadata
- * when any element is freed.
+ * This is rather abrupt, but it also tends to reduce
+ * fragmentation, and the zone code will simply import
+ * again over time as needed.
*/
- assert(!zone->percpu && !zone->permanent);
- assert(zone_elem_size(zone) <= PAGE_SIZE - sizeof(struct zone_page_metadata));
+ while (z->z_recirc_cur) {
+ if (z->z_recirc_cur * zc_mag_size() <= goal &&
+ !zone_pva_is_null(z->z_pageq_empty)) {
+ break;
+ }
+ if (freed >= zc_free_batch_size) {
+ zone_unlock(z);
+ thread_yield_to_preemption();
+ zone_lock(z);
+ freed = 0;
+ /* we dropped the lock, need to reassess */
+ continue;
+ }
+ freed += zone_reclaim_recirc_magazine(z, &mags);
+ }
- bzero((void *)newmem, size);
+ if (zone_pva_is_null(z->z_pageq_empty)) {
+ break;
+ }
- for (vm_offset_t offs = 0; offs < size; offs += PAGE_SIZE) {
- meta = (struct zone_page_metadata *)(newmem + offs);
- *meta = (struct zone_page_metadata){
- .zm_index = zindex,
- .zm_page_count = 1,
- .zm_freelist_offs = empty_freelist_offs,
- };
- meta->zm_foreign_cookie[0] = ZONE_FOREIGN_COOKIE;
- zone_randomize_freelist(zone, meta, PAGE_SIZE, kind,
- entropy_buffer);
+ meta = zone_pva_to_meta(z->z_pageq_empty);
+ count = (uint32_t)ptoa(meta->zm_chunk_len) / zone_elem_size(z);
+
+ if (z->z_elems_free - count < goal) {
+ break;
}
+
+ zone_reclaim_chunk(z, meta, count);
}
-#if VM_MAX_TAG_ZONES
- if (__improbable(zone->tags)) {
- assert(kind == ZONE_ADDR_NATIVE && !zone->percpu);
- ztMemoryAdd(zone, newmem, size);
+ zone_unlock(z);
+
+ STAILQ_FOREACH_SAFE(mag, &mags, zm_link, tmp) {
+ zone_magazine_free(mag);
}
-#endif /* VM_MAX_TAG_ZONES */
+}
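+/*
+ * Editor's note (illustrative only, not part of this change): a worked
+ * example of the ZONE_RECLAIM_TRIM goal computed in the loop above, with
+ * hypothetical numbers. Assume z_elems_rsv = 0, z_elems_free_wss = 100,
+ * z_elems_free = 300, z_elems_free_min = 260 and z_chunk_elems = 64:
+ *
+ *     goal = MAX(0, MAX(100, 300 - 260)) = 100
+ *     goal += 64 / 2                       -> goal = 132
+ *
+ * Empty chunks are then released only while doing so keeps z_elems_free at
+ * or above 132, so at most 168 of the 300 free elements are given back.
+ */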
+static void
+zone_reclam_all(zone_reclaim_mode_t mode)
+{
/*
- * Insert the initialized pages / metadatas into the right lists.
+ * Start with zones with VA sequester since depopulating
+ * pages will not need to allocate vm map entries for holes,
+ * which will give memory back to the system faster.
*/
-
- lock_zone(zone);
- assert(zone->z_self == zone);
-
- zone->page_count += pg_count;
- if (zone->page_count_hwm < zone->page_count) {
- zone->page_count_hwm = zone->page_count;
+ zone_foreach(z) {
+ if (z == zc_magazine_zone) {
+ continue;
+ }
+ if (z->z_va_sequester && z->collectable) {
+ zone_reclaim(z, mode);
+ }
}
- os_atomic_add(&zones_phys_page_count, pg_count, relaxed);
- if (kind == ZONE_ADDR_NATIVE) {
- os_atomic_add(&zones_phys_page_mapped_count, pg_count, relaxed);
- if (zone->permanent) {
- zone_meta_queue_push(zone, &zone->pages_intermediate, meta, kind);
- } else {
- zone_meta_queue_push(zone, &zone->pages_all_free, meta, kind);
- zone->allfree_page_count += meta->zm_page_count;
+ zone_foreach(z) {
+ if (z == zc_magazine_zone) {
+ continue;
}
- free_count = zone_elem_count(zone, size, kind);
- zone->countfree += free_count;
- zone->countavail += free_count;
- } else {
- free_count = zone_elem_count(zone, PAGE_SIZE, kind);
- for (vm_offset_t offs = 0; offs < size; offs += PAGE_SIZE) {
- meta = (struct zone_page_metadata *)(newmem + offs);
- zone_meta_queue_push(zone, &zone->pages_any_free_foreign, meta, kind);
- zone->countfree += free_count;
- zone->countavail += free_count;
+ if (!z->z_va_sequester && z->collectable) {
+ zone_reclaim(z, mode);
}
}
- KDBG(MACHDBG_CODE(DBG_MACH_ZALLOC, ZALLOC_ZCRAM) | DBG_FUNC_END, zindex);
+ zone_reclaim(zc_magazine_zone, mode);
}
void
-zcram(zone_t zone, vm_offset_t newmem, vm_size_t size)
-{
- zcram_and_lock(zone, newmem, size);
- unlock_zone(zone);
-}
-
-/*
- * Fill a zone with enough memory to contain at least nelem elements.
- * Return the number of elements actually put into the zone, which may
- * be more than the caller asked for since the memory allocation is
- * rounded up to the next zone allocation size.
- */
-int
-zfill(
- zone_t zone,
- int nelem)
+zone_gc(zone_gc_level_t level)
{
- kern_return_t kr;
- vm_offset_t memory;
-
- vm_size_t alloc_size = ptoa(zone->alloc_pages);
- vm_size_t nalloc_inc = zone_elem_count(zone, alloc_size, ZONE_ADDR_NATIVE);
- vm_size_t nalloc = 0, goal = MAX(0, nelem);
- int kmaflags = KMA_KOBJECT | KMA_ZERO;
-
- if (zone->noencrypt) {
- kmaflags |= KMA_NOENCRYPT;
- }
-
- assert(!zone->allows_foreign && !zone->permanent);
+ zone_reclaim_mode_t mode;
- /*
- * Trigger jetsams via the vm_pageout_garbage_collect thread if we're
- * running out of zone memory
- */
- if (is_zone_map_nearing_exhaustion()) {
- thread_wakeup((event_t) &vm_pageout_garbage_collect);
+ switch (level) {
+ case ZONE_GC_TRIM:
+ mode = ZONE_RECLAIM_TRIM;
+ break;
+ case ZONE_GC_DRAIN:
+ mode = ZONE_RECLAIM_DRAIN;
+ break;
+ case ZONE_GC_JETSAM:
+ kill_process_in_largest_zone();
+ mode = ZONE_RECLAIM_TRIM;
+ break;
}
- if (zone->va_sequester) {
- lock_zone(zone);
-
- do {
- struct zone_page_metadata *page_meta;
- page_meta = zone_sequestered_page_get(zone, &memory);
- if (NULL == page_meta) {
- break;
- }
- unlock_zone(zone);
-
- kr = zone_sequestered_page_populate(zone, page_meta,
- memory, alloc_size, kmaflags);
- if (KERN_SUCCESS != kr) {
- goto out_nolock;
- }
-
- zcram_and_lock(zone, memory, alloc_size);
- nalloc += nalloc_inc;
- } while (nalloc < goal);
-
- unlock_zone(zone);
- }
+ current_thread()->options |= TH_OPT_ZONE_PRIV;
+ lck_mtx_lock(&zone_gc_lock);
-out_nolock:
- while (nalloc < goal) {
- kr = kernel_memory_allocate(submap_for_zone(zone), &memory,
- alloc_size, 0, kmaflags, VM_KERN_MEMORY_ZONE);
- if (kr != KERN_SUCCESS) {
- printf("%s: kernel_memory_allocate() of %lu bytes failed\n",
- __func__, (unsigned long)(nalloc * alloc_size));
- break;
- }
+ zone_reclam_all(mode);
- zcram(zone, memory, alloc_size);
- nalloc += nalloc_inc;
+ if (level == ZONE_GC_JETSAM && zone_map_nearing_exhaustion()) {
+ /*
+ * If we possibly killed a process, but we're still critical,
+ * we need to drain harder.
+ */
+ zone_reclam_all(ZONE_RECLAIM_DRAIN);
}
- return (int)nalloc;
+ lck_mtx_unlock(&zone_gc_lock);
+ current_thread()->options &= ~TH_OPT_ZONE_PRIV;
}
-/*
- * We're being very conservative here and picking a value of 95%. We might need to lower this if
- * we find that we're not catching the problem and are still hitting zone map exhaustion panics.
- */
-#define ZONE_MAP_JETSAM_LIMIT_DEFAULT 95
-
-/*
- * Trigger zone-map-exhaustion jetsams if the zone map is X% full, where X=zone_map_jetsam_limit.
- * Can be set via boot-arg "zone_map_jetsam_limit". Set to 95% by default.
- */
-TUNABLE_WRITEABLE(unsigned int, zone_map_jetsam_limit, "zone_map_jetsam_limit",
- ZONE_MAP_JETSAM_LIMIT_DEFAULT);
-
void
-get_zone_map_size(uint64_t *current_size, uint64_t *capacity)
+zone_gc_trim(void)
{
- vm_offset_t phys_pages = os_atomic_load(&zones_phys_page_mapped_count, relaxed);
- *current_size = ptoa_64(phys_pages);
- *capacity = zone_phys_mapped_max;
+ zone_gc(ZONE_GC_TRIM);
}
void
-get_largest_zone_info(char *zone_name, size_t zone_name_len, uint64_t *zone_size)
+zone_gc_drain(void)
{
- zone_t largest_zone = zone_find_largest();
-
- /*
- * Append kalloc heap name to zone name (if zone is used by kalloc)
- */
- snprintf(zone_name, zone_name_len, "%s%s",
- zone_heap_name(largest_zone), largest_zone->z_name);
-
- *zone_size = zone_size_wired(largest_zone);
+ zone_gc(ZONE_GC_DRAIN);
}
-boolean_t
-is_zone_map_nearing_exhaustion(void)
+static bool
+zone_defrag_needed(zone_t z)
{
- vm_offset_t phys_pages = os_atomic_load(&zones_phys_page_mapped_count, relaxed);
- return ptoa_64(phys_pages) > (zone_phys_mapped_max * zone_map_jetsam_limit) / 100;
-}
-
+ uint32_t recirc_size = z->z_recirc_cur * zc_mag_size();
-#define VMENTRY_TO_VMOBJECT_COMPARISON_RATIO 98
+ if (recirc_size <= z->z_chunk_elems / 2) {
+ return false;
+ }
+ return recirc_size * zc_defrag_ratio > z->z_elems_free_wss * 100;
+}
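+/*
+ * Editor's note (illustrative only, not part of this change): a worked
+ * example of the threshold above, with hypothetical numbers. Assume
+ * zc_mag_size() == 8, z_recirc_cur = 50 (so recirc_size = 400),
+ * z_chunk_elems = 64, and a zc_defrag_ratio of 100 (the ratio is a tunable;
+ * 100 is only an assumed value here):
+ *
+ *     400 > 64 / 2, so the size check passes;
+ *     defrag is needed when 400 * 100 > z_elems_free_wss * 100,
+ *     i.e. when the depot caches more elements than the working set.
+ *
+ * With a working set estimate of 120 free elements the callout would run
+ * and shrink the recirculation depot back towards that estimate.
+ */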
-/*
- * Tries to kill a single process if it can attribute one to the largest zone. If not, wakes up the memorystatus thread
- * to walk through the jetsam priority bands and kill processes.
+/*!
+ * @function zone_defrag_async
+ *
+ * @brief
+ * Resize the recirculation depot to match the working set size.
+ *
+ * @discussion
+ * When zones grow very large due to a spike in usage, and then some of those
+ * elements get freed, the elements in magazines in the recirculation depot
+ * are in no particular order.
+ *
+ * In order to control fragmentation, we need to detect "empty" pages so that
+ * they get onto the @c z_pageq_empty freelist, so that allocations re-pack
+ * naturally.
+ *
+ * This is done very gently, never shrinking the depot below the working set plus some slop.
*/
static void
-kill_process_in_largest_zone(void)
+zone_defrag_async(__unused thread_call_param_t p0, __unused thread_call_param_t p1)
{
- pid_t pid = -1;
- zone_t largest_zone = zone_find_largest();
+ zone_foreach(z) {
+ struct zone_depot mags = STAILQ_HEAD_INITIALIZER(mags);
+ zone_magazine_t mag, tmp;
+ uint32_t freed = 0, goal = 0;
- printf("zone_map_exhaustion: Zone mapped %lld of %lld, used %lld, map size %lld, capacity %lld [jetsam limit %d%%]\n",
- ptoa_64(os_atomic_load(&zones_phys_page_mapped_count, relaxed)), ptoa_64(zone_phys_mapped_max),
- ptoa_64(os_atomic_load(&zones_phys_page_count, relaxed)),
- (uint64_t)zone_submaps_approx_size(),
- (uint64_t)zone_range_size(&zone_info.zi_map_range),
- zone_map_jetsam_limit);
- printf("zone_map_exhaustion: Largest zone %s%s, size %lu\n", zone_heap_name(largest_zone),
- largest_zone->z_name, (uintptr_t)zone_size_wired(largest_zone));
+ if (!z->collectable || !zone_defrag_needed(z)) {
+ continue;
+ }
- /*
- * We want to make sure we don't call this function from userspace.
- * Or we could end up trying to synchronously kill the process
- * whose context we're in, causing the system to hang.
- */
- assert(current_task() == kernel_task);
+ zone_lock(z);
- /*
- * If vm_object_zone is the largest, check to see if the number of
- * elements in vm_map_entry_zone is comparable.
- *
- * If so, consider vm_map_entry_zone as the largest. This lets us target
- * a specific process to jetsam to quickly recover from the zone map
- * bloat.
- */
- if (largest_zone == vm_object_zone) {
- unsigned int vm_object_zone_count = zone_count_allocated(vm_object_zone);
- unsigned int vm_map_entry_zone_count = zone_count_allocated(vm_map_entry_zone);
- /* Is the VM map entries zone count >= 98% of the VM objects zone count? */
- if (vm_map_entry_zone_count >= ((vm_object_zone_count * VMENTRY_TO_VMOBJECT_COMPARISON_RATIO) / 100)) {
- largest_zone = vm_map_entry_zone;
- printf("zone_map_exhaustion: Picking VM map entries as the zone to target, size %lu\n",
- (uintptr_t)zone_size_wired(largest_zone));
+ goal = z->z_elems_free_wss + z->z_chunk_elems / 2 +
+ zc_mag_size() - 1;
+
+ while (z->z_recirc_cur * zc_mag_size() > goal) {
+ if (freed >= zc_free_batch_size) {
+ zone_unlock(z);
+ thread_yield_to_preemption();
+ zone_lock(z);
+ freed = 0;
+ /* we dropped the lock, need to reassess */
+ continue;
+ }
+ freed += zone_reclaim_recirc_magazine(z, &mags);
}
- }
- /* TODO: Extend this to check for the largest process in other zones as well. */
- if (largest_zone == vm_map_entry_zone) {
- pid = find_largest_process_vm_map_entries();
- } else {
- printf("zone_map_exhaustion: Nothing to do for the largest zone [%s%s]. "
- "Waking up memorystatus thread.\n", zone_heap_name(largest_zone),
- largest_zone->z_name);
- }
- if (!memorystatus_kill_on_zone_map_exhaustion(pid)) {
- printf("zone_map_exhaustion: Call to memorystatus failed, victim pid: %d\n", pid);
+ zone_unlock(z);
+
+ STAILQ_FOREACH_SAFE(mag, &mags, zm_link, tmp) {
+ zone_magazine_free(mag);
+ }
}
}
-#pragma mark zalloc module init
-
-/*
- * Initialize the "zone of zones" which uses fixed memory allocated
- * earlier in memory initialization. zone_bootstrap is called
- * before zone_init.
- */
-__startup_func
void
-zone_bootstrap(void)
+compute_zone_working_set_size(__unused void *param)
{
- /* Validate struct zone_page_metadata expectations */
- if ((1U << ZONE_PAGECOUNT_BITS) <
- atop(ZONE_MAX_ALLOC_SIZE) * sizeof(struct zone_page_metadata)) {
- panic("ZONE_PAGECOUNT_BITS is not large enough to hold page counts");
- }
+ uint32_t zc_auto = zc_auto_threshold;
+ bool kick_defrag = false;
- /* Validate struct zone_packed_virtual_address expectations */
- static_assert((intptr_t)VM_MIN_KERNEL_ADDRESS < 0, "the top bit must be 1");
- if (VM_KERNEL_POINTER_SIGNIFICANT_BITS - PAGE_SHIFT > 31) {
- panic("zone_pva_t can't pack a kernel page address in 31 bits");
+ /*
+ * Keep zone caching disabled until the first proc is made.
+ */
+ if (__improbable(zone_caching_disabled < 0)) {
+ return;
}
- zpercpu_early_count = ml_early_cpu_max_number() + 1;
-
- /* Set up zone element poisoning */
- zp_bootstrap();
+ zone_caching_disabled = vm_pool_low();
+#if ZALLOC_EARLY_GAPS
+ zone_cleanup_early_gaps_if_needed();
+#endif
- random_bool_init(&zone_bool_gen);
+ if (os_mul_overflow(zc_auto, Z_CONTENTION_WMA_UNIT, &zc_auto)) {
+ zc_auto = 0;
+ }
- /*
- * the KASAN quarantine for kalloc doesn't understand heaps
- * and trips the heap confusion panics. At the end of the day,
- * all these security measures are double duty with KASAN.
- *
- * On 32bit kernels, these protections are just too expensive.
- */
-#if !defined(__LP64__) || KASAN_ZALLOC
- zsecurity_options &= ~ZSECURITY_OPTIONS_SEQUESTER;
- zsecurity_options &= ~ZSECURITY_OPTIONS_SUBMAP_USER_DATA;
- zsecurity_options &= ~ZSECURITY_OPTIONS_SEQUESTER_KEXT_KALLOC;
-#endif
+ zone_foreach(z) {
+ uint32_t wma;
+ bool needs_caching = false;
- thread_call_setup(&call_async_alloc, zalloc_async, NULL);
+ if (z->z_self != z) {
+ continue;
+ }
-#if CONFIG_ZCACHE
- /* zcc_enable_for_zone_name=<zone>: enable per-cpu zone caching for <zone>. */
- if (PE_parse_boot_arg_str("zcc_enable_for_zone_name", cache_zone_name, sizeof(cache_zone_name))) {
- printf("zcache: caching enabled for zone %s\n", cache_zone_name);
- }
-#endif /* CONFIG_ZCACHE */
-}
+ zone_lock(z);
-#if __LP64__
-#if CONFIG_EMBEDDED
-#define ZONE_MAP_VIRTUAL_SIZE_LP64 (32ULL * 1024ULL * 1024 * 1024)
-#else
-#define ZONE_MAP_VIRTUAL_SIZE_LP64 (128ULL * 1024ULL * 1024 * 1024)
-#endif
-#endif /* __LP64__ */
+ wma = z->z_elems_free_max - z->z_elems_free_min;
+ wma = (3 * wma + z->z_elems_free_wss) / 4;
+ z->z_elems_free_max = z->z_elems_free_min = z->z_elems_free;
+ z->z_elems_free_wss = wma;
-#define SINGLE_GUARD 16384
-#define MULTI_GUARD (3 * SINGLE_GUARD)
+ if (!kick_defrag && zone_defrag_needed(z)) {
+ kick_defrag = true;
+ }
-#if __LP64__
-static inline vm_offset_t
-zone_restricted_va_max(void)
-{
- vm_offset_t compressor_max = VM_PACKING_MAX_PACKABLE(C_SLOT_PACKED_PTR);
- vm_offset_t vm_page_max = VM_PACKING_MAX_PACKABLE(VM_PAGE_PACKED_PTR);
+ /* fixed point decimal of contentions per second */
+ wma = z->z_contention_cur * Z_CONTENTION_WMA_UNIT /
+ ZONE_WSS_UPDATE_PERIOD;
+ z->z_contention_cur = 0;
+ z->z_contention_wma = (3 * wma + z->z_contention_wma) / 4;
- return trunc_page(MIN(compressor_max, vm_page_max));
-}
-#endif
+ /*
+ * If the zone seems to be very quiet,
+ * gently lower its cpu-local depot size.
+ */
+ if (z->z_pcpu_cache && wma < Z_CONTENTION_WMA_UNIT / 2 &&
+ z->z_contention_wma < Z_CONTENTION_WMA_UNIT / 2) {
+ zpercpu_foreach(zc, z->z_pcpu_cache) {
+ if (zc->zc_depot_max > zc_mag_size()) {
+ zc->zc_depot_max--;
+ }
+ }
+ }
-__startup_func
-static void
-zone_tunables_fixup(void)
-{
- if (zone_map_jetsam_limit == 0 || zone_map_jetsam_limit > 100) {
- zone_map_jetsam_limit = ZONE_MAP_JETSAM_LIMIT_DEFAULT;
- }
-}
-STARTUP(TUNABLES, STARTUP_RANK_MIDDLE, zone_tunables_fixup);
+ /*
+ * If the zone has been contending like crazy for two periods,
+ * and is eligible, maybe it's time to enable caching.
+ */
+ if (!z->z_nocaching && !z->z_pcpu_cache && !z->exhaustible &&
+ zc_auto && z->z_contention_wma >= zc_auto && wma >= zc_auto) {
+ needs_caching = true;
+ }
-__startup_func
-static vm_size_t
-zone_phys_size_max(void)
-{
- mach_vm_size_t zsize;
- vm_size_t zsizearg;
+ zone_unlock(z);
- if (PE_parse_boot_argn("zsize", &zsizearg, sizeof(zsizearg))) {
- zsize = zsizearg * (1024ULL * 1024);
- } else {
- zsize = sane_size >> 2; /* Set target zone size as 1/4 of physical memory */
-#if defined(__LP64__)
- zsize += zsize >> 1;
-#endif /* __LP64__ */
+ if (needs_caching) {
+ zone_enable_caching(z);
+ }
}
- if (zsize < CONFIG_ZONE_MAP_MIN) {
- zsize = CONFIG_ZONE_MAP_MIN; /* Clamp to min */
- }
- if (zsize > sane_size >> 1) {
- zsize = sane_size >> 1; /* Clamp to half of RAM max */
- }
- if (zsizearg == 0 && zsize > ZONE_MAP_MAX) {
- /* if zsize boot-arg not present and zsize exceeds platform maximum, clip zsize */
- vm_size_t orig_zsize = zsize;
- zsize = ZONE_MAP_MAX;
- printf("NOTE: zonemap size reduced from 0x%lx to 0x%lx\n",
- (uintptr_t)orig_zsize, (uintptr_t)zsize);
+ if (kick_defrag) {
+ thread_call_enter(&zone_defrag_callout);
}
-
- assert((vm_size_t) zsize == zsize);
- return (vm_size_t)trunc_page(zsize);
}
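+/*
+ * Editor's note (illustrative only, not part of this change): a worked
+ * example of the weighted moving averages above, with hypothetical numbers.
+ * If the free count swung between z_elems_free_min = 120 and
+ * z_elems_free_max = 280 this period, and the previous z_elems_free_wss
+ * was 200:
+ *
+ *     swing   = 280 - 120 = 160
+ *     new wss = (3 * 160 + 200) / 4 = 170
+ *
+ * i.e. the estimate weights the latest period 3:1 against history; the
+ * contention average computed in the same loop uses the same 3:1 weighting,
+ * expressed in Z_CONTENTION_WMA_UNIT fixed point.
+ */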
-__startup_func
-static struct zone_map_range
-zone_init_allocate_va(vm_offset_t *submap_min, vm_size_t size, bool guard)
+#endif /* !ZALLOC_TEST */
+#pragma mark vm integration, MIG routines
+#if !ZALLOC_TEST
+
+/*
+ * Creates a vm_map_copy_t to return to the caller of mach_* MIG calls
+ * requesting zone information.
+ * Frees unused pages towards the end of the region, and zeroes out unused
+ * space on the last page.
+ */
+static vm_map_copy_t
+create_vm_map_copy(
+ vm_offset_t start_addr,
+ vm_size_t total_size,
+ vm_size_t used_size)
{
- struct zone_map_range r;
- kern_return_t kr;
+ kern_return_t kr;
+ vm_offset_t end_addr;
+ vm_size_t free_size;
+ vm_map_copy_t copy;
- if (guard) {
- vm_map_offset_t addr = *submap_min;
- vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
+ if (used_size != total_size) {
+ end_addr = start_addr + used_size;
+ free_size = total_size - (round_page(end_addr) - start_addr);
- vmk_flags.vmkf_permanent = TRUE;
- kr = vm_map_enter(kernel_map, &addr, size, 0,
- VM_FLAGS_FIXED, vmk_flags, VM_KERN_MEMORY_ZONE, kernel_object,
- 0, FALSE, VM_PROT_NONE, VM_PROT_NONE, VM_INHERIT_DEFAULT);
- *submap_min = (vm_offset_t)addr;
- } else {
- kr = kernel_memory_allocate(kernel_map, submap_min, size,
- 0, KMA_KOBJECT | KMA_PAGEABLE | KMA_VAONLY, VM_KERN_MEMORY_ZONE);
- }
- if (kr != KERN_SUCCESS) {
- panic("zone_init_allocate_va(0x%lx:0x%zx) failed: %d",
- (uintptr_t)*submap_min, (size_t)size, kr);
+ if (free_size >= PAGE_SIZE) {
+ kmem_free(ipc_kernel_map,
+ round_page(end_addr), free_size);
+ }
+ bzero((char *) end_addr, round_page(end_addr) - end_addr);
}
- r.min_address = *submap_min;
- *submap_min += size;
- r.max_address = *submap_min;
+ kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)start_addr,
+ (vm_map_size_t)used_size, TRUE, &copy);
+ assert(kr == KERN_SUCCESS);
- return r;
+ return copy;
}
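
/*
 * Standalone sketch of the trimming arithmetic in create_vm_map_copy()
 * above, assuming a 4 KiB page and a page-aligned start_addr. It only
 * computes which bytes would be freed and which would be zeroed; no VM
 * calls are made, and DEMO_PAGE_SIZE / demo_round_page are stand-ins for
 * PAGE_SIZE / round_page().
 */
#include <stdio.h>
#include <stdint.h>

#define DEMO_PAGE_SIZE 4096u
#define demo_round_page(x) \
    (((x) + DEMO_PAGE_SIZE - 1) & ~(uintptr_t)(DEMO_PAGE_SIZE - 1))

int
main(void)
{
    uintptr_t start_addr = 0x100000;           /* page aligned */
    size_t    total_size = 8 * DEMO_PAGE_SIZE; /* what was allocated */
    size_t    used_size  = 10000;              /* what was actually filled */

    uintptr_t end_addr  = start_addr + used_size;
    size_t    free_size = total_size - (demo_round_page(end_addr) - start_addr);

    /* the 5 whole pages past the used region get returned to the map */
    printf("free %zu bytes at 0x%lx\n", free_size,
        (unsigned long)demo_round_page(end_addr));
    /* the tail of the last used page is scrubbed before copy-out */
    printf("zero %zu trailing bytes of the last used page\n",
        (size_t)(demo_round_page(end_addr) - end_addr));
    return 0;
}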
-__startup_func
-static void
-zone_submap_init(
- vm_offset_t *submap_min,
- unsigned idx,
- uint64_t zone_sub_map_numer,
- uint64_t *remaining_denom,
- vm_offset_t *remaining_size,
- vm_size_t guard_size)
+static boolean_t
+get_zone_info(
+ zone_t z,
+ mach_zone_name_t *zn,
+ mach_zone_info_t *zi)
{
- vm_offset_t submap_start, submap_end;
- vm_size_t submap_size;
- vm_map_t submap;
- kern_return_t kr;
-
- submap_size = trunc_page(zone_sub_map_numer * *remaining_size /
- *remaining_denom);
- submap_start = *submap_min;
- submap_end = submap_start + submap_size;
+ struct zone zcopy;
+ vm_size_t cached = 0;
-#if defined(__LP64__)
- if (idx == Z_SUBMAP_IDX_VA_RESTRICTED_MAP) {
- vm_offset_t restricted_va_max = zone_restricted_va_max();
- if (submap_end > restricted_va_max) {
-#if DEBUG || DEVELOPMENT
- printf("zone_init: submap[%d] clipped to %zdM of %zdM\n", idx,
- (size_t)(restricted_va_max - submap_start) >> 20,
- (size_t)submap_size >> 20);
-#endif /* DEBUG || DEVELOPMENT */
- guard_size += submap_end - restricted_va_max;
- *remaining_size -= submap_end - restricted_va_max;
- submap_end = restricted_va_max;
- submap_size = restricted_va_max - submap_start;
+ assert(z != ZONE_NULL);
+ zone_lock(z);
+ if (!z->z_self) {
+ zone_unlock(z);
+ return FALSE;
+ }
+ zcopy = *z;
+ if (z->z_pcpu_cache) {
+ zpercpu_foreach(zc, z->z_pcpu_cache) {
+ cached += zc->zc_alloc_cur + zc->zc_free_cur;
+ cached += zc->zc_depot_cur * zc_mag_size();
}
-
- vm_packing_verify_range("vm_compressor",
- submap_start, submap_end, VM_PACKING_PARAMS(C_SLOT_PACKED_PTR));
- vm_packing_verify_range("vm_page",
- submap_start, submap_end, VM_PACKING_PARAMS(VM_PAGE_PACKED_PTR));
}
-#endif /* defined(__LP64__) */
+ zone_unlock(z);
- vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
- vmk_flags.vmkf_permanent = TRUE;
- kr = kmem_suballoc(kernel_map, submap_min, submap_size,
- FALSE, VM_FLAGS_FIXED, vmk_flags,
- VM_KERN_MEMORY_ZONE, &submap);
- if (kr != KERN_SUCCESS) {
- panic("kmem_suballoc(kernel_map[%d] %p:%p) failed: %d",
- idx, (void *)submap_start, (void *)submap_end, kr);
- }
+ if (zn != NULL) {
+ /*
+ * Append kalloc heap name to zone name (if zone is used by kalloc)
+ */
+ char temp_zone_name[MAX_ZONE_NAME] = "";
+ snprintf(temp_zone_name, MAX_ZONE_NAME, "%s%s",
+ zone_heap_name(z), z->z_name);
-#if DEBUG || DEVELOPMENT
- printf("zone_init: submap[%d] %p:%p (%zuM)\n",
- idx, (void *)submap_start, (void *)submap_end,
- (size_t)submap_size >> 20);
-#endif /* DEBUG || DEVELOPMENT */
+ /* assuming here the name data is static */
+ (void) __nosan_strlcpy(zn->mzn_name, temp_zone_name,
+ strlen(temp_zone_name) + 1);
+ }
- zone_submaps[idx] = submap;
- *submap_min = submap_end;
- *remaining_size -= submap_size;
- *remaining_denom -= zone_sub_map_numer;
+ if (zi != NULL) {
+ *zi = (mach_zone_info_t) {
+ .mzi_count = zone_count_allocated(&zcopy) - cached,
+ .mzi_cur_size = ptoa_64(zone_scale_for_percpu(&zcopy, zcopy.z_wired_cur)),
+ // max_size for zprint is now high-watermark of pages used
+ .mzi_max_size = ptoa_64(zone_scale_for_percpu(&zcopy, zcopy.z_wired_hwm)),
+ .mzi_elem_size = zone_scale_for_percpu(&zcopy, zcopy.z_elem_size),
+ .mzi_alloc_size = ptoa_64(zcopy.z_chunk_pages),
+ .mzi_exhaustible = (uint64_t)zcopy.exhaustible,
+ };
+ zpercpu_foreach(zs, zcopy.z_stats) {
+ zi->mzi_sum_size += zs->zs_mem_allocated;
+ }
+ if (zcopy.collectable) {
+ SET_MZI_COLLECTABLE_BYTES(zi->mzi_collectable,
+ ptoa_64(zone_scale_for_percpu(&zcopy, zcopy.z_wired_empty)));
+ SET_MZI_COLLECTABLE_FLAG(zi->mzi_collectable, TRUE);
+ }
+ }
- zone_init_allocate_va(submap_min, guard_size, true);
+ return TRUE;
}
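
/*
 * Standalone sketch of the "cached" adjustment above: elements parked in
 * the per-CPU alloc/free magazines and depots are still free, so they are
 * subtracted from the allocated count before it is reported as mzi_count.
 * The struct layout, magazine size and counts are illustrative only, not
 * the kernel's actual types.
 */
#include <stdio.h>
#include <stdint.h>

#define DEMO_MAG_SIZE 8u   /* stand-in for zc_mag_size() */
#define DEMO_NCPUS    4u

struct demo_cpu_cache {
    uint16_t alloc_cur;    /* elements left in the "alloc" magazine */
    uint16_t free_cur;     /* elements in the "free" magazine */
    uint16_t depot_cur;    /* full magazines in the CPU-local depot */
};

int
main(void)
{
    struct demo_cpu_cache caches[DEMO_NCPUS] = {
        { 3, 5, 2 }, { 1, 0, 1 }, { 7, 2, 0 }, { 4, 4, 3 },
    };
    uint64_t allocated = 1000;   /* what the zone accounting says */
    uint64_t cached = 0;

    for (unsigned i = 0; i < DEMO_NCPUS; i++) {
        cached += caches[i].alloc_cur + caches[i].free_cur;
        cached += (uint64_t)caches[i].depot_cur * DEMO_MAG_SIZE;
    }
    /* 74 of the 1000 "allocated" elements are really idle in caches */
    printf("allocated=%llu cached=%llu reported=%llu\n",
        (unsigned long long)allocated, (unsigned long long)cached,
        (unsigned long long)(allocated - cached));
    return 0;
}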
-/* Global initialization of Zone Allocator.
- * Runs after zone_bootstrap.
- */
-__startup_func
-static void
-zone_init(void)
+kern_return_t
+task_zone_info(
+ __unused task_t task,
+ __unused mach_zone_name_array_t *namesp,
+ __unused mach_msg_type_number_t *namesCntp,
+ __unused task_zone_info_array_t *infop,
+ __unused mach_msg_type_number_t *infoCntp)
{
- vm_size_t zone_meta_size;
- vm_size_t zone_map_size;
- vm_size_t remaining_size;
- vm_offset_t submap_min = 0;
-
- if (ZSECURITY_OPTIONS_SUBMAP_USER_DATA & zsecurity_options) {
- zone_last_submap_idx = Z_SUBMAP_IDX_BAG_OF_BYTES_MAP;
- } else {
- zone_last_submap_idx = Z_SUBMAP_IDX_GENERAL_MAP;
- }
- zone_phys_mapped_max = zone_phys_size_max();
+ return KERN_FAILURE;
+}
-#if __LP64__
- zone_map_size = ZONE_MAP_VIRTUAL_SIZE_LP64;
-#else
- zone_map_size = zone_phys_mapped_max;
-#endif
- zone_meta_size = round_page(atop(zone_map_size) *
- sizeof(struct zone_page_metadata));
+kern_return_t
+mach_zone_info(
+ host_priv_t host,
+ mach_zone_name_array_t *namesp,
+ mach_msg_type_number_t *namesCntp,
+ mach_zone_info_array_t *infop,
+ mach_msg_type_number_t *infoCntp)
+{
+ return mach_memory_info(host, namesp, namesCntp, infop, infoCntp, NULL, NULL);
+}
- /*
- * Zone "map" setup:
- *
- * [ VA_RESTRICTED ] <-- LP64 only
- * [ SINGLE_GUARD ] <-- LP64 only
- * [ meta ]
- * [ SINGLE_GUARD ]
- * [ map<i> ] \ for each extra map
- * [ MULTI_GUARD ] /
- */
- remaining_size = zone_map_size;
-#if defined(__LP64__)
- remaining_size -= SINGLE_GUARD;
-#endif
- remaining_size -= zone_meta_size + SINGLE_GUARD;
- remaining_size -= MULTI_GUARD * (zone_last_submap_idx -
- Z_SUBMAP_IDX_GENERAL_MAP + 1);
-#if VM_MAX_TAG_ZONES
- if (zone_tagging_on) {
- zone_tagging_init(zone_map_size);
- }
-#endif
+kern_return_t
+mach_memory_info(
+ host_priv_t host,
+ mach_zone_name_array_t *namesp,
+ mach_msg_type_number_t *namesCntp,
+ mach_zone_info_array_t *infop,
+ mach_msg_type_number_t *infoCntp,
+ mach_memory_info_array_t *memoryInfop,
+ mach_msg_type_number_t *memoryInfoCntp)
+{
+ mach_zone_name_t *names;
+ vm_offset_t names_addr;
+ vm_size_t names_size;
- uint64_t remaining_denom = 0;
- uint64_t zone_sub_map_numer[Z_SUBMAP_IDX_COUNT] = {
-#ifdef __LP64__
- [Z_SUBMAP_IDX_VA_RESTRICTED_MAP] = 20,
-#endif /* defined(__LP64__) */
- [Z_SUBMAP_IDX_GENERAL_MAP] = 40,
- [Z_SUBMAP_IDX_BAG_OF_BYTES_MAP] = 40,
- };
+ mach_zone_info_t *info;
+ vm_offset_t info_addr;
+ vm_size_t info_size;
- for (unsigned idx = 0; idx <= zone_last_submap_idx; idx++) {
-#if DEBUG || DEVELOPMENT
- char submap_name[MAX_SUBMAP_NAME];
- snprintf(submap_name, MAX_SUBMAP_NAME, "submap%d", idx);
- PE_parse_boot_argn(submap_name, &zone_sub_map_numer[idx], sizeof(uint64_t));
-#endif
- remaining_denom += zone_sub_map_numer[idx];
- }
+ mach_memory_info_t *memory_info;
+ vm_offset_t memory_info_addr;
+ vm_size_t memory_info_size;
+ vm_size_t memory_info_vmsize;
+ unsigned int num_info;
- /*
- * And now allocate the various pieces of VA and submaps.
- *
- * Make a first allocation of contiguous VA, that we'll deallocate,
- * and we'll carve-out memory in that range again linearly.
- * The kernel is still single threaded at this stage.
- */
+ unsigned int max_zones, used_zones, i;
+ mach_zone_name_t *zn;
+ mach_zone_info_t *zi;
+ kern_return_t kr;
- struct zone_map_range *map_range = &zone_info.zi_map_range;
+ uint64_t zones_collectable_bytes = 0;
- *map_range = zone_init_allocate_va(&submap_min, zone_map_size, false);
- submap_min = map_range->min_address;
- kmem_free(kernel_map, submap_min, zone_map_size);
+ if (host == HOST_NULL) {
+ return KERN_INVALID_HOST;
+ }
+#if CONFIG_DEBUGGER_FOR_ZONE_INFO
+ if (!PE_i_can_has_debugger(NULL)) {
+ return KERN_INVALID_HOST;
+ }
+#endif
-#if defined(__LP64__)
/*
- * Allocate `Z_SUBMAP_IDX_VA_RESTRICTED_MAP` first because its VA range
- * can't go beyond RESTRICTED_VA_MAX for the vm_page_t packing to work.
+ * We assume that zones aren't freed once allocated.
+ * We won't pick up any zones that are allocated later.
*/
- zone_submap_init(&submap_min, Z_SUBMAP_IDX_VA_RESTRICTED_MAP,
- zone_sub_map_numer[Z_SUBMAP_IDX_VA_RESTRICTED_MAP], &remaining_denom,
- &remaining_size, SINGLE_GUARD);
-#endif /* defined(__LP64__) */
- /*
- * Allocate metadata array
- */
- zone_info.zi_meta_range =
- zone_init_allocate_va(&submap_min, zone_meta_size, true);
- zone_init_allocate_va(&submap_min, SINGLE_GUARD, true);
+ max_zones = os_atomic_load(&num_zones, relaxed);
- zone_info.zi_array_base =
- (struct zone_page_metadata *)zone_info.zi_meta_range.min_address -
- zone_pva_from_addr(map_range->min_address).packed_address;
+ names_size = round_page(max_zones * sizeof *names);
+ kr = kmem_alloc_pageable(ipc_kernel_map,
+ &names_addr, names_size, VM_KERN_MEMORY_IPC);
+ if (kr != KERN_SUCCESS) {
+ return kr;
+ }
+ names = (mach_zone_name_t *) names_addr;
- /*
- * Allocate other submaps
- */
- for (unsigned idx = Z_SUBMAP_IDX_GENERAL_MAP; idx <= zone_last_submap_idx; idx++) {
- zone_submap_init(&submap_min, idx, zone_sub_map_numer[idx],
- &remaining_denom, &remaining_size, MULTI_GUARD);
+ info_size = round_page(max_zones * sizeof *info);
+ kr = kmem_alloc_pageable(ipc_kernel_map,
+ &info_addr, info_size, VM_KERN_MEMORY_IPC);
+ if (kr != KERN_SUCCESS) {
+ kmem_free(ipc_kernel_map,
+ names_addr, names_size);
+ return kr;
}
+ info = (mach_zone_info_t *) info_addr;
- vm_map_t general_map = zone_submaps[Z_SUBMAP_IDX_GENERAL_MAP];
- zone_info.zi_general_range.min_address = vm_map_min(general_map);
- zone_info.zi_general_range.max_address = vm_map_max(general_map);
+ zn = &names[0];
+ zi = &info[0];
- assert(submap_min == map_range->max_address);
+ used_zones = max_zones;
+ for (i = 0; i < max_zones; i++) {
+ if (!get_zone_info(&(zone_array[i]), zn, zi)) {
+ used_zones--;
+ continue;
+ }
+ zones_collectable_bytes += GET_MZI_COLLECTABLE_BYTES(zi->mzi_collectable);
+ zn++;
+ zi++;
+ }
-#if CONFIG_GZALLOC
- gzalloc_init(zone_map_size);
-#endif
+ *namesp = (mach_zone_name_t *) create_vm_map_copy(names_addr, names_size, used_zones * sizeof *names);
+ *namesCntp = used_zones;
- zone_create_flags_t kma_flags = ZC_NOCACHING |
- ZC_NOGC | ZC_NOENCRYPT | ZC_NOGZALLOC | ZC_NOCALLOUT |
- ZC_KASAN_NOQUARANTINE | ZC_KASAN_NOREDZONE;
+ *infop = (mach_zone_info_t *) create_vm_map_copy(info_addr, info_size, used_zones * sizeof *info);
+ *infoCntp = used_zones;
- (void)zone_create_ext("vm.permanent", 1, kma_flags,
- ZONE_ID_PERMANENT, ^(zone_t z){
- z->permanent = true;
- z->z_elem_size = 1;
- z->pcpu_elem_size = 1;
-#if defined(__LP64__)
- z->submap_idx = Z_SUBMAP_IDX_VA_RESTRICTED_MAP;
-#endif
- });
- (void)zone_create_ext("vm.permanent.percpu", 1, kma_flags | ZC_PERCPU,
- ZONE_ID_PERCPU_PERMANENT, ^(zone_t z){
- z->permanent = true;
- z->z_elem_size = 1;
- z->pcpu_elem_size = zpercpu_count();
-#if defined(__LP64__)
- z->submap_idx = Z_SUBMAP_IDX_VA_RESTRICTED_MAP;
-#endif
- });
+ num_info = 0;
+ memory_info_addr = 0;
- /*
- * Now fix the zones that are missing their zone stats
- * we don't really know if zfree()s happened so our stats
- * are slightly off for early boot. ¯\_(ツ)_/¯
- */
- zone_index_foreach(idx) {
- zone_t tz = &zone_array[idx];
+ if (memoryInfop && memoryInfoCntp) {
+ vm_map_copy_t copy;
+ num_info = vm_page_diagnose_estimate();
+ memory_info_size = num_info * sizeof(*memory_info);
+ memory_info_vmsize = round_page(memory_info_size);
+ kr = kmem_alloc_pageable(ipc_kernel_map,
+ &memory_info_addr, memory_info_vmsize, VM_KERN_MEMORY_IPC);
+ if (kr != KERN_SUCCESS) {
+ return kr;
+ }
- if (tz->z_self) {
- zone_stats_t zs = zalloc_percpu_permanent_type(struct zone_stats);
+ kr = vm_map_wire_kernel(ipc_kernel_map, memory_info_addr, memory_info_addr + memory_info_vmsize,
+ VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_IPC, FALSE);
+ assert(kr == KERN_SUCCESS);
- zpercpu_get_cpu(zs, 0)->zs_mem_allocated +=
- (tz->countavail - tz->countfree) *
- zone_elem_size(tz);
- assert(tz->z_stats == NULL);
- tz->z_stats = zs;
-#if ZONE_ENABLE_LOGGING
- if (tz->zone_logging && !tz->zlog_btlog) {
- zone_enable_logging(tz);
- }
-#endif
- }
- }
+ memory_info = (mach_memory_info_t *) memory_info_addr;
+ vm_page_diagnose(memory_info, num_info, zones_collectable_bytes);
-#if CONFIG_ZLEAKS
- /*
- * Initialize the zone leak monitor
- */
- zleak_init(zone_map_size);
-#endif /* CONFIG_ZLEAKS */
+ kr = vm_map_unwire(ipc_kernel_map, memory_info_addr, memory_info_addr + memory_info_vmsize, FALSE);
+ assert(kr == KERN_SUCCESS);
-#if VM_MAX_TAG_ZONES
- if (zone_tagging_on) {
- vm_allocation_zones_init();
- }
-#endif
-}
-STARTUP(ZALLOC, STARTUP_RANK_FIRST, zone_init);
+ kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)memory_info_addr,
+ (vm_map_size_t)memory_info_size, TRUE, &copy);
+ assert(kr == KERN_SUCCESS);
-__startup_func
-static void
-zone_set_foreign_range(
- vm_offset_t range_min,
- vm_offset_t range_max)
-{
- zone_info.zi_foreign_range.min_address = range_min;
- zone_info.zi_foreign_range.max_address = range_max;
-}
+ *memoryInfop = (mach_memory_info_t *) copy;
+ *memoryInfoCntp = num_info;
+ }
-__startup_func
-vm_offset_t
-zone_foreign_mem_init(vm_size_t size)
-{
- vm_offset_t mem = (vm_offset_t) pmap_steal_memory(size);
- zone_set_foreign_range(mem, mem + size);
- return mem;
+ return KERN_SUCCESS;
}
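
/*
 * Hedged userspace sketch of consuming the MIG routine above, in the spirit
 * of zprint(1). It assumes the mach_zone_info() user stub and the
 * mach_zone_name_t / mach_zone_info_t types are visible through
 * <mach/mach.h> and <mach_debug/mach_debug.h>; on kernels built with
 * CONFIG_DEBUGGER_FOR_ZONE_INFO the call fails with KERN_INVALID_HOST
 * unless a debugger is allowed.
 */
#include <stdio.h>
#include <mach/mach.h>
#include <mach/mach_error.h>
#include <mach_debug/mach_debug.h>

int
main(void)
{
    mach_zone_name_t *names = NULL;
    mach_zone_info_t *info = NULL;
    mach_msg_type_number_t name_cnt = 0, info_cnt = 0;

    kern_return_t kr = mach_zone_info(mach_host_self(),
        &names, &name_cnt, &info, &info_cnt);
    if (kr != KERN_SUCCESS) {
        fprintf(stderr, "mach_zone_info: %s\n", mach_error_string(kr));
        return 1;
    }

    for (mach_msg_type_number_t i = 0; i < name_cnt && i < info_cnt; i++) {
        printf("%-40s count=%llu cur_size=%llu\n", names[i].mzn_name,
            (unsigned long long)info[i].mzi_count,
            (unsigned long long)info[i].mzi_cur_size);
    }

    /* both arrays come back as out-of-line vm_map_copy memory; release them */
    vm_deallocate(mach_task_self(), (vm_address_t)names,
        name_cnt * sizeof(*names));
    vm_deallocate(mach_task_self(), (vm_address_t)info,
        info_cnt * sizeof(*info));
    return 0;
}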
-#pragma mark zalloc
-
-#if KASAN_ZALLOC
-/*
- * Called from zfree() to add the element being freed to the KASan quarantine.
- *
- * Returns true if the newly-freed element made it into the quarantine without
- * displacing another, false otherwise. In the latter case, addrp points to the
- * address of the displaced element, which will be freed by the zone.
- */
-static bool
-kasan_quarantine_freed_element(
- zone_t *zonep, /* the zone the element is being freed to */
- void **addrp) /* address of the element being freed */
+kern_return_t
+mach_zone_info_for_zone(
+ host_priv_t host,
+ mach_zone_name_t name,
+ mach_zone_info_t *infop)
{
- zone_t zone = *zonep;
- void *addr = *addrp;
-
- /*
- * Resize back to the real allocation size and hand off to the KASan
- * quarantine. `addr` may then point to a different allocation, if the
- * current element replaced another in the quarantine. The zone then
- * takes ownership of the swapped out free element.
- */
- vm_size_t usersz = zone_elem_size(zone) - 2 * zone->kasan_redzone;
- vm_size_t sz = usersz;
+ zone_t zone_ptr;
- if (addr && zone->kasan_redzone) {
- kasan_check_free((vm_address_t)addr, usersz, KASAN_HEAP_ZALLOC);
- addr = (void *)kasan_dealloc((vm_address_t)addr, &sz);
- assert(sz == zone_elem_size(zone));
- }
- if (addr && !zone->kasan_noquarantine) {
- kasan_free(&addr, &sz, KASAN_HEAP_ZALLOC, zonep, usersz, true);
- if (!addr) {
- return TRUE;
- }
+ if (host == HOST_NULL) {
+ return KERN_INVALID_HOST;
}
- if (addr && zone->kasan_noquarantine) {
- kasan_unpoison(addr, zone_elem_size(zone));
+#if CONFIG_DEBUGGER_FOR_ZONE_INFO
+ if (!PE_i_can_has_debugger(NULL)) {
+ return KERN_INVALID_HOST;
}
- *addrp = addr;
- return FALSE;
-}
-
-#endif /* KASAN_ZALLOC */
+#endif
-static inline bool
-zone_needs_async_refill(zone_t zone)
-{
- if (zone->countfree != 0 || zone->async_pending || zone->no_callout) {
- return false;
+ if (infop == NULL) {
+ return KERN_INVALID_ARGUMENT;
}
- return zone->expandable || zone->page_count < zone->page_count_max;
-}
-
-__attribute__((noinline))
-static void
-zone_refill_synchronously_locked(
- zone_t zone,
- zalloc_flags_t flags)
-{
- thread_t thr = current_thread();
- bool set_expanding_vm_priv = false;
- zone_pva_t orig = zone->pages_intermediate;
-
- while ((flags & Z_NOWAIT) == 0 && (zone->permanent
- ? zone_pva_is_equal(zone->pages_intermediate, orig)
- : zone->countfree == 0)) {
+ zone_ptr = ZONE_NULL;
+ zone_foreach(z) {
/*
- * zone is empty, try to expand it
- *
- * Note that we now allow up to 2 threads (1 vm_privileged and
- * 1 non-vm_privileged) to expand the zone concurrently...
- *
- * this is necessary to avoid stalling vm_privileged threads
- * running critical code necessary to continue
- * compressing/swapping pages (i.e. making new free pages) from
- * stalling behind non-vm_privileged threads waiting to acquire
- * free pages when the vm_page_free_count is below the
- * vm_page_free_reserved limit.
+ * Append kalloc heap name to zone name (if zone is used by kalloc)
*/
- if ((zone->expanding_no_vm_priv || zone->expanding_vm_priv) &&
- (((thr->options & TH_OPT_VMPRIV) == 0) || zone->expanding_vm_priv)) {
- /*
- * This is a non-vm_privileged thread and a non-vm_privileged or
- * a vm_privileged thread is already expanding the zone...
- * OR
- * this is a vm_privileged thread and a vm_privileged thread is
- * already expanding the zone...
- *
- * In either case wait for a thread to finish, then try again.
- */
- zone->waiting = true;
- assert_wait(zone, THREAD_UNINT);
- unlock_zone(zone);
- thread_block(THREAD_CONTINUE_NULL);
- lock_zone(zone);
- continue;
- }
-
- if (zone->page_count >= zone->page_count_max) {
- if (zone->exhaustible) {
- break;
- }
- if (zone->expandable) {
- /*
- * If we're expandable, just don't go through this again.
- */
- zone->page_count_max = ~0u;
- } else {
- unlock_zone(zone);
+ char temp_zone_name[MAX_ZONE_NAME] = "";
+ snprintf(temp_zone_name, MAX_ZONE_NAME, "%s%s",
+ zone_heap_name(z), z->z_name);
- panic_include_zprint = true;
-#if CONFIG_ZLEAKS
- if (zleak_state & ZLEAK_STATE_ACTIVE) {
- panic_include_ztrace = true;
- }
-#endif /* CONFIG_ZLEAKS */
- panic("zalloc: zone \"%s\" empty.", zone->z_name);
- }
+ /* Find the requested zone by name */
+ if (track_this_zone(temp_zone_name, name.mzn_name)) {
+ zone_ptr = z;
+ break;
}
+ }
- /*
- * It is possible that a BG thread is refilling/expanding the zone
- * and gets pre-empted during that operation. That blocks all other
- * threads from making progress leading to a watchdog timeout. To
- * avoid that, boost the thread priority using the rwlock boost
- */
- set_thread_rwlock_boost();
+ /* No zones found with the requested zone name */
+ if (zone_ptr == ZONE_NULL) {
+ return KERN_INVALID_ARGUMENT;
+ }
- if ((thr->options & TH_OPT_VMPRIV)) {
- zone->expanding_vm_priv = true;
- set_expanding_vm_priv = true;
- } else {
- zone->expanding_no_vm_priv = true;
- }
+ if (get_zone_info(zone_ptr, NULL, infop)) {
+ return KERN_SUCCESS;
+ }
+ return KERN_FAILURE;
+}
- zone_replenish_locked(zone, flags, false);
+kern_return_t
+mach_zone_info_for_largest_zone(
+ host_priv_t host,
+ mach_zone_name_t *namep,
+ mach_zone_info_t *infop)
+{
+ if (host == HOST_NULL) {
+ return KERN_INVALID_HOST;
+ }
+#if CONFIG_DEBUGGER_FOR_ZONE_INFO
+ if (!PE_i_can_has_debugger(NULL)) {
+ return KERN_INVALID_HOST;
+ }
+#endif
- if (set_expanding_vm_priv == true) {
- zone->expanding_vm_priv = false;
- } else {
- zone->expanding_no_vm_priv = false;
- }
+ if (namep == NULL || infop == NULL) {
+ return KERN_INVALID_ARGUMENT;
+ }
- if (zone->waiting) {
- zone->waiting = false;
- thread_wakeup(zone);
- }
- clear_thread_rwlock_boost();
+ if (get_zone_info(zone_find_largest(), namep, infop)) {
+ return KERN_SUCCESS;
+ }
+ return KERN_FAILURE;
+}
+
+uint64_t
+get_zones_collectable_bytes(void)
+{
+ uint64_t zones_collectable_bytes = 0;
+ mach_zone_info_t zi;
- if (zone->countfree == 0) {
- assert(flags & Z_NOPAGEWAIT);
- break;
+ zone_foreach(z) {
+ if (get_zone_info(z, NULL, &zi)) {
+ zones_collectable_bytes +=
+ GET_MZI_COLLECTABLE_BYTES(zi.mzi_collectable);
}
}
- if ((flags & (Z_NOWAIT | Z_NOPAGEWAIT)) &&
- zone_needs_async_refill(zone) && !vm_pool_low()) {
- zone->async_pending = true;
- unlock_zone(zone);
- thread_call_enter(&call_async_alloc);
- lock_zone(zone);
- assert(zone->z_self == zone);
- }
+ return zones_collectable_bytes;
}
-__attribute__((noinline))
-static void
-zone_refill_asynchronously_locked(zone_t zone)
+kern_return_t
+mach_zone_get_zlog_zones(
+ host_priv_t host,
+ mach_zone_name_array_t *namesp,
+ mach_msg_type_number_t *namesCntp)
{
- uint32_t min_free = zone->prio_refill_count / 2;
- uint32_t resv_free = zone->prio_refill_count / 4;
- thread_t thr = current_thread();
+#if ZONE_ENABLE_LOGGING
+ unsigned int max_zones, logged_zones, i;
+ kern_return_t kr;
+ zone_t zone_ptr;
+ mach_zone_name_t *names;
+ vm_offset_t names_addr;
+ vm_size_t names_size;
- /*
- * Nothing to do if there are plenty of elements.
- */
- while (zone->countfree <= min_free) {
- /*
- * Wakeup the replenish thread if not running.
- */
- if (!zone->zone_replenishing) {
- lck_spin_lock(&zone_replenish_lock);
- assert(zone_replenish_active < zone_replenish_max_threads);
- ++zone_replenish_active;
- lck_spin_unlock(&zone_replenish_lock);
- zone->zone_replenishing = true;
- zone_replenish_wakeups_initiated++;
- thread_wakeup(&zone->prio_refill_count);
- }
+ if (host == HOST_NULL) {
+ return KERN_INVALID_HOST;
+ }
- /*
- * We'll let VM_PRIV threads to continue to allocate until the
- * reserve drops to 25%. After that only TH_OPT_ZONE_PRIV threads
- * may continue.
- *
- * TH_OPT_ZONE_PRIV threads are the GC thread and a replenish thread itself.
- * Replenish threads *need* to use the reserve. GC threads need to
- * get through the current allocation, but then will wait at a higher
- * level after they've dropped any locks which would deadlock the
- * replenish thread.
- */
- if ((zone->countfree > resv_free && (thr->options & TH_OPT_VMPRIV)) ||
- (thr->options & TH_OPT_ZONE_PRIV)) {
- break;
- }
+ if (namesp == NULL || namesCntp == NULL) {
+ return KERN_INVALID_ARGUMENT;
+ }
- /*
- * Wait for the replenish threads to add more elements for us to allocate from.
- */
- zone_replenish_throttle_count++;
- unlock_zone(zone);
- assert_wait_timeout(zone, THREAD_UNINT, 1, NSEC_PER_MSEC);
- thread_block(THREAD_CONTINUE_NULL);
- lock_zone(zone);
+ max_zones = os_atomic_load(&num_zones, relaxed);
- assert(zone->z_self == zone);
+ names_size = round_page(max_zones * sizeof *names);
+ kr = kmem_alloc_pageable(ipc_kernel_map,
+ &names_addr, names_size, VM_KERN_MEMORY_IPC);
+ if (kr != KERN_SUCCESS) {
+ return kr;
}
+ names = (mach_zone_name_t *) names_addr;
- /*
- * If we're here because of zone_gc(), we didn't wait for
- * zone_replenish_thread to finish. So we need to ensure that
- * we will successfully grab an element.
- *
- * zones that have a replenish thread configured.
- * The value of (refill_level / 2) in the previous bit of code should have
- * given us headroom even though this thread didn't wait.
- */
- if (thr->options & TH_OPT_ZONE_PRIV) {
- assert(zone->countfree != 0);
+ zone_ptr = ZONE_NULL;
+ logged_zones = 0;
+ for (i = 0; i < max_zones; i++) {
+ zone_t z = &(zone_array[i]);
+ assert(z != ZONE_NULL);
+
+ /* Copy out the zone name if zone logging is enabled */
+ if (z->zlog_btlog) {
+ get_zone_info(z, &names[logged_zones], NULL);
+ logged_zones++;
+ }
}
+
+ *namesp = (mach_zone_name_t *) create_vm_map_copy(names_addr, names_size, logged_zones * sizeof *names);
+ *namesCntp = logged_zones;
+
+ return KERN_SUCCESS;
+
+#else /* ZONE_ENABLE_LOGGING */
+#pragma unused(host, namesp, namesCntp)
+ return KERN_FAILURE;
+#endif /* ZONE_ENABLE_LOGGING */
}
-#if ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS
-__attribute__((noinline))
-static void
-zalloc_log_or_trace_leaks(zone_t zone, vm_offset_t addr)
+kern_return_t
+mach_zone_get_btlog_records(
+ host_priv_t host,
+ mach_zone_name_t name,
+ zone_btrecord_array_t *recsp,
+ mach_msg_type_number_t *recsCntp)
{
- uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* used in zone leak logging and zone leak detection */
- unsigned int numsaved = 0;
+#if DEBUG || DEVELOPMENT
+ unsigned int numrecs = 0;
+ zone_btrecord_t *recs;
+ kern_return_t kr;
+ zone_t zone_ptr;
+ vm_offset_t recs_addr;
+ vm_size_t recs_size;
-#if ZONE_ENABLE_LOGGING
- if (DO_LOGGING(zone)) {
- numsaved = backtrace_frame(zbt, MAX_ZTRACE_DEPTH,
- __builtin_frame_address(0), NULL);
- btlog_add_entry(zone->zlog_btlog, (void *)addr,
- ZOP_ALLOC, (void **)zbt, numsaved);
+ if (host == HOST_NULL) {
+ return KERN_INVALID_HOST;
}
-#endif
-#if CONFIG_ZLEAKS
- /*
- * Zone leak detection: capture a backtrace every zleak_sample_factor
- * allocations in this zone.
- */
- if (__improbable(zone->zleak_on)) {
- if (sample_counter(&zone->zleak_capture, zleak_sample_factor)) {
- /* Avoid backtracing twice if zone logging is on */
- if (numsaved == 0) {
- numsaved = backtrace_frame(zbt, MAX_ZTRACE_DEPTH,
- __builtin_frame_address(1), NULL);
- }
- /* Sampling can fail if another sample is happening at the same time in a different zone. */
- if (!zleak_log(zbt, addr, numsaved, zone_elem_size(zone))) {
- /* If it failed, roll back the counter so we sample the next allocation instead. */
- zone->zleak_capture = zleak_sample_factor;
- }
- }
+ if (recsp == NULL || recsCntp == NULL) {
+ return KERN_INVALID_ARGUMENT;
}
- if (__improbable(zone_leaks_scan_enable &&
- !(zone_elem_size(zone) & (sizeof(uintptr_t) - 1)))) {
- unsigned int count, idx;
- /* Fill element, from tail, with backtrace in reverse order */
- if (numsaved == 0) {
- numsaved = backtrace_frame(zbt, MAX_ZTRACE_DEPTH,
- __builtin_frame_address(1), NULL);
- }
- count = (unsigned int)(zone_elem_size(zone) / sizeof(uintptr_t));
- if (count >= numsaved) {
- count = numsaved - 1;
- }
- for (idx = 0; idx < count; idx++) {
- ((uintptr_t *)addr)[count - 1 - idx] = zbt[idx + 1];
+ zone_ptr = ZONE_NULL;
+ zone_foreach(z) {
+ /*
+ * Append kalloc heap name to zone name (if zone is used by kalloc)
+ */
+ char temp_zone_name[MAX_ZONE_NAME] = "";
+ snprintf(temp_zone_name, MAX_ZONE_NAME, "%s%s",
+ zone_heap_name(z), z->z_name);
+
+ /* Find the requested zone by name */
+ if (track_this_zone(temp_zone_name, name.mzn_name)) {
+ zone_ptr = z;
+ break;
}
}
-#endif /* CONFIG_ZLEAKS */
-}
-static inline bool
-zalloc_should_log_or_trace_leaks(zone_t zone, vm_size_t elem_size)
-{
-#if ZONE_ENABLE_LOGGING
- if (DO_LOGGING(zone)) {
- return true;
+ /* No zones found with the requested zone name */
+ if (zone_ptr == ZONE_NULL) {
+ return KERN_INVALID_ARGUMENT;
}
-#endif
-#if CONFIG_ZLEAKS
- /*
- * Zone leak detection: capture a backtrace every zleak_sample_factor
- * allocations in this zone.
- */
- if (zone->zleak_on) {
- return true;
+
+ /* Logging not turned on for the requested zone */
+ if (!DO_LOGGING(zone_ptr)) {
+ return KERN_FAILURE;
}
- if (zone_leaks_scan_enable && !(elem_size & (sizeof(uintptr_t) - 1))) {
- return true;
+
+ /* Allocate memory for btlog records */
+ numrecs = (unsigned int)(get_btlog_records_count(zone_ptr->zlog_btlog));
+ recs_size = round_page(numrecs * sizeof *recs);
+
+ kr = kmem_alloc_pageable(ipc_kernel_map, &recs_addr, recs_size, VM_KERN_MEMORY_IPC);
+ if (kr != KERN_SUCCESS) {
+ return kr;
}
-#endif /* CONFIG_ZLEAKS */
- return false;
-}
-#endif /* ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS */
-#if ZONE_ENABLE_LOGGING
-__attribute__((noinline))
-static void
-zfree_log_trace(zone_t zone, vm_offset_t addr)
-{
/*
- * See if we're doing logging on this zone.
- *
- * There are two styles of logging used depending on
- * whether we're trying to catch a leak or corruption.
+ * We will call get_btlog_records() below which populates this region while holding a spinlock
+ * (the btlog lock). So these pages need to be wired.
*/
- if (__improbable(DO_LOGGING(zone))) {
- if (corruption_debug_flag) {
- uintptr_t zbt[MAX_ZTRACE_DEPTH];
- unsigned int numsaved;
- /*
- * We're logging to catch a corruption.
- *
- * Add a record of this zfree operation to log.
- */
- numsaved = backtrace_frame(zbt, MAX_ZTRACE_DEPTH,
- __builtin_frame_address(1), NULL);
- btlog_add_entry(zone->zlog_btlog, (void *)addr, ZOP_FREE,
- (void **)zbt, numsaved);
- } else {
- /*
- * We're logging to catch a leak.
- *
- * Remove any record we might have for this element
- * since it's being freed. Note that we may not find it
- * if the buffer overflowed and that's OK.
- *
- * Since the log is of a limited size, old records get
- * overwritten if there are more zallocs than zfrees.
- */
- btlog_remove_entries_for_element(zone->zlog_btlog, (void *)addr);
- }
- }
-}
-#endif /* ZONE_ENABLE_LOGGING */
+ kr = vm_map_wire_kernel(ipc_kernel_map, recs_addr, recs_addr + recs_size,
+ VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_IPC, FALSE);
+ assert(kr == KERN_SUCCESS);
-/*
- * Removes an element from the zone's free list, returning 0 if the free list is empty.
- * Verifies that the next-pointer and backup next-pointer are intact,
- * and verifies that a poisoned element hasn't been modified.
- */
-vm_offset_t
-zalloc_direct_locked(
- zone_t zone,
- zalloc_flags_t flags __unused,
- vm_size_t waste __unused)
-{
- struct zone_page_metadata *page_meta;
- zone_addr_kind_t kind = ZONE_ADDR_NATIVE;
- vm_offset_t element, page, validate_bit = 0;
-
- /* if zone is empty, bail */
- if (!zone_pva_is_null(zone->pages_any_free_foreign)) {
- kind = ZONE_ADDR_FOREIGN;
- page_meta = zone_pva_to_meta(zone->pages_any_free_foreign, kind);
- page = (vm_offset_t)page_meta;
- } else if (!zone_pva_is_null(zone->pages_intermediate)) {
- page_meta = zone_pva_to_meta(zone->pages_intermediate, kind);
- page = zone_pva_to_addr(zone->pages_intermediate);
- } else if (!zone_pva_is_null(zone->pages_all_free)) {
- page_meta = zone_pva_to_meta(zone->pages_all_free, kind);
- page = zone_pva_to_addr(zone->pages_all_free);
- if (os_sub_overflow(zone->allfree_page_count,
- page_meta->zm_page_count, &zone->allfree_page_count)) {
- zone_accounting_panic(zone, "allfree_page_count wrap-around");
- }
- } else {
- zone_accounting_panic(zone, "countfree corruption");
- }
+ recs = (zone_btrecord_t *)recs_addr;
+ get_btlog_records(zone_ptr->zlog_btlog, recs, &numrecs);
- if (!zone_has_index(zone, page_meta->zm_index)) {
- zone_page_metadata_index_confusion_panic(zone, page, page_meta);
- }
+ kr = vm_map_unwire(ipc_kernel_map, recs_addr, recs_addr + recs_size, FALSE);
+ assert(kr == KERN_SUCCESS);
- element = zone_page_meta_get_freelist(zone, page_meta, page);
+ *recsp = (zone_btrecord_t *) create_vm_map_copy(recs_addr, recs_size, numrecs * sizeof *recs);
+ *recsCntp = numrecs;
- vm_offset_t *primary = (vm_offset_t *) element;
- vm_offset_t *backup = get_backup_ptr(zone_elem_size(zone), primary);
+ return KERN_SUCCESS;
- /*
- * since the primary next pointer is xor'ed with zp_nopoison_cookie
- * for obfuscation, retrieve the original value back
- */
- vm_offset_t next_element = *primary ^ zp_nopoison_cookie;
- vm_offset_t next_element_primary = *primary;
- vm_offset_t next_element_backup = *backup;
+#else /* DEBUG || DEVELOPMENT */
+#pragma unused(host, name, recsp, recsCntp)
+ return KERN_FAILURE;
+#endif /* DEBUG || DEVELOPMENT */
+}
+
+
+#if DEBUG || DEVELOPMENT
+
+kern_return_t
+mach_memory_info_check(void)
+{
+ mach_memory_info_t * memory_info;
+ mach_memory_info_t * info;
+ unsigned int num_info;
+ vm_offset_t memory_info_addr;
+ kern_return_t kr;
+ size_t memory_info_size, memory_info_vmsize;
+ uint64_t top_wired, zonestotal, total;
- /*
- * backup_ptr_mismatch_panic will determine what next_element
- * should have been, and print it appropriately
- */
- if (!zone_page_meta_is_sane_element(zone, page_meta, page, next_element, kind)) {
- backup_ptr_mismatch_panic(zone, page_meta, page, element);
- }
+ num_info = vm_page_diagnose_estimate();
+ memory_info_size = num_info * sizeof(*memory_info);
+ memory_info_vmsize = round_page(memory_info_size);
+ kr = kmem_alloc(kernel_map, &memory_info_addr, memory_info_vmsize, VM_KERN_MEMORY_DIAG);
+ assert(kr == KERN_SUCCESS);
- /* Check the backup pointer for the regular cookie */
- if (__improbable(next_element_primary != next_element_backup)) {
- /* Check for the poisoned cookie instead */
- if (__improbable(next_element != (next_element_backup ^ zp_poisoned_cookie))) {
- /* Neither cookie is valid, corruption has occurred */
- backup_ptr_mismatch_panic(zone, page_meta, page, element);
- }
+ memory_info = (mach_memory_info_t *) memory_info_addr;
+ vm_page_diagnose(memory_info, num_info, 0);
- /*
- * Element was marked as poisoned, so check its integrity before using it.
- */
- validate_bit = ZALLOC_ELEMENT_NEEDS_VALIDATION;
- } else if (zone->zfree_clear_mem) {
- validate_bit = ZALLOC_ELEMENT_NEEDS_VALIDATION;
+ top_wired = total = zonestotal = 0;
+ zone_foreach(z) {
+ zonestotal += zone_size_wired(z);
}
- /* Remove this element from the free list */
- zone_page_meta_set_freelist(page_meta, page, next_element);
-
- if (kind == ZONE_ADDR_FOREIGN) {
- if (next_element == 0) {
- /* last foreign element allocated on page, move to all_used_foreign */
- zone_meta_requeue(zone, &zone->pages_all_used_foreign, page_meta, kind);
+ for (uint32_t idx = 0; idx < num_info; idx++) {
+ info = &memory_info[idx];
+ if (!info->size) {
+ continue;
+ }
+ if (VM_KERN_COUNT_WIRED == info->site) {
+ top_wired = info->size;
+ }
+ if (VM_KERN_SITE_HIDE & info->flags) {
+ continue;
+ }
+ if (!(VM_KERN_SITE_WIRED & info->flags)) {
+ continue;
}
- } else if (next_element == 0) {
- zone_meta_requeue(zone, &zone->pages_all_used, page_meta, kind);
- } else if (page_meta->zm_alloc_count == 0) {
- /* remove from free, move to intermediate */
- zone_meta_requeue(zone, &zone->pages_intermediate, page_meta, kind);
+ total += info->size;
}
+ total += zonestotal;
- if (os_add_overflow(page_meta->zm_alloc_count, 1,
- &page_meta->zm_alloc_count)) {
- /*
- * This will not catch a lot of errors, the proper check
- * would be against the number of elements this run should
- * have which is expensive to count.
- *
- * But zm_alloc_count is a 16 bit number which could
- * theoretically be valuable to cause to wrap around,
- * so catch this.
- */
- zone_page_meta_accounting_panic(zone, page_meta,
- "zm_alloc_count overflow");
- }
- if (os_sub_overflow(zone->countfree, 1, &zone->countfree)) {
- zone_accounting_panic(zone, "countfree wrap-around");
- }
+ printf("vm_page_diagnose_check %qd of %qd, zones %qd, short 0x%qx\n",
+ total, top_wired, zonestotal, top_wired - total);
-#if VM_MAX_TAG_ZONES
- if (__improbable(zone->tags)) {
- vm_tag_t tag = zalloc_flags_get_tag(flags);
- // set the tag with b0 clear so the block remains inuse
- ZTAG(zone, element)[0] = (vm_tag_t)(tag << 1);
- vm_tag_update_zone_size(tag, zone->tag_zone_index,
- zone_elem_size(zone), waste);
- }
-#endif /* VM_MAX_TAG_ZONES */
-#if KASAN_ZALLOC
- if (zone->percpu) {
- zpercpu_foreach_cpu(i) {
- kasan_poison_range(element + ptoa(i),
- zone_elem_size(zone), ASAN_VALID);
- }
- } else {
- kasan_poison_range(element, zone_elem_size(zone), ASAN_VALID);
- }
-#endif
+ kmem_free(kernel_map, memory_info_addr, memory_info_vmsize);
- return element | validate_bit;
+ return kr;
}
-/*
- * zalloc returns an element from the specified zone.
- *
- * The function is noinline when zlog can be used so that the backtracing can
- * reliably skip the zalloc_ext() and zalloc_log_or_trace_leaks()
- * boring frames.
- */
-#if ZONE_ENABLE_LOGGING
-__attribute__((noinline))
-#endif
-void *
-zalloc_ext(
- zone_t zone,
- zone_stats_t zstats,
- zalloc_flags_t flags,
- vm_size_t waste)
-{
- vm_offset_t addr = 0;
- vm_size_t elem_size = zone_elem_size(zone);
-
- /*
- * KASan uses zalloc() for fakestack, which can be called anywhere.
- * However, we make sure these calls can never block.
- */
- assert(zone->kasan_fakestacks ||
- ml_get_interrupts_enabled() ||
- ml_is_quiescing() ||
- debug_mode_active() ||
- startup_phase < STARTUP_SUB_EARLY_BOOT);
+extern boolean_t(*volatile consider_buffer_cache_collect)(int);
- /*
- * Make sure Z_NOFAIL was not obviously misused
- */
- if ((flags & Z_NOFAIL) && !zone->prio_refill_count) {
- assert(!zone->exhaustible && (flags & (Z_NOWAIT | Z_NOPAGEWAIT)) == 0);
- }
+#endif /* DEBUG || DEVELOPMENT */
-#if CONFIG_ZCACHE
- /*
- * Note: if zone caching is on, gzalloc and tags aren't used
- * so we can always check this first
- */
- if (zone_caching_enabled(zone)) {
- addr = zcache_alloc_from_cpu_cache(zone, zstats, waste);
- if (__probable(addr)) {
- goto allocated_from_cache;
- }
+kern_return_t
+mach_zone_force_gc(
+ host_t host)
+{
+ if (host == HOST_NULL) {
+ return KERN_INVALID_HOST;
}
-#endif /* CONFIG_ZCACHE */
-#if CONFIG_GZALLOC
- if (__improbable(zone->gzalloc_tracked)) {
- addr = gzalloc_alloc(zone, zstats, flags);
- goto allocated_from_gzalloc;
- }
-#endif /* CONFIG_GZALLOC */
-#if VM_MAX_TAG_ZONES
- if (__improbable(zone->tags)) {
- vm_tag_t tag = zalloc_flags_get_tag(flags);
- if (tag == VM_KERN_MEMORY_NONE) {
- /*
- * zone views into heaps can lead to a site-less call
- * and we fallback to KALLOC as a tag for those.
- */
- tag = VM_KERN_MEMORY_KALLOC;
- flags |= Z_VM_TAG(tag);
- }
- vm_tag_will_update_zone(tag, zone->tag_zone_index);
+#if DEBUG || DEVELOPMENT
+ /* Callout to buffer cache GC to drop elements in the apfs zones */
+ if (consider_buffer_cache_collect != NULL) {
+ (void)(*consider_buffer_cache_collect)(0);
}
-#endif /* VM_MAX_TAG_ZONES */
-
- lock_zone(zone);
- assert(zone->z_self == zone);
+ zone_gc(ZONE_GC_DRAIN);
+#endif /* DEBUG || DEVELOPMENT */
+ return KERN_SUCCESS;
+}
- /*
- * Check if we need another thread to replenish the zone or
- * if we have to wait for a replenish thread to finish.
- * This is used for elements, like vm_map_entry, which are
- * needed themselves to implement zalloc().
- */
- if (__improbable(zone->prio_refill_count &&
- zone->countfree <= zone->prio_refill_count / 2)) {
- zone_refill_asynchronously_locked(zone);
- } else if (__improbable(zone->countfree == 0)) {
- zone_refill_synchronously_locked(zone, flags);
- if (__improbable(zone->countfree == 0)) {
- unlock_zone(zone);
- if (__improbable(flags & Z_NOFAIL)) {
- zone_nofail_panic(zone);
- }
- goto out_nomem;
- }
- }
+zone_t
+zone_find_largest(void)
+{
+ uint32_t largest_idx = 0;
+ vm_offset_t largest_size = zone_size_wired(&zone_array[0]);
- addr = zalloc_direct_locked(zone, flags, waste);
- if (__probable(zstats != NULL)) {
- /*
- * The few vm zones used before zone_init() runs do not have
- * per-cpu stats yet
- */
- int cpu = cpu_number();
- zpercpu_get_cpu(zstats, cpu)->zs_mem_allocated += elem_size;
-#if ZALLOC_DETAILED_STATS
- if (waste) {
- zpercpu_get_cpu(zstats, cpu)->zs_mem_wasted += waste;
+ zone_index_foreach(i) {
+ vm_offset_t size = zone_size_wired(&zone_array[i]);
+ if (size > largest_size) {
+ largest_idx = i;
+ largest_size = size;
}
-#endif /* ZALLOC_DETAILED_STATS */
}
- unlock_zone(zone);
-
-#if ZALLOC_ENABLE_POISONING
- bool validate = addr & ZALLOC_ELEMENT_NEEDS_VALIDATION;
-#endif
- addr &= ~ZALLOC_ELEMENT_NEEDS_VALIDATION;
- zone_clear_freelist_pointers(zone, addr);
-#if ZALLOC_ENABLE_POISONING
- /*
- * Note: percpu zones do not respect ZONE_MIN_ELEM_SIZE,
- * so we will check the first word even if we just
- * cleared it.
- */
- zalloc_validate_element(zone, addr, elem_size - sizeof(vm_offset_t),
- validate);
-#endif /* ZALLOC_ENABLE_POISONING */
+ return &zone_array[largest_idx];
+}
-allocated_from_cache:
-#if ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS
- if (__improbable(zalloc_should_log_or_trace_leaks(zone, elem_size))) {
- zalloc_log_or_trace_leaks(zone, addr);
- }
-#endif /* ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS */
+#endif /* !ZALLOC_TEST */
+#pragma mark zone creation, configuration, destruction
+#if !ZALLOC_TEST
-#if CONFIG_GZALLOC
-allocated_from_gzalloc:
-#endif
-#if KASAN_ZALLOC
- if (zone->kasan_redzone) {
- addr = kasan_alloc(addr, elem_size,
- elem_size - 2 * zone->kasan_redzone, zone->kasan_redzone);
- elem_size -= 2 * zone->kasan_redzone;
- }
- /*
- * Initialize buffer with unique pattern only if memory
- * wasn't expected to be zeroed.
- */
- if (!zone->zfree_clear_mem && !(flags & Z_ZERO)) {
- kasan_leak_init(addr, elem_size);
- }
-#endif /* KASAN_ZALLOC */
- if ((flags & Z_ZERO) && !zone->zfree_clear_mem) {
- bzero((void *)addr, elem_size);
- }
+static zone_t
+zone_init_defaults(zone_id_t zid)
+{
+ zone_t z = &zone_array[zid];
- TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, elem_size, addr);
+ z->z_wired_max = ~0u;
+ z->collectable = true;
+ z->expandable = true;
+ z->z_submap_idx = Z_SUBMAP_IDX_GENERAL;
-out_nomem:
- DTRACE_VM2(zalloc, zone_t, zone, void*, addr);
- return (void *)addr;
+ lck_spin_init(&z->z_lock, &zone_locks_grp, LCK_ATTR_NULL);
+ STAILQ_INIT(&z->z_recirc);
+ return z;
}
-void *
-zalloc(union zone_or_view zov)
+static bool
+zone_is_initializing(zone_t z)
{
- return zalloc_flags(zov, Z_WAITOK);
+ return !z->z_self && !z->z_destroyed;
}
-void *
-zalloc_noblock(union zone_or_view zov)
+void
+zone_set_submap_idx(zone_t zone, unsigned int sub_map_idx)
{
- return zalloc_flags(zov, Z_NOWAIT);
+ if (!zone_is_initializing(zone)) {
+ panic("%s: called after zone_create()", __func__);
+ }
+ if (sub_map_idx > zone_last_submap_idx) {
+ panic("zone_set_submap_idx(%d) > %d", sub_map_idx, zone_last_submap_idx);
+ }
+ zone->z_submap_idx = sub_map_idx;
}
-void *
-zalloc_flags(union zone_or_view zov, zalloc_flags_t flags)
+void
+zone_set_noexpand(zone_t zone, vm_size_t nelems)
{
- zone_t zone = zov.zov_view->zv_zone;
- zone_stats_t zstats = zov.zov_view->zv_stats;
- assert(!zone->percpu);
- return zalloc_ext(zone, zstats, flags, 0);
+ if (!zone_is_initializing(zone)) {
+ panic("%s: called after zone_create()", __func__);
+ }
+ zone->expandable = false;
+ zone->z_wired_max = zone_alloc_pages_for_nelems(zone, nelems);
}
-void *
-zalloc_percpu(union zone_or_view zov, zalloc_flags_t flags)
+void
+zone_set_exhaustible(zone_t zone, vm_size_t nelems)
{
- zone_t zone = zov.zov_view->zv_zone;
- zone_stats_t zstats = zov.zov_view->zv_stats;
- assert(zone->percpu);
- return (void *)__zpcpu_mangle(zalloc_ext(zone, zstats, flags, 0));
+ if (!zone_is_initializing(zone)) {
+ panic("%s: called after zone_create()", __func__);
+ }
+ zone->expandable = false;
+ zone->exhaustible = true;
+ zone->z_wired_max = zone_alloc_pages_for_nelems(zone, nelems);
}
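
/*
 * Minimal sketch of how these setters are meant to be used: from the
 * extra_setup block of zone_create_ext(), while the zone is still
 * initializing (before z_self is published), which is what the
 * zone_is_initializing() checks above enforce. struct widget, the zone
 * name and the 4096-element cap are hypothetical.
 */
struct widget {
    uint64_t  w_id;
    void     *w_data;
};

static zone_t widget_zone;

static void
widget_zone_startup(void)
{
    widget_zone = zone_create_ext("example.widgets", sizeof(struct widget),
        ZC_NOGZALLOC, ZONE_ID_ANY, ^(zone_t z) {
            /* calling the setter here is allowed; after creation it panics */
            zone_set_exhaustible(z, 4096);
        });
}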
-static void *
-_zalloc_permanent(zone_t zone, vm_size_t size, vm_offset_t mask)
+/**
+ * @function zone_create_find
+ *
+ * @abstract
+ * Finds an unused zone for the given name and element size.
+ *
+ * @param name the zone name
+ * @param size the element size (including redzones, ...)
+ * @param flags the flags passed to @c zone_create*
+ * @param zid_inout the desired zone ID or ZONE_ID_ANY
+ *
+ * @returns a zone to initialize further.
+ */
+static zone_t
+zone_create_find(
+ const char *name,
+ vm_size_t size,
+ zone_create_flags_t flags,
+ zone_id_t *zid_inout)
{
- const zone_addr_kind_t kind = ZONE_ADDR_NATIVE;
- struct zone_page_metadata *page_meta;
- vm_offset_t offs, addr;
- zone_pva_t pva;
+ zone_id_t nzones, zid = *zid_inout;
+ zone_t z;
- assert(ml_get_interrupts_enabled() ||
- ml_is_quiescing() ||
- debug_mode_active() ||
- startup_phase < STARTUP_SUB_EARLY_BOOT);
+ simple_lock(&all_zones_lock, &zone_locks_grp);
- size = (size + mask) & ~mask;
- assert(size <= PAGE_SIZE);
+ nzones = (zone_id_t)os_atomic_load(&num_zones, relaxed);
+ assert(num_zones_in_use <= nzones && nzones < MAX_ZONES);
- lock_zone(zone);
- assert(zone->z_self == zone);
+ if (__improbable(nzones < ZONE_ID__FIRST_DYNAMIC)) {
+ /*
+ * The first time around, make sure the reserved zone IDs
+ * have an initialized lock as zone_index_foreach() will
+ * enumerate them.
+ */
+ while (nzones < ZONE_ID__FIRST_DYNAMIC) {
+ zone_init_defaults(nzones++);
+ }
+
+ os_atomic_store(&num_zones, nzones, release);
+ }
+
+ if (zid != ZONE_ID_ANY) {
+ if (zid >= ZONE_ID__FIRST_DYNAMIC) {
+ panic("zone_create: invalid desired zone ID %d for %s",
+ zid, name);
+ }
+ if (flags & ZC_DESTRUCTIBLE) {
+ panic("zone_create: ID %d (%s) must be permanent", zid, name);
+ }
+ if (zone_array[zid].z_self) {
+ panic("zone_create: creating zone ID %d (%s) twice", zid, name);
+ }
+ z = &zone_array[zid];
+ } else {
+ if (flags & ZC_DESTRUCTIBLE) {
+ /*
+ * If possible, find a previously zdestroy'ed zone in the
+ * zone_array that we can reuse.
+ */
+ for (int i = bitmap_first(zone_destroyed_bitmap, MAX_ZONES);
+ i >= 0; i = bitmap_next(zone_destroyed_bitmap, i)) {
+ z = &zone_array[i];
- for (;;) {
- pva = zone->pages_intermediate;
- while (!zone_pva_is_null(pva)) {
- page_meta = zone_pva_to_meta(pva, kind);
- if (page_meta->zm_freelist_offs + size <= PAGE_SIZE) {
- goto found;
+ /*
+ * If the zone name and the element size are the
+ * same, we can just reuse the old zone struct.
+ */
+ if (strcmp(z->z_name, name) || zone_elem_size(z) != size) {
+ continue;
+ }
+ bitmap_clear(zone_destroyed_bitmap, i);
+ z->z_destroyed = false;
+ z->z_self = z;
+ zid = (zone_id_t)i;
+ goto out;
}
- pva = page_meta->zm_page_next;
}
- zone_refill_synchronously_locked(zone, Z_WAITOK);
- }
-
-found:
- offs = (page_meta->zm_freelist_offs + mask) & ~mask;
- page_meta->zm_freelist_offs = offs + size;
- page_meta->zm_alloc_count += size;
- zone->countfree -= size;
- if (__probable(zone->z_stats)) {
- zpercpu_get(zone->z_stats)->zs_mem_allocated += size;
- }
+ zid = nzones++;
+ z = zone_init_defaults(zid);
- if (page_meta->zm_alloc_count >= PAGE_SIZE - sizeof(vm_offset_t)) {
- zone_meta_requeue(zone, &zone->pages_all_used, page_meta, kind);
+ /*
+ * The release barrier pairs with the acquire in
+ * zone_index_foreach() and makes sure that enumeration loops
+ * always see an initialized zone lock.
+ */
+ os_atomic_store(&num_zones, nzones, release);
}
- unlock_zone(zone);
-
- addr = offs + zone_pva_to_addr(pva);
+out:
+ num_zones_in_use++;
+ simple_unlock(&all_zones_lock);
- DTRACE_VM2(zalloc, zone_t, zone, void*, addr);
- return (void *)addr;
+ *zid_inout = zid;
+ return z;
}
-static void *
-_zalloc_permanent_large(size_t size, vm_offset_t mask)
+__abortlike
+static void
+zone_create_panic(const char *name, const char *f1, const char *f2)
{
- kern_return_t kr;
- vm_offset_t addr;
-
- kr = kernel_memory_allocate(kernel_map, &addr, size, mask,
- KMA_KOBJECT | KMA_PERMANENT | KMA_ZERO,
- VM_KERN_MEMORY_KALLOC);
- if (kr != 0) {
- panic("zalloc_permanent: unable to allocate %zd bytes (%d)",
- size, kr);
- }
- return (void *)addr;
+ panic("zone_create: creating zone %s: flag %s and %s are incompatible",
+ name, f1, f2);
}
+#define zone_create_assert_not_both(name, flags, current_flag, forbidden_flag) \
+ if ((flags) & forbidden_flag) { \
+ zone_create_panic(name, #current_flag, #forbidden_flag); \
+ }
-void *
-zalloc_permanent(vm_size_t size, vm_offset_t mask)
+/*
+ * Adjusts the size of the element based on minimum size, alignment
+ * and kasan redzones
+ */
+static vm_size_t
+zone_elem_adjust_size(
+ const char *name __unused,
+ vm_size_t elem_size,
+ zone_create_flags_t flags __unused,
+ uint32_t *redzone __unused)
{
- if (size <= PAGE_SIZE) {
- zone_t zone = &zone_array[ZONE_ID_PERMANENT];
- return _zalloc_permanent(zone, size, mask);
+ vm_size_t size;
+ /*
+ * Adjust element size for minimum size and pointer alignment
+ */
+ size = (elem_size + sizeof(vm_offset_t) - 1) & -sizeof(vm_offset_t);
+ if (size < ZONE_MIN_ELEM_SIZE) {
+ size = ZONE_MIN_ELEM_SIZE;
}
- return _zalloc_permanent_large(size, mask);
-}
-void *
-zalloc_percpu_permanent(vm_size_t size, vm_offset_t mask)
-{
- zone_t zone = &zone_array[ZONE_ID_PERCPU_PERMANENT];
- return (void *)__zpcpu_mangle(_zalloc_permanent(zone, size, mask));
+#if KASAN_ZALLOC
+ /*
+ * Expand the zone allocation size to include the redzones.
+ *
+ * For page-multiple zones add a full guard page because they
+ * likely require alignment.
+ */
+ uint32_t redzone_tmp;
+ if (flags & (ZC_KASAN_NOREDZONE | ZC_PERCPU)) {
+ redzone_tmp = 0;
+ } else if ((size & PAGE_MASK) == 0) {
+ if (size != PAGE_SIZE && (flags & ZC_ALIGNMENT_REQUIRED)) {
+ panic("zone_create: zone %s can't provide more than PAGE_SIZE"
+ "alignment", name);
+ }
+ redzone_tmp = PAGE_SIZE;
+ } else if (flags & ZC_ALIGNMENT_REQUIRED) {
+ redzone_tmp = 0;
+ } else {
+ redzone_tmp = KASAN_GUARD_SIZE;
+ }
+ size += redzone_tmp * 2;
+ if (redzone) {
+ *redzone = redzone_tmp;
+ }
+#endif
+ return size;
}
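
/*
 * Standalone sketch of the minimum-size / pointer-alignment rounding above
 * (the KASAN redzone handling is omitted). MIN_ELEM_SIZE is a stand-in for
 * ZONE_MIN_ELEM_SIZE and is assumed to be two pointers purely for
 * illustration.
 */
#include <stdio.h>
#include <stddef.h>

#define MIN_ELEM_SIZE (2 * sizeof(void *))

static size_t
adjust_size(size_t elem_size)
{
    /* round up to the pointer size, then clamp to the minimum */
    size_t size = (elem_size + sizeof(void *) - 1) & ~(sizeof(void *) - 1);
    return size < MIN_ELEM_SIZE ? MIN_ELEM_SIZE : size;
}

int
main(void)
{
    size_t sizes[] = { 1, 9, 20, 64, 100 };

    for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
        /* on LP64 this prints 16, 16, 24, 64, 104 */
        printf("%3zu -> %3zu\n", sizes[i], adjust_size(sizes[i]));
    }
    return 0;
}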
-void
-zalloc_async(__unused thread_call_param_t p0, __unused thread_call_param_t p1)
+/*
+ * Returns the allocation chunk size that has the least fragmentation
+ */
+static vm_size_t
+zone_get_min_alloc_granule(
+ vm_size_t elem_size,
+ zone_create_flags_t flags)
{
- zone_index_foreach(i) {
- zone_t z = &zone_array[i];
-
- if (z->no_callout) {
- /* async_pending will never be set */
- continue;
+ vm_size_t alloc_granule = PAGE_SIZE;
+ if (flags & ZC_PERCPU) {
+ alloc_granule = PAGE_SIZE * zpercpu_count();
+ if (PAGE_SIZE % elem_size > 256) {
+ panic("zone_create: per-cpu zone has too much fragmentation");
}
-
- lock_zone(z);
- if (z->z_self && z->async_pending) {
- z->async_pending = false;
- zone_refill_synchronously_locked(z, Z_WAITOK);
+ } else if ((elem_size & PAGE_MASK) == 0) {
+ /* zero fragmentation by definition */
+ alloc_granule = elem_size;
+ } else if (alloc_granule % elem_size == 0) {
+ /* zero fragmentation by definition */
+ } else {
+ vm_size_t frag = (alloc_granule % elem_size) * 100 / alloc_granule;
+ vm_size_t alloc_tmp = PAGE_SIZE;
+ while ((alloc_tmp += PAGE_SIZE) <= ZONE_MAX_ALLOC_SIZE) {
+ vm_size_t frag_tmp = (alloc_tmp % elem_size) * 100 / alloc_tmp;
+ if (frag_tmp < frag) {
+ frag = frag_tmp;
+ alloc_granule = alloc_tmp;
+ }
}
- unlock_zone(z);
}
+ return alloc_granule;
}
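
/*
 * Standalone sketch of the chunk-size search above: pick the smallest
 * multiple of the page size, up to a cap, that minimizes the wasted
 * percentage per chunk. DEMO_PAGE_SIZE and DEMO_MAX_ALLOC are illustrative
 * stand-ins for PAGE_SIZE and ZONE_MAX_ALLOC_SIZE; the per-cpu special case
 * is left out.
 */
#include <stdio.h>
#include <stddef.h>

#define DEMO_PAGE_SIZE 4096u
#define DEMO_MAX_ALLOC (8 * DEMO_PAGE_SIZE)

static size_t
min_alloc_granule(size_t elem_size)
{
    size_t alloc = DEMO_PAGE_SIZE;

    if (elem_size % DEMO_PAGE_SIZE == 0) {
        return elem_size;            /* page multiple: zero waste */
    }
    if (DEMO_PAGE_SIZE % elem_size == 0) {
        return alloc;                /* divides a page evenly: zero waste */
    }
    size_t frag = (alloc % elem_size) * 100 / alloc;
    for (size_t tmp = 2 * DEMO_PAGE_SIZE; tmp <= DEMO_MAX_ALLOC;
        tmp += DEMO_PAGE_SIZE) {
        size_t frag_tmp = (tmp % elem_size) * 100 / tmp;
        if (frag_tmp < frag) {
            frag = frag_tmp;
            alloc = tmp;
        }
    }
    return alloc;
}

int
main(void)
{
    size_t sizes[] = { 48, 1416, 3000 };

    for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
        /* e.g. a 3000-byte element wastes the least in a 3-page chunk */
        printf("elem %4zu -> chunk %5zu bytes\n",
            sizes[i], min_alloc_granule(sizes[i]));
    }
    return 0;
}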
-/*
- * Adds the element to the head of the zone's free list
- * Keeps a backup next-pointer at the end of the element
- */
-void
-zfree_direct_locked(zone_t zone, vm_offset_t element, bool poison)
+vm_size_t
+zone_get_foreign_alloc_size(
+ const char *name __unused,
+ vm_size_t elem_size,
+ zone_create_flags_t flags,
+ uint16_t min_pages)
{
- struct zone_page_metadata *page_meta;
- vm_offset_t page, old_head;
- zone_addr_kind_t kind;
- vm_size_t elem_size = zone_elem_size(zone);
-
- vm_offset_t *primary = (vm_offset_t *) element;
- vm_offset_t *backup = get_backup_ptr(elem_size, primary);
+ vm_size_t adjusted_size = zone_elem_adjust_size(name, elem_size, flags,
+ NULL);
+ vm_size_t alloc_granule = zone_get_min_alloc_granule(adjusted_size,
+ flags);
+ vm_size_t min_size = min_pages * PAGE_SIZE;
+ /*
+ * Round up min_size to a multiple of alloc_granule
+ */
+ return ((min_size + alloc_granule - 1) / alloc_granule)
+ * alloc_granule;
+}
- page_meta = zone_allocated_element_resolve(zone, element, &page, &kind);
- old_head = zone_page_meta_get_freelist(zone, page_meta, page);
+zone_t
+zone_create_ext(
+ const char *name,
+ vm_size_t size,
+ zone_create_flags_t flags,
+ zone_id_t zid,
+ void (^extra_setup)(zone_t))
+{
+ vm_size_t alloc;
+ uint32_t redzone;
+ zone_t z;
- if (__improbable(old_head == element)) {
- panic("zfree: double free of %p to zone %s%s\n",
- (void *) element, zone_heap_name(zone), zone->z_name);
+ if (size > ZONE_MAX_ALLOC_SIZE) {
+ panic("zone_create: element size too large: %zd", (size_t)size);
}
-#if ZALLOC_ENABLE_POISONING
- if (poison && elem_size < ZONE_MIN_ELEM_SIZE) {
- assert(zone->percpu);
- poison = false;
+ if (size < 2 * sizeof(vm_size_t)) {
+ /* Elements are too small for kasan. */
+ flags |= ZC_KASAN_NOQUARANTINE | ZC_KASAN_NOREDZONE;
}
-#else
- poison = false;
-#endif
+ size = zone_elem_adjust_size(name, size, flags, &redzone);
/*
- * Always write a redundant next pointer
- * So that it is more difficult to forge, xor it with a random cookie
- * A poisoned element is indicated by using zp_poisoned_cookie
- * instead of zp_nopoison_cookie
+ * Allocate the zone slot, return early if we found an older match.
*/
-
- *backup = old_head ^ (poison ? zp_poisoned_cookie : zp_nopoison_cookie);
+ z = zone_create_find(name, size, flags, &zid);
+ if (__improbable(z->z_self)) {
+ /* We found a zone to reuse */
+ return z;
+ }
/*
- * Insert this element at the head of the free list. We also xor the
- * primary pointer with the zp_nopoison_cookie to make sure a free
- * element does not provide the location of the next free element directly.
+ * Initialize the zone properly.
*/
- *primary = old_head ^ zp_nopoison_cookie;
-#if VM_MAX_TAG_ZONES
- if (__improbable(zone->tags)) {
- vm_tag_t tag = (ZTAG(zone, element)[0] >> 1);
- // set the tag with b0 clear so the block remains inuse
- ZTAG(zone, element)[0] = 0xFFFE;
- vm_tag_update_zone_size(tag, zone->tag_zone_index,
- -((int64_t)elem_size), 0);
+ /*
+ * If the kernel is post lockdown, copy the zone name passed in.
+ * Else simply maintain a pointer to the name string as it can only
+ * be a core XNU zone (no unloadable kext exists before lockdown).
+ */
+ if (startup_phase >= STARTUP_SUB_LOCKDOWN) {
+ size_t nsz = MIN(strlen(name) + 1, MACH_ZONE_NAME_MAX_LEN);
+ char *buf = zalloc_permanent(nsz, ZALIGN_NONE);
+ strlcpy(buf, name, nsz);
+ z->z_name = buf;
+ } else {
+ z->z_name = name;
}
-#endif /* VM_MAX_TAG_ZONES */
-
- zone_page_meta_set_freelist(page_meta, page, element);
- if (os_sub_overflow(page_meta->zm_alloc_count, 1,
- &page_meta->zm_alloc_count)) {
- zone_page_meta_accounting_panic(zone, page_meta,
- "alloc_count wrap-around");
+ if (__probable(zone_array[ZONE_ID_PERCPU_PERMANENT].z_self)) {
+ z->z_stats = zalloc_percpu_permanent_type(struct zone_stats);
+ } else {
+ /*
+ * zone_init() hasn't run yet, use the storage provided by
+ * zone_stats_startup(), and zone_init() will replace it
+ * with the final value once the PERCPU zone exists.
+ */
+ z->z_stats = __zpcpu_mangle_for_boot(&zone_stats_startup[zone_index(z)]);
}
- zone->countfree++;
- if (kind == ZONE_ADDR_FOREIGN) {
- if (old_head == 0) {
- /* first foreign element freed on page, move from all_used_foreign */
- zone_meta_requeue(zone, &zone->pages_any_free_foreign, page_meta, kind);
- }
- } else if (page_meta->zm_alloc_count == 0) {
- /* whether the page was on the intermediate or all_used, queue, move it to free */
- zone_meta_requeue(zone, &zone->pages_all_free, page_meta, kind);
- zone->allfree_page_count += page_meta->zm_page_count;
- } else if (old_head == 0) {
- /* first free element on page, move from all_used */
- zone_meta_requeue(zone, &zone->pages_intermediate, page_meta, kind);
+ alloc = zone_get_min_alloc_granule(size, flags);
+
+ if (flags & ZC_KALLOC_HEAP) {
+ size_t rem = (alloc % size) / (alloc / size);
+
+ /*
+ * Try to grow the element size and spread elements out more if the
+ * remaining space is large enough.
+ */
+ size += rem & ~(KALLOC_MINALIGN - 1);
}
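
/*
 * Worked example of the ZC_KALLOC_HEAP adjustment above, with a 4 KiB chunk
 * and KALLOC_MINALIGN assumed to be 16 (illustrative values): for
 * size == 1800, a chunk holds alloc / size == 2 elements and leaves
 * alloc % size == 496 bytes of slack, so rem == 496 / 2 == 248. Rounded
 * down to the kalloc alignment that is 240, so each element grows to 2040
 * bytes and the per-chunk waste drops from 496 bytes to 16.
 */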
-#if KASAN_ZALLOC
- if (zone->percpu) {
- zpercpu_foreach_cpu(i) {
- kasan_poison_range(element + ptoa(i), elem_size,
- ASAN_HEAP_FREED);
- }
+ z->z_elem_size = (uint16_t)size;
+ z->z_chunk_pages = (uint16_t)atop(alloc);
+ if (flags & ZC_PERCPU) {
+ z->z_chunk_elems = (uint16_t)(PAGE_SIZE / z->z_elem_size);
} else {
- kasan_poison_range(element, elem_size, ASAN_HEAP_FREED);
+ z->z_chunk_elems = (uint16_t)(alloc / z->z_elem_size);
+ }
+ if (zone_element_idx(zone_element_encode(0,
+ z->z_chunk_elems - 1, ZPM_AUTO)) != z->z_chunk_elems - 1) {
+ panic("zone_element_encode doesn't work for zone [%s]", name);
}
-#endif
-}
-
-/*
- * The function is noinline when zlog can be used so that the backtracing can
- * reliably skip the zfree_ext() and zfree_log_trace()
- * boring frames.
- */
-#if ZONE_ENABLE_LOGGING
-__attribute__((noinline))
-#endif
-void
-zfree_ext(zone_t zone, zone_stats_t zstats, void *addr)
-{
- vm_offset_t elem = (vm_offset_t)addr;
- vm_size_t elem_size = zone_elem_size(zone);
- bool poison = false;
-
- DTRACE_VM2(zfree, zone_t, zone, void*, addr);
- TRACE_MACHLEAKS(ZFREE_CODE, ZFREE_CODE_2, elem_size, elem);
#if KASAN_ZALLOC
- if (kasan_quarantine_freed_element(&zone, &addr)) {
- return;
+ z->z_kasan_redzone = redzone;
+ if (strncmp(name, "fakestack.", sizeof("fakestack.") - 1) == 0) {
+ z->kasan_fakestacks = true;
}
+#endif
+
/*
- * kasan_quarantine_freed_element() might return a different
- * {zone, addr} than the one being freed for kalloc heaps.
- *
- * Make sure we reload everything.
+ * Handle KPI flags
*/
- elem = (vm_offset_t)addr;
- elem_size = zone_elem_size(zone);
+#if __LP64__
+ if (flags & ZC_SEQUESTER) {
+ z->z_va_sequester = true;
+ }
#endif
+ /* ZC_CACHING applied after all configuration is done */
+ if (flags & ZC_NOCACHING) {
+ z->z_nocaching = true;
+ }
+
+ if (flags & ZC_PERCPU) {
+ /*
+ * ZC_ZFREE_CLEARMEM is forced because per-cpu zones allow for
+ * pointer-sized allocations which poisoning doesn't support.
+ */
+ zone_create_assert_not_both(name, flags, ZC_PERCPU, ZC_ALLOW_FOREIGN);
+ z->z_percpu = true;
+ z->gzalloc_exempt = true;
+ z->z_free_zeroes = true;
+ }
+ if (flags & ZC_ZFREE_CLEARMEM) {
+ z->z_free_zeroes = true;
+ }
+ if (flags & ZC_NOGC) {
+ z->collectable = false;
+ }
+ if (flags & ZC_NOENCRYPT) {
+ z->z_noencrypt = true;
+ }
+ if (flags & ZC_ALIGNMENT_REQUIRED) {
+ z->alignment_required = true;
+ }
+ if (flags & ZC_NOGZALLOC) {
+ z->gzalloc_exempt = true;
+ }
+ if (flags & ZC_NOCALLOUT) {
+ z->no_callout = true;
+ }
+ if (flags & ZC_DESTRUCTIBLE) {
+ zone_create_assert_not_both(name, flags, ZC_DESTRUCTIBLE, ZC_ALLOW_FOREIGN);
+ z->z_destructible = true;
+ }
-#if CONFIG_ZLEAKS
/*
- * Zone leak detection: un-track the allocation
+ * Handle Internal flags
*/
- if (__improbable(zone->zleak_on)) {
- zleak_free(elem, elem_size);
+ if (flags & ZC_ALLOW_FOREIGN) {
+ z->z_allows_foreign = true;
}
-#endif /* CONFIG_ZLEAKS */
+ if ((ZSECURITY_OPTIONS_SUBMAP_USER_DATA & zsecurity_options) &&
+ (flags & ZC_DATA_BUFFERS)) {
+ z->z_submap_idx = Z_SUBMAP_IDX_BAG_OF_BYTES;
+ }
+ if (flags & ZC_KASAN_NOQUARANTINE) {
+ z->kasan_noquarantine = true;
+ }
+ /* ZC_KASAN_NOREDZONE already handled */
-#if CONFIG_ZCACHE
/*
- * Note: if zone caching is on, gzalloc and tags aren't used
- * so we can always check this first
+ * Then if there's extra tuning, do it
*/
- if (zone_caching_enabled(zone)) {
- return zcache_free_to_cpu_cache(zone, zstats, (vm_offset_t)addr);
+ if (extra_setup) {
+ extra_setup(z);
}
-#endif /* CONFIG_ZCACHE */
+ /*
+ * Configure debugging features
+ */
#if CONFIG_GZALLOC
- if (__improbable(zone->gzalloc_tracked)) {
- return gzalloc_free(zone, zstats, addr);
+ gzalloc_zone_init(z); /* might set z->gzalloc_tracked */
+ if (z->gzalloc_tracked) {
+ z->z_nocaching = true;
}
-#endif /* CONFIG_GZALLOC */
-
+#endif
#if ZONE_ENABLE_LOGGING
- if (__improbable(DO_LOGGING(zone))) {
- zfree_log_trace(zone, elem);
+ if (!z->gzalloc_tracked && num_zones_logged < max_num_zones_to_log) {
+ /*
+ * Check for and set up zone leak detection if requested via boot-args.
+		 * This might set z->zone_logging.
+ */
+ zone_setup_logging(z);
}
#endif /* ZONE_ENABLE_LOGGING */
-
- if (zone->zfree_clear_mem) {
- poison = zfree_clear(zone, elem, elem_size);
+#if VM_MAX_TAG_ZONES
+ if (!z->gzalloc_tracked && z->kalloc_heap && zone_tagging_on) {
+ static int tag_zone_index;
+ vm_offset_t esize = zone_elem_size(z);
+ z->tags = true;
+ z->tags_inline = (((page_size + esize - 1) / esize) <=
+ (sizeof(uint32_t) / sizeof(uint16_t)));
+ z->tag_zone_index = os_atomic_inc_orig(&tag_zone_index, relaxed);
+ assert(z->tag_zone_index < VM_MAX_TAG_ZONES);
}
+#endif
- lock_zone(zone);
- assert(zone->z_self == zone);
-
- if (!poison) {
- poison = zfree_poison_element(zone, &zone->zp_count, elem);
+ /*
+ * Finally, fixup properties based on security policies, boot-args, ...
+ */
+ if ((ZSECURITY_OPTIONS_SUBMAP_USER_DATA & zsecurity_options) &&
+ z->kalloc_heap == KHEAP_ID_DATA_BUFFERS) {
+ z->z_submap_idx = Z_SUBMAP_IDX_BAG_OF_BYTES;
}
-
- if (__probable(zstats != NULL)) {
- /*
- * The few vm zones used before zone_init() runs do not have
- * per-cpu stats yet
- */
- zpercpu_get(zstats)->zs_mem_freed += elem_size;
+#if __LP64__
+ if ((ZSECURITY_OPTIONS_SEQUESTER & zsecurity_options) &&
+ (flags & ZC_NOSEQUESTER) == 0 &&
+ z->z_submap_idx == Z_SUBMAP_IDX_GENERAL) {
+ z->z_va_sequester = true;
+ }
+#endif
+ /*
+	 * Clear the entire element for non-data zones; for data zones, only
+	 * do so for elements up to zp_min_size.
+ */
+ if (z->z_submap_idx != Z_SUBMAP_IDX_BAG_OF_BYTES) {
+ z->z_free_zeroes = true;
+ } else if (size <= zp_min_size) {
+ z->z_free_zeroes = true;
}
- zfree_direct_locked(zone, elem, poison);
-
- unlock_zone(zone);
-}
-
-void
-(zfree)(union zone_or_view zov, void *addr)
-{
- zone_t zone = zov.zov_view->zv_zone;
- zone_stats_t zstats = zov.zov_view->zv_stats;
- assert(!zone->percpu);
- zfree_ext(zone, zstats, addr);
-}
-
-void
-zfree_percpu(union zone_or_view zov, void *addr)
-{
- zone_t zone = zov.zov_view->zv_zone;
- zone_stats_t zstats = zov.zov_view->zv_stats;
- assert(zone->percpu);
- zfree_ext(zone, zstats, (void *)__zpcpu_demangle(addr));
-}
-
-#pragma mark vm integration, MIG routines
-
-/*
- * Drops (i.e. frees) the elements in the all free pages queue of a zone.
- * Called by zone_gc() on each zone and when a zone is zdestroy()ed.
- */
-static void
-zone_drop_free_elements(zone_t z)
-{
- const zone_addr_kind_t kind = ZONE_ADDR_NATIVE;
- unsigned int total_freed_pages = 0;
- struct zone_page_metadata *page_meta, *seq_meta;
- vm_address_t page_addr;
- vm_size_t size_to_free;
- vm_size_t free_count;
- uint32_t page_count;
-
- current_thread()->options |= TH_OPT_ZONE_PRIV;
- lock_zone(z);
-
- while (!zone_pva_is_null(z->pages_all_free)) {
+ if ((flags & ZC_CACHING) && !z->z_nocaching) {
/*
- * If any replenishment threads are running, defer to them,
- * so that we don't deplete reserved zones.
+		 * If zcache hasn't been initialized yet, remember our decision.
*
- * The timing of the check isn't super important, as there are
- * enough reserves to allow freeing an extra page_meta.
- *
- * Hence, we can check without grabbing the lock every time
- * through the loop. We do need the lock however to avoid
- * missing a wakeup when we decide to block.
- */
- if (zone_replenish_active > 0) {
- lck_spin_lock(&zone_replenish_lock);
- if (zone_replenish_active > 0) {
- assert_wait(&zone_replenish_active, THREAD_UNINT);
- lck_spin_unlock(&zone_replenish_lock);
- unlock_zone(z);
- thread_block(THREAD_CONTINUE_NULL);
- lock_zone(z);
- continue;
- }
- lck_spin_unlock(&zone_replenish_lock);
- }
-
- page_meta = zone_pva_to_meta(z->pages_all_free, kind);
- page_count = page_meta->zm_page_count;
- free_count = zone_elem_count(z, ptoa(page_count), kind);
-
- /*
- * Don't drain zones with async refill to below the refill
- * threshold, as they need some reserve to function properly.
+		 * zone_enable_caching() will be called again by
+		 * zone_cache_bootstrap(), while the system is still single
+		 * threaded, to build the missing caches.
*/
- if (!z->destroyed && z->prio_refill_count &&
- (vm_size_t)(z->countfree - free_count) < z->prio_refill_count) {
- break;
- }
-
- zone_meta_queue_pop(z, &z->pages_all_free, kind, &page_addr);
-
- if (os_sub_overflow(z->countfree, free_count, &z->countfree)) {
- zone_accounting_panic(z, "countfree wrap-around");
- }
- if (os_sub_overflow(z->countavail, free_count, &z->countavail)) {
- zone_accounting_panic(z, "countavail wrap-around");
- }
- if (os_sub_overflow(z->allfree_page_count, page_count,
- &z->allfree_page_count)) {
- zone_accounting_panic(z, "allfree_page_count wrap-around");
- }
- if (os_sub_overflow(z->page_count, page_count, &z->page_count)) {
- zone_accounting_panic(z, "page_count wrap-around");
- }
-
- os_atomic_sub(&zones_phys_page_count, page_count, relaxed);
- os_atomic_sub(&zones_phys_page_mapped_count, page_count, relaxed);
-
- bzero(page_meta, sizeof(*page_meta) * page_count);
- seq_meta = page_meta;
- page_meta = NULL; /* page_meta fields are zeroed, prevent reuse */
-
- unlock_zone(z);
-
- /* Free the pages for metadata and account for them */
- total_freed_pages += page_count;
- size_to_free = ptoa(page_count);
-#if KASAN_ZALLOC
- kasan_poison_range(page_addr, size_to_free, ASAN_VALID);
-#endif
-#if VM_MAX_TAG_ZONES
- if (z->tags) {
- ztMemoryRemove(z, page_addr, size_to_free);
- }
-#endif /* VM_MAX_TAG_ZONES */
-
- if (z->va_sequester && z->alloc_pages == page_count) {
- kernel_memory_depopulate(submap_for_zone(z), page_addr,
- size_to_free, KMA_KOBJECT, VM_KERN_MEMORY_ZONE);
+ if (__probable(zc_magazine_zone)) {
+ zone_enable_caching(z);
} else {
- kmem_free(submap_for_zone(z), page_addr, size_to_free);
- seq_meta = NULL;
+ z->z_pcpu_cache =
+ __zpcpu_mangle_for_boot(&zone_cache_startup[zid]);
}
- thread_yield_to_preemption();
-
- lock_zone(z);
-
- if (seq_meta) {
- zone_meta_queue_push(z, &z->pages_sequester, seq_meta, kind);
- z->sequester_page_count += page_count;
- }
- }
- if (z->destroyed) {
- assert(zone_pva_is_null(z->pages_all_free));
- assert(z->allfree_page_count == 0);
- }
- unlock_zone(z);
- current_thread()->options &= ~TH_OPT_ZONE_PRIV;
-
-#if DEBUG || DEVELOPMENT
- if (zalloc_debug & ZALLOC_DEBUG_ZONEGC) {
- kprintf("zone_gc() of zone %s%s freed %lu elements, %d pages\n",
- zone_heap_name(z), z->z_name,
- (unsigned long)(ptoa(total_freed_pages) / z->pcpu_elem_size),
- total_freed_pages);
- }
-#endif /* DEBUG || DEVELOPMENT */
-}
-
-/* Zone garbage collection
- *
- * zone_gc will walk through all the free elements in all the
- * zones that are marked collectable looking for reclaimable
- * pages. zone_gc is called by consider_zone_gc when the system
- * begins to run out of memory.
- *
- * We should ensure that zone_gc never blocks.
- */
-void
-zone_gc(boolean_t consider_jetsams)
-{
- if (consider_jetsams) {
- kill_process_in_largest_zone();
- /*
- * If we do end up jetsamming something, we need to do a zone_gc so that
- * we can reclaim free zone elements and update the zone map size.
- * Fall through.
- */
}
- lck_mtx_lock(&zone_gc_lock);
-
-#if DEBUG || DEVELOPMENT
- if (zalloc_debug & ZALLOC_DEBUG_ZONEGC) {
- kprintf("zone_gc() starting...\n");
- }
-#endif /* DEBUG || DEVELOPMENT */
-
- zone_index_foreach(i) {
- zone_t z = &zone_array[i];
-
- if (!z->collectable) {
- continue;
- }
-#if CONFIG_ZCACHE
- if (zone_caching_enabled(z)) {
- zcache_drain_depot(z);
- }
-#endif /* CONFIG_ZCACHE */
- if (zone_pva_is_null(z->pages_all_free)) {
- continue;
+ if (zp_factor != 0 && !z->z_free_zeroes) {
+ if (__probable(zone_array[ZONE_ID_PERCPU_PERMANENT].z_self)) {
+ zpercpu_foreach(zs, z->z_stats) {
+ zs->zs_poison_seqno = zone_poison_count_init(z);
+ }
+ } else {
+ zone_stats_startup[zid].zs_poison_seqno =
+ zone_poison_count_init(z);
}
-
- zone_drop_free_elements(z);
}
- lck_mtx_unlock(&zone_gc_lock);
-}
+ zone_lock(z);
+ z->z_self = z;
+ zone_unlock(z);
-/*
- * consider_zone_gc:
- *
- * Called by the pageout daemon when the system needs more free pages.
- */
+ return z;
+}
+__startup_func
void
-consider_zone_gc(boolean_t consider_jetsams)
+zone_create_startup(struct zone_create_startup_spec *spec)
{
- /*
- * One-time reclaim of kernel_map resources we allocated in
- * early boot.
- *
- * Use atomic exchange in case multiple threads race into here.
- */
- vm_offset_t deallocate_kaddr;
- if (kmapoff_kaddr != 0 &&
- (deallocate_kaddr = os_atomic_xchg(&kmapoff_kaddr, 0, relaxed)) != 0) {
- vm_deallocate(kernel_map, deallocate_kaddr, ptoa_64(kmapoff_pgcnt));
- }
-
- zone_gc(consider_jetsams);
+ *spec->z_var = zone_create_ext(spec->z_name, spec->z_size,
+ spec->z_flags, spec->z_zid, spec->z_setup);
}
/*
- * Creates a vm_map_copy_t to return to the caller of mach_* MIG calls
- * requesting zone information.
- * Frees unused pages towards the end of the region, and zero'es out unused
- * space on the last page.
+ * The first 4 fields of a zone_view and a zone alias, so that the zone_or_view_t
+ * union works. Trust but verify.
*/
-static vm_map_copy_t
-create_vm_map_copy(
- vm_offset_t start_addr,
- vm_size_t total_size,
- vm_size_t used_size)
-{
- kern_return_t kr;
- vm_offset_t end_addr;
- vm_size_t free_size;
- vm_map_copy_t copy;
-
- if (used_size != total_size) {
- end_addr = start_addr + used_size;
- free_size = total_size - (round_page(end_addr) - start_addr);
-
- if (free_size >= PAGE_SIZE) {
- kmem_free(ipc_kernel_map,
- round_page(end_addr), free_size);
- }
- bzero((char *) end_addr, round_page(end_addr) - end_addr);
- }
-
- kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)start_addr,
-	    (vm_map_size_t)used_size, TRUE, &copy);
- assert(kr == KERN_SUCCESS);
-
- return copy;
-}
+#define zalloc_check_zov_alias(f1, f2) \
+ static_assert(offsetof(struct zone, f1) == offsetof(struct zone_view, f2))
+zalloc_check_zov_alias(z_self, zv_zone);
+zalloc_check_zov_alias(z_stats, zv_stats);
+zalloc_check_zov_alias(z_name, zv_name);
+zalloc_check_zov_alias(z_views, zv_next);
+#undef zalloc_check_zov_alias
-static boolean_t
-get_zone_info(
- zone_t z,
- mach_zone_name_t *zn,
- mach_zone_info_t *zi)
+__startup_func
+void
+zone_view_startup_init(struct zone_view_startup_spec *spec)
{
- struct zone zcopy;
+ struct kalloc_heap *heap = NULL;
+ zone_view_t zv = spec->zv_view;
+ zone_t z;
- assert(z != ZONE_NULL);
- lock_zone(z);
- if (!z->z_self) {
- unlock_zone(z);
- return FALSE;
+ switch (spec->zv_heapid) {
+ case KHEAP_ID_DEFAULT:
+ heap = KHEAP_DEFAULT;
+ break;
+ case KHEAP_ID_DATA_BUFFERS:
+ heap = KHEAP_DATA_BUFFERS;
+ break;
+ case KHEAP_ID_KEXT:
+ heap = KHEAP_KEXT;
+ break;
+ default:
+ heap = NULL;
}
- zcopy = *z;
- unlock_zone(z);
-
- if (zn != NULL) {
- /*
- * Append kalloc heap name to zone name (if zone is used by kalloc)
- */
- char temp_zone_name[MAX_ZONE_NAME] = "";
- snprintf(temp_zone_name, MAX_ZONE_NAME, "%s%s",
- zone_heap_name(z), z->z_name);
- /* assuming here the name data is static */
- (void) __nosan_strlcpy(zn->mzn_name, temp_zone_name,
- strlen(temp_zone_name) + 1);
+ if (heap) {
+ z = kalloc_heap_zone_for_size(heap, spec->zv_size);
+ assert(z);
+ } else {
+ z = spec->zv_zone;
+ assert(spec->zv_size <= zone_elem_size(z));
}
- if (zi != NULL) {
- *zi = (mach_zone_info_t) {
- .mzi_count = zone_count_allocated(&zcopy),
- .mzi_cur_size = ptoa_64(zcopy.page_count),
- // max_size for zprint is now high-watermark of pages used
- .mzi_max_size = ptoa_64(zcopy.page_count_hwm),
- .mzi_elem_size = zcopy.pcpu_elem_size,
- .mzi_alloc_size = ptoa_64(zcopy.alloc_pages),
- .mzi_exhaustible = (uint64_t)zcopy.exhaustible,
- };
- zpercpu_foreach(zs, zcopy.z_stats) {
- zi->mzi_sum_size += zs->zs_mem_allocated;
- }
- if (zcopy.collectable) {
- SET_MZI_COLLECTABLE_BYTES(zi->mzi_collectable,
- ptoa_64(zcopy.allfree_page_count));
- SET_MZI_COLLECTABLE_FLAG(zi->mzi_collectable, TRUE);
- }
+ zv->zv_zone = z;
+ zv->zv_stats = zalloc_percpu_permanent_type(struct zone_stats);
+ zv->zv_next = z->z_views;
+ if (z->z_views == NULL && z->kalloc_heap == KHEAP_ID_NONE) {
+ /*
+ * count the raw view for zones not in a heap,
+ * kalloc_heap_init() already counts it for its members.
+ */
+ zone_view_count += 2;
+ } else {
+ zone_view_count += 1;
}
-
- return TRUE;
+ z->z_views = zv;
}
-kern_return_t
-task_zone_info(
- __unused task_t task,
- __unused mach_zone_name_array_t *namesp,
- __unused mach_msg_type_number_t *namesCntp,
- __unused task_zone_info_array_t *infop,
- __unused mach_msg_type_number_t *infoCntp)
+zone_t
+zone_create(
+ const char *name,
+ vm_size_t size,
+ zone_create_flags_t flags)
{
- return KERN_FAILURE;
+ return zone_create_ext(name, size, flags, ZONE_ID_ANY, NULL);
}
-kern_return_t
-mach_zone_info(
- host_priv_t host,
- mach_zone_name_array_t *namesp,
- mach_msg_type_number_t *namesCntp,
- mach_zone_info_array_t *infop,
- mach_msg_type_number_t *infoCntp)
+zone_t
+zinit(
+ vm_size_t size, /* the size of an element */
+ vm_size_t max, /* maximum memory to use */
+ vm_size_t alloc __unused, /* allocation size */
+ const char *name) /* a name for the zone */
{
- return mach_memory_info(host, namesp, namesCntp, infop, infoCntp, NULL, NULL);
+ zone_t z = zone_create(name, size, ZC_DESTRUCTIBLE);
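+	/* translate the legacy "max bytes" argument into a cap on wired pages */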
+ z->z_wired_max = zone_alloc_pages_for_nelems(z, max / size);
+ return z;
}
-
-kern_return_t
-mach_memory_info(
- host_priv_t host,
- mach_zone_name_array_t *namesp,
- mach_msg_type_number_t *namesCntp,
- mach_zone_info_array_t *infop,
- mach_msg_type_number_t *infoCntp,
- mach_memory_info_array_t *memoryInfop,
- mach_msg_type_number_t *memoryInfoCntp)
+void
+zdestroy(zone_t z)
{
- mach_zone_name_t *names;
- vm_offset_t names_addr;
- vm_size_t names_size;
-
- mach_zone_info_t *info;
- vm_offset_t info_addr;
- vm_size_t info_size;
+ unsigned int zindex = zone_index(z);
- mach_memory_info_t *memory_info;
- vm_offset_t memory_info_addr;
- vm_size_t memory_info_size;
- vm_size_t memory_info_vmsize;
- unsigned int num_info;
+ current_thread()->options |= TH_OPT_ZONE_PRIV;
+ lck_mtx_lock(&zone_gc_lock);
- unsigned int max_zones, used_zones, i;
- mach_zone_name_t *zn;
- mach_zone_info_t *zi;
- kern_return_t kr;
+ zone_reclaim(z, ZONE_RECLAIM_DESTROY);
- uint64_t zones_collectable_bytes = 0;
+ lck_mtx_unlock(&zone_gc_lock);
+ current_thread()->options &= ~TH_OPT_ZONE_PRIV;
- if (host == HOST_NULL) {
- return KERN_INVALID_HOST;
- }
-#if CONFIG_DEBUGGER_FOR_ZONE_INFO
- if (!PE_i_can_has_debugger(NULL)) {
- return KERN_INVALID_HOST;
+#if CONFIG_GZALLOC
+ if (__improbable(z->gzalloc_tracked)) {
+ /* If the zone is gzalloc managed dump all the elements in the free cache */
+ gzalloc_empty_free_cache(z);
}
#endif
- /*
- * We assume that zones aren't freed once allocated.
- * We won't pick up any zones that are allocated later.
- */
+ zone_lock(z);
- max_zones = os_atomic_load(&num_zones, relaxed);
+ while (!zone_pva_is_null(z->z_pageq_va)) {
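+		/* give this sequestered VA-only chunk back to the zone submap */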
+ struct zone_page_metadata *meta;
+ vm_offset_t free_addr;
- names_size = round_page(max_zones * sizeof *names);
- kr = kmem_alloc_pageable(ipc_kernel_map,
- &names_addr, names_size, VM_KERN_MEMORY_IPC);
- if (kr != KERN_SUCCESS) {
- return kr;
+ zone_counter_sub(z, z_va_cur, z->z_percpu ? 1 : z->z_chunk_pages);
+ meta = zone_meta_queue_pop_native(z, &z->z_pageq_va, &free_addr);
+ assert(meta->zm_chunk_len <= ZM_CHUNK_LEN_MAX);
+ bzero(meta, sizeof(*meta) * z->z_chunk_pages);
+ zone_unlock(z);
+ kmem_free(zone_submap(z), free_addr, ptoa(z->z_chunk_pages));
+ zone_lock(z);
}
- names = (mach_zone_name_t *) names_addr;
- info_size = round_page(max_zones * sizeof *info);
- kr = kmem_alloc_pageable(ipc_kernel_map,
- &info_addr, info_size, VM_KERN_MEMORY_IPC);
- if (kr != KERN_SUCCESS) {
- kmem_free(ipc_kernel_map,
- names_addr, names_size);
- return kr;
+#if !KASAN_ZALLOC
+ /* Assert that all counts are zero */
+ if (z->z_elems_avail || z->z_elems_free ||
+ zone_size_wired(z) || z->z_va_cur) {
+ panic("zdestroy: Zone %s%s isn't empty at zdestroy() time",
+ zone_heap_name(z), z->z_name);
}
- info = (mach_zone_info_t *) info_addr;
- zn = &names[0];
- zi = &info[0];
+ /* consistency check: make sure everything is indeed empty */
+ assert(zone_pva_is_null(z->z_pageq_empty));
+ assert(zone_pva_is_null(z->z_pageq_partial));
+ assert(zone_pva_is_null(z->z_pageq_full));
+ assert(zone_pva_is_null(z->z_pageq_va));
+#endif
- used_zones = max_zones;
- for (i = 0; i < max_zones; i++) {
- if (!get_zone_info(&(zone_array[i]), zn, zi)) {
- used_zones--;
- continue;
- }
- zones_collectable_bytes += GET_MZI_COLLECTABLE_BYTES(zi->mzi_collectable);
- zn++;
- zi++;
- }
+ zone_unlock(z);
- *namesp = (mach_zone_name_t *) create_vm_map_copy(names_addr, names_size, used_zones * sizeof *names);
- *namesCntp = used_zones;
+ simple_lock(&all_zones_lock, &zone_locks_grp);
- *infop = (mach_zone_info_t *) create_vm_map_copy(info_addr, info_size, used_zones * sizeof *info);
- *infoCntp = used_zones;
+ assert(!bitmap_test(zone_destroyed_bitmap, zindex));
+ /* Mark the zone as empty in the bitmap */
+ bitmap_set(zone_destroyed_bitmap, zindex);
+ num_zones_in_use--;
+ assert(num_zones_in_use > 0);
- num_info = 0;
- memory_info_addr = 0;
+ simple_unlock(&all_zones_lock);
+}
- if (memoryInfop && memoryInfoCntp) {
- vm_map_copy_t copy;
- num_info = vm_page_diagnose_estimate();
- memory_info_size = num_info * sizeof(*memory_info);
- memory_info_vmsize = round_page(memory_info_size);
- kr = kmem_alloc_pageable(ipc_kernel_map,
- &memory_info_addr, memory_info_vmsize, VM_KERN_MEMORY_IPC);
- if (kr != KERN_SUCCESS) {
- return kr;
- }
+#endif /* !ZALLOC_TEST */
+#pragma mark zalloc module init
+#if !ZALLOC_TEST
- kr = vm_map_wire_kernel(ipc_kernel_map, memory_info_addr, memory_info_addr + memory_info_vmsize,
- VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_IPC, FALSE);
- assert(kr == KERN_SUCCESS);
+/*
+ * Initialize the "zone of zones" which uses fixed memory allocated
+ * earlier in memory initialization. zone_bootstrap is called
+ * before zone_init.
+ */
+__startup_func
+void
+zone_bootstrap(void)
+{
+ /* Validate struct zone_packed_virtual_address expectations */
+ static_assert((intptr_t)VM_MIN_KERNEL_ADDRESS < 0, "the top bit must be 1");
+ if (VM_KERNEL_POINTER_SIGNIFICANT_BITS - PAGE_SHIFT > 31) {
+ panic("zone_pva_t can't pack a kernel page address in 31 bits");
+ }
- memory_info = (mach_memory_info_t *) memory_info_addr;
- vm_page_diagnose(memory_info, num_info, zones_collectable_bytes);
+ zpercpu_early_count = ml_early_cpu_max_number() + 1;
- kr = vm_map_unwire(ipc_kernel_map, memory_info_addr, memory_info_addr + memory_info_vmsize, FALSE);
- assert(kr == KERN_SUCCESS);
+ /* Set up zone element poisoning */
+ zp_bootstrap();
- kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)memory_info_addr,
- (vm_map_size_t)memory_info_size, TRUE, ©);
-	    (vm_map_size_t)memory_info_size, TRUE, &copy);
+ /*
+	 * The KASAN quarantine for kalloc doesn't understand heaps and trips
+	 * the heap-confusion panics. At the end of the day, all these security
+	 * measures are redundant with what KASAN already does.
+	 *
+	 * On 32-bit kernels, these protections are simply too expensive.
+ */
+#if !defined(__LP64__) || KASAN_ZALLOC
+ zsecurity_options &= ~ZSECURITY_OPTIONS_SEQUESTER;
+ zsecurity_options &= ~ZSECURITY_OPTIONS_SUBMAP_USER_DATA;
+ zsecurity_options &= ~ZSECURITY_OPTIONS_SEQUESTER_KEXT_KALLOC;
+#endif
- *memoryInfop = (mach_memory_info_t *) copy;
- *memoryInfoCntp = num_info;
- }
+ thread_call_setup_with_options(&zone_expand_callout,
+ zone_expand_async, NULL, THREAD_CALL_PRIORITY_HIGH,
+ THREAD_CALL_OPTIONS_ONCE);
+
+ thread_call_setup_with_options(&zone_defrag_callout,
+ zone_defrag_async, NULL, THREAD_CALL_PRIORITY_USER,
+ THREAD_CALL_OPTIONS_ONCE);
+}
+
+#if __LP64__
+#if ARM_LARGE_MEMORY || __x86_64__
+#define ZONE_MAP_VIRTUAL_SIZE_LP64 (128ULL * 1024ULL * 1024 * 1024)
+#else
+#define ZONE_MAP_VIRTUAL_SIZE_LP64 (32ULL * 1024ULL * 1024 * 1024)
+#endif
+#endif /* __LP64__ */
- return KERN_SUCCESS;
-}
+#define ZONE_GUARD_SIZE (64UL << 10)
-kern_return_t
-mach_zone_info_for_zone(
- host_priv_t host,
- mach_zone_name_t name,
- mach_zone_info_t *infop)
+#if __LP64__
+static inline vm_offset_t
+zone_restricted_va_max(void)
{
- zone_t zone_ptr;
+ vm_offset_t compressor_max = VM_PACKING_MAX_PACKABLE(C_SLOT_PACKED_PTR);
+ vm_offset_t vm_page_max = VM_PACKING_MAX_PACKABLE(VM_PAGE_PACKED_PTR);
- if (host == HOST_NULL) {
- return KERN_INVALID_HOST;
- }
-#if CONFIG_DEBUGGER_FOR_ZONE_INFO
- if (!PE_i_can_has_debugger(NULL)) {
- return KERN_INVALID_HOST;
- }
+ return trunc_page(MIN(compressor_max, vm_page_max));
+}
#endif
- if (infop == NULL) {
- return KERN_INVALID_ARGUMENT;
+__startup_func
+static void
+zone_tunables_fixup(void)
+{
+ if (zone_map_jetsam_limit == 0 || zone_map_jetsam_limit > 100) {
+ zone_map_jetsam_limit = ZONE_MAP_JETSAM_LIMIT_DEFAULT;
}
+ if (zc_magazine_size > PAGE_SIZE / ZONE_MIN_ELEM_SIZE) {
+ zc_magazine_size = (uint16_t)(PAGE_SIZE / ZONE_MIN_ELEM_SIZE);
+ }
+}
+STARTUP(TUNABLES, STARTUP_RANK_MIDDLE, zone_tunables_fixup);
- zone_ptr = ZONE_NULL;
- zone_index_foreach(i) {
- zone_t z = &(zone_array[i]);
- assert(z != ZONE_NULL);
-
- /*
- * Append kalloc heap name to zone name (if zone is used by kalloc)
- */
- char temp_zone_name[MAX_ZONE_NAME] = "";
- snprintf(temp_zone_name, MAX_ZONE_NAME, "%s%s",
- zone_heap_name(z), z->z_name);
+__startup_func
+static vm_size_t
+zone_phys_size_max(void)
+{
+ vm_size_t zsize;
+ vm_size_t zsizearg;
- /* Find the requested zone by name */
- if (track_this_zone(temp_zone_name, name.mzn_name)) {
- zone_ptr = z;
- break;
- }
+ if (PE_parse_boot_argn("zsize", &zsizearg, sizeof(zsizearg))) {
+ zsize = zsizearg * (1024ULL * 1024);
+ } else {
+ /* Set target zone size as 1/4 of physical memory */
+ zsize = (vm_size_t)(sane_size >> 2);
+#if defined(__LP64__)
+ zsize += zsize >> 1;
+#endif /* __LP64__ */
}
- /* No zones found with the requested zone name */
- if (zone_ptr == ZONE_NULL) {
- return KERN_INVALID_ARGUMENT;
+ if (zsize < CONFIG_ZONE_MAP_MIN) {
+ zsize = CONFIG_ZONE_MAP_MIN; /* Clamp to min */
}
-
- if (get_zone_info(zone_ptr, NULL, infop)) {
- return KERN_SUCCESS;
+ if (zsize > sane_size >> 1) {
+ zsize = (vm_size_t)(sane_size >> 1); /* Clamp to half of RAM max */
}
- return KERN_FAILURE;
+ if (zsizearg == 0 && zsize > ZONE_MAP_MAX) {
+ /* if zsize boot-arg not present and zsize exceeds platform maximum, clip zsize */
+ printf("NOTE: zonemap size reduced from 0x%lx to 0x%lx\n",
+ (uintptr_t)zsize, (uintptr_t)ZONE_MAP_MAX);
+ zsize = ZONE_MAP_MAX;
+ }
+
+ return (vm_size_t)trunc_page(zsize);
}
-kern_return_t
-mach_zone_info_for_largest_zone(
- host_priv_t host,
- mach_zone_name_t *namep,
- mach_zone_info_t *infop)
+__options_decl(zone_init_allocate_flags_t, unsigned, {
+ ZIA_NONE = 0x00000000,
+ ZIA_REPLACE = 0x00000001, /* replace a previous non permanent range */
+ ZIA_RANDOM = 0x00000002, /* place at a random address */
+ ZIA_PERMANENT = 0x00000004, /* permanent allocation */
+ ZIA_GUARD = 0x00000008, /* will be used as a guard */
+});
+
+__startup_func
+static struct zone_map_range
+zone_init_allocate_va(vm_map_address_t addr, vm_size_t size,
+ zone_init_allocate_flags_t flags)
{
- if (host == HOST_NULL) {
- return KERN_INVALID_HOST;
+ vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
+ int vm_alloc_flags = 0;
+ struct zone_map_range r;
+ kern_return_t kr;
+
+ if (flags & ZIA_REPLACE) {
+ vm_alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
+ } else {
+ vm_alloc_flags |= VM_FLAGS_ANYWHERE;
}
-#if CONFIG_DEBUGGER_FOR_ZONE_INFO
- if (!PE_i_can_has_debugger(NULL)) {
- return KERN_INVALID_HOST;
+ if (flags & ZIA_RANDOM) {
+ vm_alloc_flags |= VM_FLAGS_RANDOM_ADDR;
}
-#endif
-
- if (namep == NULL || infop == NULL) {
- return KERN_INVALID_ARGUMENT;
+ if (flags & ZIA_PERMANENT) {
+ vmk_flags.vmkf_permanent = true;
}
- if (get_zone_info(zone_find_largest(), namep, infop)) {
- return KERN_SUCCESS;
- }
- return KERN_FAILURE;
-}
+ vm_object_reference(kernel_object);
-uint64_t
-get_zones_collectable_bytes(void)
-{
- uint64_t zones_collectable_bytes = 0;
- mach_zone_info_t zi;
+ kr = vm_map_enter(kernel_map, &addr, size, 0,
+ vm_alloc_flags, vmk_flags, VM_KERN_MEMORY_ZONE,
+ kernel_object, 0, FALSE,
+ (flags & ZIA_GUARD) ? VM_PROT_NONE : VM_PROT_DEFAULT,
+ (flags & ZIA_GUARD) ? VM_PROT_NONE : VM_PROT_DEFAULT,
+ VM_INHERIT_NONE);
- zone_index_foreach(i) {
- if (get_zone_info(&zone_array[i], NULL, &zi)) {
- zones_collectable_bytes +=
- GET_MZI_COLLECTABLE_BYTES(zi.mzi_collectable);
- }
+ if (KERN_SUCCESS != kr) {
+ panic("vm_map_enter(0x%zx) failed: %d", (size_t)size, kr);
}
- return zones_collectable_bytes;
+ r.min_address = (vm_offset_t)addr;
+ r.max_address = (vm_offset_t)addr + size;
+ return r;
}
-kern_return_t
-mach_zone_get_zlog_zones(
- host_priv_t host,
- mach_zone_name_array_t *namesp,
- mach_msg_type_number_t *namesCntp)
+__startup_func
+static void
+zone_submap_init(
+ vm_offset_t *submap_min,
+ unsigned idx,
+ uint64_t zone_sub_map_numer,
+ uint64_t *remaining_denom,
+ vm_offset_t *remaining_size,
+ vm_size_t guard_size)
{
-#if ZONE_ENABLE_LOGGING
- unsigned int max_zones, logged_zones, i;
+ vm_offset_t submap_start, submap_end;
+ vm_size_t submap_size;
+ vm_map_t submap;
kern_return_t kr;
- zone_t zone_ptr;
- mach_zone_name_t *names;
- vm_offset_t names_addr;
- vm_size_t names_size;
- if (host == HOST_NULL) {
- return KERN_INVALID_HOST;
- }
+ submap_size = trunc_page(zone_sub_map_numer * *remaining_size /
+ *remaining_denom);
+ submap_start = *submap_min;
+ submap_end = submap_start + submap_size;
- if (namesp == NULL || namesCntp == NULL) {
- return KERN_INVALID_ARGUMENT;
- }
+#if defined(__LP64__)
+ if (idx == Z_SUBMAP_IDX_VA_RESTRICTED) {
+ vm_offset_t restricted_va_max = zone_restricted_va_max();
+ if (submap_end > restricted_va_max) {
+#if DEBUG || DEVELOPMENT
+ printf("zone_init: submap[%d] clipped to %zdM of %zdM\n", idx,
+ (size_t)(restricted_va_max - submap_start) >> 20,
+ (size_t)submap_size >> 20);
+#endif /* DEBUG || DEVELOPMENT */
+ guard_size += submap_end - restricted_va_max;
+ *remaining_size -= submap_end - restricted_va_max;
+ submap_end = restricted_va_max;
+ submap_size = restricted_va_max - submap_start;
+ }
- max_zones = os_atomic_load(&num_zones, relaxed);
+ vm_packing_verify_range("vm_compressor",
+ submap_start, submap_end, VM_PACKING_PARAMS(C_SLOT_PACKED_PTR));
+ vm_packing_verify_range("vm_page",
+ submap_start, submap_end, VM_PACKING_PARAMS(VM_PAGE_PACKED_PTR));
+ }
+#endif /* defined(__LP64__) */
- names_size = round_page(max_zones * sizeof *names);
- kr = kmem_alloc_pageable(ipc_kernel_map,
- &names_addr, names_size, VM_KERN_MEMORY_IPC);
+ vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
+ vmk_flags.vmkf_permanent = TRUE;
+ kr = kmem_suballoc(kernel_map, submap_min, submap_size,
+ FALSE, VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, vmk_flags,
+ VM_KERN_MEMORY_ZONE, &submap);
if (kr != KERN_SUCCESS) {
- return kr;
+ panic("kmem_suballoc(kernel_map[%d] %p:%p) failed: %d",
+ idx, (void *)submap_start, (void *)submap_end, kr);
}
- names = (mach_zone_name_t *) names_addr;
- zone_ptr = ZONE_NULL;
- logged_zones = 0;
- for (i = 0; i < max_zones; i++) {
- zone_t z = &(zone_array[i]);
- assert(z != ZONE_NULL);
+#if DEBUG || DEVELOPMENT
+ printf("zone_init: submap[%d] %p:%p (%zuM)\n",
+ idx, (void *)submap_start, (void *)submap_end,
+ (size_t)submap_size >> 20);
+#endif /* DEBUG || DEVELOPMENT */
- /* Copy out the zone name if zone logging is enabled */
- if (z->zlog_btlog) {
- get_zone_info(z, &names[logged_zones], NULL);
- logged_zones++;
- }
+ zone_init_allocate_va(submap_end, guard_size,
+ ZIA_PERMANENT | ZIA_GUARD | ZIA_REPLACE);
+
+ zone_submaps[idx] = submap;
+ *submap_min = submap_end + guard_size;
+ *remaining_size -= submap_size;
+ *remaining_denom -= zone_sub_map_numer;
+}
+
+/*
+ * Allocate metadata array and migrate foreign initial metadata.
+ *
+ * So that foreign pages and native pages have the same scheme,
+ * we allocate VA space that covers both foreign and native pages.
+ */
+__startup_func
+static void
+zone_metadata_init(void)
+{
+ struct zone_map_range r0 = zone_info.zi_map_range[0];
+ struct zone_map_range r1 = zone_info.zi_map_range[1];
+ struct zone_map_range mr, br;
+ vm_size_t meta_size, bits_size, foreign_base;
+ vm_offset_t hstart, hend;
+
+ if (r0.min_address > r1.min_address) {
+ r0 = zone_info.zi_map_range[1];
+ r1 = zone_info.zi_map_range[0];
}
- *namesp = (mach_zone_name_t *) create_vm_map_copy(names_addr, names_size, logged_zones * sizeof *names);
- *namesCntp = logged_zones;
+ meta_size = round_page(atop(r1.max_address - r0.min_address) *
+ sizeof(struct zone_page_metadata)) + ZONE_GUARD_SIZE * 2;
- return KERN_SUCCESS;
+ /*
+	 * Allocations can't be smaller than 8 bytes, so at most 128 elements
+	 * fit in 1K of physical memory; tracking them takes 128 bits (16 bytes)
+	 * of bitmap per 1K, i.e. 16M of bitmaps per 1G of RAM.
+	 *
+	 * Preallocate for that worst case to avoid weird panics.
+ */
+ bits_size = round_page(16 * (ptoa(zone_phys_mapped_max_pages) >> 10));
-#else /* ZONE_ENABLE_LOGGING */
-#pragma unused(host, namesp, namesCntp)
- return KERN_FAILURE;
-#endif /* ZONE_ENABLE_LOGGING */
-}
+ /*
+ * Compute the size of the "hole" in the middle of the range.
+ *
+ * If it is smaller than 256k, just leave it be, with this layout:
+ *
+ * [G][ r0 meta ][ hole ][ r1 meta ][ bits ][G]
+ *
+ * else punch a hole with guard pages around the hole, and place the
+ * bits in the hole if it fits, or after r1 otherwise, yielding either
+ * of the following layouts:
+ *
+ * |__________________hend____________|
+ * |__hstart_| |
+ * [G][ r0 meta ][ bits ][G]..........[G][ r1 meta ][G]
+ * [G][ r0 meta ][G]..................[G][ r1 meta ][ bits ][G]
+ */
+ hstart = round_page(atop(r0.max_address - r0.min_address) *
+ sizeof(struct zone_page_metadata));
+ hend = trunc_page(atop(r1.min_address - r0.min_address) *
+ sizeof(struct zone_page_metadata));
+
+ if (hstart >= hend || hend - hstart < (256ul << 10)) {
+ mr = zone_init_allocate_va(0, meta_size + bits_size,
+ ZIA_PERMANENT | ZIA_RANDOM);
+ mr.min_address += ZONE_GUARD_SIZE;
+ mr.max_address -= ZONE_GUARD_SIZE;
+ br.max_address = mr.max_address;
+ mr.max_address -= bits_size;
+ br.min_address = mr.max_address;
-kern_return_t
-mach_zone_get_btlog_records(
- host_priv_t host,
- mach_zone_name_t name,
- zone_btrecord_array_t *recsp,
- mach_msg_type_number_t *recsCntp)
-{
#if DEBUG || DEVELOPMENT
- unsigned int numrecs = 0;
- zone_btrecord_t *recs;
- kern_return_t kr;
- zone_t zone_ptr;
- vm_offset_t recs_addr;
- vm_size_t recs_size;
+ printf("zone_init: metadata %p:%p (%zuK)\n",
+ (void *)mr.min_address, (void *)mr.max_address,
+ (size_t)zone_range_size(&mr) >> 10);
+ printf("zone_init: metabits %p:%p (%zuK)\n",
+ (void *)br.min_address, (void *)br.max_address,
+ (size_t)zone_range_size(&br) >> 10);
+#endif /* DEBUG || DEVELOPMENT */
+ } else {
+ vm_size_t size, alloc_size = meta_size;
+ vm_offset_t base;
+ bool bits_in_middle = true;
- if (host == HOST_NULL) {
- return KERN_INVALID_HOST;
- }
+ if (hend - hstart - 2 * ZONE_GUARD_SIZE < bits_size) {
+ alloc_size += bits_size;
+ bits_in_middle = false;
+ }
- if (recsp == NULL || recsCntp == NULL) {
- return KERN_INVALID_ARGUMENT;
- }
+ mr = zone_init_allocate_va(0, alloc_size, ZIA_RANDOM);
+
+ base = mr.min_address;
+ size = ZONE_GUARD_SIZE + hstart + ZONE_GUARD_SIZE;
+ if (bits_in_middle) {
+ size += bits_size;
+ br.min_address = base + ZONE_GUARD_SIZE + hstart;
+ br.max_address = br.min_address + bits_size;
+ }
+ zone_init_allocate_va(base, size, ZIA_PERMANENT | ZIA_REPLACE);
- zone_ptr = ZONE_NULL;
- zone_index_foreach(i) {
- zone_t z = &zone_array[i];
+ base += size;
+ size = mr.min_address + hend - base;
+ kmem_free(kernel_map, base, size);
- /*
- * Append kalloc heap name to zone name (if zone is used by kalloc)
- */
- char temp_zone_name[MAX_ZONE_NAME] = "";
- snprintf(temp_zone_name, MAX_ZONE_NAME, "%s%s",
- zone_heap_name(z), z->z_name);
+ base = mr.min_address + hend;
+ size = mr.max_address - base;
+ zone_init_allocate_va(base, size, ZIA_PERMANENT | ZIA_REPLACE);
- /* Find the requested zone by name */
- if (track_this_zone(temp_zone_name, name.mzn_name)) {
- zone_ptr = z;
- break;
+ mr.min_address += ZONE_GUARD_SIZE;
+ mr.max_address -= ZONE_GUARD_SIZE;
+ if (!bits_in_middle) {
+ br.max_address = mr.max_address;
+ mr.max_address -= bits_size;
+ br.min_address = mr.max_address;
}
+
+#if DEBUG || DEVELOPMENT
+ printf("zone_init: metadata0 %p:%p (%zuK)\n",
+ (void *)mr.min_address, (void *)(mr.min_address + hstart),
+ (size_t)hstart >> 10);
+ printf("zone_init: metadata1 %p:%p (%zuK)\n",
+ (void *)(mr.min_address + hend), (void *)mr.max_address,
+ (size_t)(zone_range_size(&mr) - hend) >> 10);
+ printf("zone_init: metabits %p:%p (%zuK)\n",
+ (void *)br.min_address, (void *)br.max_address,
+ (size_t)zone_range_size(&br) >> 10);
+#endif /* DEBUG || DEVELOPMENT */
}
- /* No zones found with the requested zone name */
- if (zone_ptr == ZONE_NULL) {
- return KERN_INVALID_ARGUMENT;
+ br.min_address = (br.min_address + ZBA_CHUNK_SIZE - 1) & -ZBA_CHUNK_SIZE;
+ br.max_address = br.max_address & -ZBA_CHUNK_SIZE;
+
+ zone_info.zi_meta_range = mr;
+ zone_info.zi_bits_range = br;
+
+ /*
+ * Migrate the original static metadata into its new location.
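+	 *
+	 * zi_meta_base is biased by the packed index of the lowest covered
+	 * page so that indexing it with a page's zone_pva_t lands on that
+	 * page's metadata entry (the same trick zone_foreign_mem_init() uses
+	 * with the startup array).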
+ */
+ zone_info.zi_meta_base = (struct zone_page_metadata *)mr.min_address -
+ zone_pva_from_addr(r0.min_address).packed_address;
+ foreign_base = zone_info.zi_map_range[ZONE_ADDR_FOREIGN].min_address;
+ zone_meta_populate(foreign_base, zone_foreign_size());
+ memcpy(zone_meta_from_addr(foreign_base),
+ zone_foreign_meta_array_startup,
+ atop(zone_foreign_size()) * sizeof(struct zone_page_metadata));
+
+ zba_populate(0);
+ memcpy(zba_base_header(), zba_chunk_startup,
+ sizeof(zba_chunk_startup));
+}
+
+/* Global initialization of Zone Allocator.
+ * Runs after zone_bootstrap.
+ */
+__startup_func
+static void
+zone_init(void)
+{
+ vm_size_t zone_map_size;
+ vm_size_t remaining_size;
+ vm_offset_t submap_min = 0;
+ uint64_t denom = 0;
+ uint64_t submap_ratios[Z_SUBMAP_IDX_COUNT] = {
+#ifdef __LP64__
+ [Z_SUBMAP_IDX_VA_RESTRICTED] = 20,
+#else
+ [Z_SUBMAP_IDX_VA_RESERVE] = 10,
+#endif /* defined(__LP64__) */
+ [Z_SUBMAP_IDX_GENERAL] = 40,
+ [Z_SUBMAP_IDX_BAG_OF_BYTES] = 40,
+ };
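+	/*
+	 * zone_submap_init() carves each submap out of the VA remaining at
+	 * that point, as ratio / remaining denominator: with 20/40/40 the
+	 * first submap takes 20/100 of the VA, the second 40/80 of what is
+	 * left, and the third 40/40 of the rest, preserving the overall
+	 * 20:40:40 split.
+	 */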
+
+ if (ZSECURITY_OPTIONS_SUBMAP_USER_DATA & zsecurity_options) {
+ zone_last_submap_idx = Z_SUBMAP_IDX_BAG_OF_BYTES;
+ } else {
+ zone_last_submap_idx = Z_SUBMAP_IDX_GENERAL;
}
+ zone_phys_mapped_max_pages = (uint32_t)atop(zone_phys_size_max());
- /* Logging not turned on for the requested zone */
- if (!DO_LOGGING(zone_ptr)) {
- return KERN_FAILURE;
+ for (unsigned idx = 0; idx <= zone_last_submap_idx; idx++) {
+#if DEBUG || DEVELOPMENT
+ char submap_name[1 + sizeof("submap")];
+ snprintf(submap_name, sizeof(submap_name), "submap%d", idx);
+ PE_parse_boot_argn(submap_name, &submap_ratios[idx], sizeof(uint64_t));
+#endif
+ denom += submap_ratios[idx];
}
- /* Allocate memory for btlog records */
- numrecs = (unsigned int)(get_btlog_records_count(zone_ptr->zlog_btlog));
- recs_size = round_page(numrecs * sizeof *recs);
+#if __LP64__
+ zone_map_size = ZONE_MAP_VIRTUAL_SIZE_LP64;
+#else
+ zone_map_size = ptoa(zone_phys_mapped_max_pages *
+ (denom + submap_ratios[Z_SUBMAP_IDX_VA_RESERVE]) / denom);
+#endif
- kr = kmem_alloc_pageable(ipc_kernel_map, &recs_addr, recs_size, VM_KERN_MEMORY_IPC);
- if (kr != KERN_SUCCESS) {
- return kr;
- }
+ remaining_size = zone_map_size -
+ ZONE_GUARD_SIZE * (zone_last_submap_idx + 1);
/*
- * We will call get_btlog_records() below which populates this region while holding a spinlock
- * (the btlog lock). So these pages need to be wired.
+ * And now allocate the various pieces of VA and submaps.
+ *
+	 * Make a first allocation of contiguous VA that we'll deallocate,
+	 * and then carve memory out of that range again linearly.
+	 * The kernel is still single threaded at this stage.
*/
- kr = vm_map_wire_kernel(ipc_kernel_map, recs_addr, recs_addr + recs_size,
- VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_IPC, FALSE);
- assert(kr == KERN_SUCCESS);
-
- recs = (zone_btrecord_t *)recs_addr;
- get_btlog_records(zone_ptr->zlog_btlog, recs, &numrecs);
-
- kr = vm_map_unwire(ipc_kernel_map, recs_addr, recs_addr + recs_size, FALSE);
- assert(kr == KERN_SUCCESS);
- *recsp = (zone_btrecord_t *) create_vm_map_copy(recs_addr, recs_size, numrecs * sizeof *recs);
- *recsCntp = numrecs;
+ struct zone_map_range *map_range =
+ &zone_info.zi_map_range[ZONE_ADDR_NATIVE];
- return KERN_SUCCESS;
+ *map_range = zone_init_allocate_va(0, zone_map_size, ZIA_NONE);
+ submap_min = map_range->min_address;
-#else /* DEBUG || DEVELOPMENT */
-#pragma unused(host, name, recsp, recsCntp)
- return KERN_FAILURE;
-#endif /* DEBUG || DEVELOPMENT */
-}
+ /*
+ * Allocate the submaps
+ */
+ for (unsigned idx = 0; idx <= zone_last_submap_idx; idx++) {
+ zone_submap_init(&submap_min, idx, submap_ratios[idx],
+ &denom, &remaining_size, ZONE_GUARD_SIZE);
+ }
+ assert(submap_min == map_range->max_address);
-#if DEBUG || DEVELOPMENT
+ zone_metadata_init();
-kern_return_t
-mach_memory_info_check(void)
-{
- mach_memory_info_t * memory_info;
- mach_memory_info_t * info;
- unsigned int num_info;
- vm_offset_t memory_info_addr;
- kern_return_t kr;
- size_t memory_info_size, memory_info_vmsize;
- uint64_t top_wired, zonestotal, total;
+#if VM_MAX_TAG_ZONES
+ if (zone_tagging_on) {
+ zone_tagging_init(zone_map_size);
+ }
+#endif
+#if CONFIG_GZALLOC
+ gzalloc_init(zone_map_size);
+#endif
- num_info = vm_page_diagnose_estimate();
- memory_info_size = num_info * sizeof(*memory_info);
- memory_info_vmsize = round_page(memory_info_size);
- kr = kmem_alloc(kernel_map, &memory_info_addr, memory_info_vmsize, VM_KERN_MEMORY_DIAG);
- assert(kr == KERN_SUCCESS);
+ zone_create_flags_t kma_flags = ZC_NOCACHING |
+ ZC_NOGC | ZC_NOENCRYPT | ZC_NOGZALLOC | ZC_NOCALLOUT |
+ ZC_KASAN_NOQUARANTINE | ZC_KASAN_NOREDZONE;
- memory_info = (mach_memory_info_t *) memory_info_addr;
- vm_page_diagnose(memory_info, num_info, 0);
+ (void)zone_create_ext("vm.permanent", 1, kma_flags,
+ ZONE_ID_PERMANENT, ^(zone_t z){
+ z->z_permanent = true;
+ z->z_elem_size = 1;
+#if defined(__LP64__)
+ z->z_submap_idx = Z_SUBMAP_IDX_VA_RESTRICTED;
+#endif
+ });
+ (void)zone_create_ext("vm.permanent.percpu", 1, kma_flags | ZC_PERCPU,
+ ZONE_ID_PERCPU_PERMANENT, ^(zone_t z){
+ z->z_permanent = true;
+ z->z_elem_size = 1;
+#if defined(__LP64__)
+ z->z_submap_idx = Z_SUBMAP_IDX_VA_RESTRICTED;
+#endif
+ });
- top_wired = total = zonestotal = 0;
+ /*
+ * Now migrate the startup statistics into their final storage.
+ */
+ int cpu = cpu_number();
zone_index_foreach(idx) {
- zonestotal += zone_size_wired(&zone_array[idx]);
- }
+ zone_t tz = &zone_array[idx];
- for (uint32_t idx = 0; idx < num_info; idx++) {
- info = &memory_info[idx];
- if (!info->size) {
- continue;
- }
- if (VM_KERN_COUNT_WIRED == info->site) {
- top_wired = info->size;
- }
- if (VM_KERN_SITE_HIDE & info->flags) {
- continue;
- }
- if (!(VM_KERN_SITE_WIRED & info->flags)) {
- continue;
+ if (tz->z_stats == __zpcpu_mangle_for_boot(&zone_stats_startup[idx])) {
+ zone_stats_t zs = zalloc_percpu_permanent_type(struct zone_stats);
+
+ *zpercpu_get_cpu(zs, cpu) = *zpercpu_get_cpu(tz->z_stats, cpu);
+ tz->z_stats = zs;
+#if ZONE_ENABLE_LOGGING
+ if (tz->zone_logging && !tz->zlog_btlog) {
+ zone_enable_logging(tz);
+ }
+#endif /* ZONE_ENABLE_LOGGING */
}
- total += info->size;
}
- total += zonestotal;
- printf("vm_page_diagnose_check %qd of %qd, zones %qd, short 0x%qx\n",
- total, top_wired, zonestotal, top_wired - total);
-
- kmem_free(kernel_map, memory_info_addr, memory_info_vmsize);
+#if CONFIG_ZLEAKS
+ /*
+ * Initialize the zone leak monitor
+ */
+ zleak_init(zone_map_size);
+#endif /* CONFIG_ZLEAKS */
- return kr;
+#if VM_MAX_TAG_ZONES
+ if (zone_tagging_on) {
+ vm_allocation_zones_init();
+ }
+#endif
}
+STARTUP(ZALLOC, STARTUP_RANK_FIRST, zone_init);
-extern boolean_t(*volatile consider_buffer_cache_collect)(int);
+__startup_func
+static void
+zone_cache_bootstrap(void)
+{
+ zone_t magzone;
-#endif /* DEBUG || DEVELOPMENT */
+ magzone = zone_create("zcc_magazine_zone", sizeof(struct zone_magazine) +
+ zc_mag_size() * sizeof(zone_element_t),
+ ZC_NOGZALLOC | ZC_KASAN_NOREDZONE | ZC_KASAN_NOQUARANTINE |
+ ZC_SEQUESTER | ZC_CACHING | ZC_ZFREE_CLEARMEM);
+ magzone->z_elems_rsv = (uint16_t)(2 * zpercpu_count());
-kern_return_t
-mach_zone_force_gc(
- host_t host)
-{
- if (host == HOST_NULL) {
- return KERN_INVALID_HOST;
- }
+ os_atomic_store(&zc_magazine_zone, magzone, compiler_acq_rel);
-#if DEBUG || DEVELOPMENT
- /* Callout to buffer cache GC to drop elements in the apfs zones */
- if (consider_buffer_cache_collect != NULL) {
- (void)(*consider_buffer_cache_collect)(0);
+ /*
+ * Now that we are initialized, we can enable zone caching for zones that
+	 * were made before zone_cache_bootstrap() was called.
+ *
+ * The system is still single threaded so we don't need to take the lock.
+ */
+ zone_index_foreach(i) {
+ zone_t z = &zone_array[i];
+ if (z->z_pcpu_cache) {
+ z->z_pcpu_cache = NULL;
+ zone_enable_caching(z);
+ }
}
- consider_zone_gc(FALSE);
-#endif /* DEBUG || DEVELOPMENT */
- return KERN_SUCCESS;
}
+STARTUP(ZALLOC, STARTUP_RANK_FOURTH, zone_cache_bootstrap);
-zone_t
-zone_find_largest(void)
+void
+zalloc_first_proc_made(void)
{
- uint32_t largest_idx = 0;
- vm_offset_t largest_size = zone_size_wired(&zone_array[0]);
+ zone_caching_disabled = 0;
+}
- zone_index_foreach(i) {
- vm_offset_t size = zone_size_wired(&zone_array[i]);
- if (size > largest_size) {
- largest_idx = i;
- largest_size = size;
- }
+__startup_func
+vm_offset_t
+zone_foreign_mem_init(vm_size_t size)
+{
+ vm_offset_t mem;
+
+ if (atop(size) > ZONE_FOREIGN_META_INLINE_COUNT) {
+ panic("ZONE_FOREIGN_META_INLINE_COUNT has become too small: "
+ "%d > %d", (int)atop(size), ZONE_FOREIGN_META_INLINE_COUNT);
}
- return &zone_array[largest_idx];
+ mem = (vm_offset_t)pmap_steal_memory(size);
+
+ zone_info.zi_meta_base = zone_foreign_meta_array_startup -
+ zone_pva_from_addr(mem).packed_address;
+ zone_info.zi_map_range[ZONE_ADDR_FOREIGN].min_address = mem;
+ zone_info.zi_map_range[ZONE_ADDR_FOREIGN].max_address = mem + size;
+
+ zone_info.zi_bits_range = (struct zone_map_range){
+ .min_address = (vm_offset_t)zba_chunk_startup,
+ .max_address = (vm_offset_t)zba_chunk_startup +
+ sizeof(zba_chunk_startup),
+ };
+ zba_init_chunk(0);
+
+ return mem;
}
+#endif /* !ZALLOC_TEST */
#pragma mark - tests
#if DEBUG || DEVELOPMENT
* a second zinit() comes through before zdestroy()), which could lead us to
* run out of zones.
*/
-SIMPLE_LOCK_DECLARE(zone_test_lock, 0);
+static SIMPLE_LOCK_DECLARE(zone_test_lock, 0);
static boolean_t zone_test_running = FALSE;
static zone_t test_zone_ptr = NULL;
static uintptr_t *
-zone_copy_allocations(zone_t z, uintptr_t *elems, bitmap_t *bits,
- zone_pva_t page_index, zone_addr_kind_t kind)
+zone_copy_allocations(zone_t z, uintptr_t *elems, zone_pva_t page_index)
{
- vm_offset_t free, first, end, page;
+ vm_offset_t elem_size = zone_elem_size(z);
+ vm_offset_t base;
struct zone_page_metadata *meta;
while (!zone_pva_is_null(page_index)) {
- page = zone_pva_to_addr(page_index);
- meta = zone_pva_to_meta(page_index, kind);
- end = page + ptoa(meta->zm_percpu ? 1 : meta->zm_page_count);
- first = page + ZONE_PAGE_FIRST_OFFSET(kind);
+ base = zone_pva_to_addr(page_index);
+ meta = zone_pva_to_meta(page_index);
- bitmap_clear(bits, (uint32_t)((end - first) / zone_elem_size(z)));
+ if (meta->zm_inline_bitmap) {
+ for (size_t i = 0; i < meta->zm_chunk_len; i++) {
+ uint32_t map = meta[i].zm_bitmap;
- // construct bitmap of all freed elements
- free = zone_page_meta_get_freelist(z, meta, page);
- while (free) {
- bitmap_set(bits, (uint32_t)((free - first) / zone_elem_size(z)));
-
- // next free element
- free = *(vm_offset_t *)free ^ zp_nopoison_cookie;
- }
-
- for (unsigned i = 0; first < end; i++, first += zone_elem_size(z)) {
- if (!bitmap_test(bits, i)) {
- *elems++ = INSTANCE_PUT(first);
+ for (; map; map &= map - 1) {
+ *elems++ = INSTANCE_PUT(base +
+ elem_size * __builtin_clz(map));
+ }
+ base += elem_size * 32;
+ }
+ } else {
+ uint32_t order = zba_bits_ref_order(meta->zm_bitmap);
+ bitmap_t *bits = zba_bits_ref_ptr(meta->zm_bitmap);
+ for (size_t i = 0; i < (1u << order); i++) {
+ uint64_t map = bits[i];
+
+ for (; map; map &= map - 1) {
+ *elems++ = INSTANCE_PUT(base +
+ elem_size * __builtin_clzll(map));
+ }
+ base += elem_size * 64;
}
}
uint32_t idx, count, found;
uint32_t btidx, btcount, nobtcount, btfound;
uint32_t elemSize;
- uint64_t maxElems;
+ size_t maxElems;
kern_return_t kr;
- bitmap_t *bits;
- zone_index_foreach(i) {
- if (!strncmp(zoneName, zone_array[i].z_name, nameLen)) {
- zone = &zone_array[i];
+ zone_foreach(z) {
+ if (!strncmp(zoneName, z->z_name, nameLen)) {
+ zone = z;
break;
}
}
return KERN_INVALID_NAME;
}
- elemSize = zone_elem_size(zone);
- maxElems = (zone->countavail + 1) & ~1ul;
+ elemSize = (uint32_t)zone_elem_size(zone);
+ maxElems = (zone->z_elems_avail + 1) & ~1ul;
- if ((ptoa(zone->percpu ? 1 : zone->alloc_pages) % elemSize) &&
+ if ((ptoa(zone->z_percpu ? 1 : zone->z_chunk_pages) % elemSize) &&
!zone_leaks_scan_enable) {
return KERN_INVALID_CAPABILITY;
}
kr = kmem_alloc_kobject(kernel_map, (vm_offset_t *) &array,
- maxElems * sizeof(uintptr_t) + BITMAP_LEN(ZONE_CHUNK_MAXELEMENTS),
- VM_KERN_MEMORY_DIAG);
+ maxElems * sizeof(uintptr_t), VM_KERN_MEMORY_DIAG);
if (KERN_SUCCESS != kr) {
return kr;
}
- /* maxElems is a 2-multiple so we're always aligned */
- bits = CAST_DOWN_EXPLICIT(bitmap_t *, array + maxElems);
-
- lock_zone(zone);
+ zone_lock(zone);
next = array;
- next = zone_copy_allocations(zone, next, bits,
- zone->pages_any_free_foreign, ZONE_ADDR_FOREIGN);
- next = zone_copy_allocations(zone, next, bits,
- zone->pages_all_used_foreign, ZONE_ADDR_FOREIGN);
- next = zone_copy_allocations(zone, next, bits,
- zone->pages_intermediate, ZONE_ADDR_NATIVE);
- next = zone_copy_allocations(zone, next, bits,
- zone->pages_all_used, ZONE_ADDR_NATIVE);
+ next = zone_copy_allocations(zone, next, zone->z_pageq_partial);
+ next = zone_copy_allocations(zone, next, zone->z_pageq_full);
count = (uint32_t)(next - array);
- unlock_zone(zone);
+ zone_unlock(zone);
- zone_leaks_scan(array, count, zone_elem_size(zone), &found);
+ zone_leaks_scan(array, count, (uint32_t)zone_elem_size(zone), &found);
assert(found <= count);
for (idx = 0; idx < count; idx++) {
unsigned int i = 0, max_iter = 5;
void * test_ptr;
zone_t test_zone;
+ zone_t test_pcpu_zone;
+ kern_return_t kr;
simple_lock(&zone_test_lock, &zone_locks_grp);
if (!zone_test_running) {
}
#if KASAN_ZALLOC
- if (test_zone_ptr == NULL && test_zone->countfree != 0) {
+ if (test_zone_ptr == NULL && test_zone->z_elems_free != 0) {
#else
- if (test_zone->countfree != 0) {
+ if (test_zone->z_elems_free != 0) {
#endif
printf("run_zone_test: free count is not zero\n");
return FALSE;
int idx, num_allocs = 8;
vm_size_t elem_size = 2 * PAGE_SIZE / num_allocs;
void *allocs[num_allocs];
- vm_offset_t phys_pages = os_atomic_load(&zones_phys_page_count, relaxed);
- vm_size_t zone_map_size = zone_range_size(&zone_info.zi_map_range);
+ void **allocs_pcpu;
+ vm_offset_t phys_pages = os_atomic_load(&zones_phys_page_mapped_count, relaxed);
test_zone = zone_create("test_zone_sysctl", elem_size,
ZC_DESTRUCTIBLE | ZC_SEQUESTER);
- if (test_zone == NULL) {
- printf("run_zone_test: zinit() failed\n");
- return FALSE;
- }
+ assert(test_zone);
+
+ test_pcpu_zone = zone_create("test_zone_sysctl.pcpu", sizeof(uint64_t),
+ ZC_DESTRUCTIBLE | ZC_SEQUESTER | ZC_PERCPU);
+ assert(test_pcpu_zone);
for (idx = 0; idx < num_allocs; idx++) {
allocs[idx] = zalloc(test_zone);
for (idx = 0; idx < num_allocs; idx++) {
zfree(test_zone, allocs[idx]);
}
- assert(!zone_pva_is_null(test_zone->pages_all_free));
+ assert(!zone_pva_is_null(test_zone->z_pageq_empty));
+
+ kr = kernel_memory_allocate(kernel_map,
+ (vm_address_t *)&allocs_pcpu, PAGE_SIZE,
+ 0, KMA_ZERO | KMA_KOBJECT, VM_KERN_MEMORY_DIAG);
+ assert(kr == KERN_SUCCESS);
+
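+	/* fill and drain the per-cpu test zone to exercise its page queues */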
+ for (idx = 0; idx < PAGE_SIZE / sizeof(uint64_t); idx++) {
+ allocs_pcpu[idx] = zalloc_percpu(test_pcpu_zone,
+ Z_WAITOK | Z_ZERO);
+ assert(NULL != allocs_pcpu[idx]);
+ }
+ for (idx = 0; idx < PAGE_SIZE / sizeof(uint64_t); idx++) {
+ zfree_percpu(test_pcpu_zone, allocs_pcpu[idx]);
+ }
+ assert(!zone_pva_is_null(test_pcpu_zone->z_pageq_empty));
- printf("vm_page_wire_count %d, vm_page_free_count %d, p to v %qd%%\n",
+ printf("vm_page_wire_count %d, vm_page_free_count %d, p to v %ld%%\n",
vm_page_wire_count, vm_page_free_count,
- (100ULL * ptoa_64(phys_pages)) / zone_map_size);
- zone_gc(FALSE);
- printf("vm_page_wire_count %d, vm_page_free_count %d, p to v %qd%%\n",
+ 100L * phys_pages / zone_phys_mapped_max_pages);
+ zone_gc(ZONE_GC_DRAIN);
+ printf("vm_page_wire_count %d, vm_page_free_count %d, p to v %ld%%\n",
vm_page_wire_count, vm_page_free_count,
- (100ULL * ptoa_64(phys_pages)) / zone_map_size);
+ 100L * phys_pages / zone_phys_mapped_max_pages);
+
unsigned int allva = 0;
- zone_index_foreach(zidx) {
- zone_t z = &zone_array[zidx];
- lock_zone(z);
- allva += z->page_count;
- if (!z->sequester_page_count) {
- unlock_zone(z);
+
+ zone_foreach(z) {
+ zone_lock(z);
+ allva += z->z_wired_cur;
+ if (zone_pva_is_null(z->z_pageq_va)) {
+ zone_unlock(z);
continue;
}
unsigned count = 0;
uint64_t size;
- zone_pva_t pg = z->pages_sequester;
+ zone_pva_t pg = z->z_pageq_va;
struct zone_page_metadata *page_meta;
while (pg.packed_address) {
- page_meta = zone_pva_to_meta(pg, ZONE_ADDR_NATIVE);
- count += z->alloc_pages;
+ page_meta = zone_pva_to_meta(pg);
+ count += z->z_percpu ? 1 : z->z_chunk_pages;
+ if (page_meta->zm_chunk_len == ZM_SECONDARY_PAGE) {
+ count -= page_meta->zm_page_index;
+ }
pg = page_meta->zm_page_next;
}
- assert(count == z->sequester_page_count);
+ assert(z->z_wired_cur + count == z->z_va_cur);
size = zone_size_wired(z);
if (!size) {
size = 1;
}
printf("%s%s: seq %d, res %d, %qd %%\n",
- zone_heap_name(z), z->z_name, z->sequester_page_count,
- z->page_count, zone_size_allocated(z) * 100ULL / size);
- unlock_zone(z);
+ zone_heap_name(z), z->z_name, z->z_va_cur - z->z_wired_cur,
+ z->z_wired_cur, zone_size_allocated(z) * 100ULL / size);
+ zone_unlock(z);
}
printf("total va: %d\n", allva);
- assert(zone_pva_is_null(test_zone->pages_all_free));
- assert(!zone_pva_is_null(test_zone->pages_sequester));
- assert(2 == test_zone->sequester_page_count);
+ assert(zone_pva_is_null(test_zone->z_pageq_empty));
+ assert(zone_pva_is_null(test_zone->z_pageq_partial));
+ assert(!zone_pva_is_null(test_zone->z_pageq_va));
+ assert(zone_pva_is_null(test_pcpu_zone->z_pageq_empty));
+ assert(zone_pva_is_null(test_pcpu_zone->z_pageq_partial));
+ assert(!zone_pva_is_null(test_pcpu_zone->z_pageq_va));
+
for (idx = 0; idx < num_allocs; idx++) {
assert(0 == pmap_find_phys(kernel_pmap, (addr64_t)(uintptr_t) allocs[idx]));
}
+
+ /* make sure the zone is still usable after a GC */
+
for (idx = 0; idx < num_allocs; idx++) {
allocs[idx] = zalloc(test_zone);
assert(allocs[idx]);
printf("alloc[%d] %p\n", idx, allocs[idx]);
}
- assert(zone_pva_is_null(test_zone->pages_sequester));
- assert(0 == test_zone->sequester_page_count);
+ assert(zone_pva_is_null(test_zone->z_pageq_va));
+ assert(test_zone->z_wired_cur == test_zone->z_va_cur);
for (idx = 0; idx < num_allocs; idx++) {
zfree(test_zone, allocs[idx]);
}
+
+ for (idx = 0; idx < PAGE_SIZE / sizeof(uint64_t); idx++) {
+ allocs_pcpu[idx] = zalloc_percpu(test_pcpu_zone,
+ Z_WAITOK | Z_ZERO);
+ assert(NULL != allocs_pcpu[idx]);
+ }
+ for (idx = 0; idx < PAGE_SIZE / sizeof(uint64_t); idx++) {
+ zfree_percpu(test_pcpu_zone, allocs_pcpu[idx]);
+ }
+
+ assert(!zone_pva_is_null(test_pcpu_zone->z_pageq_empty));
+ assert(zone_pva_is_null(test_pcpu_zone->z_pageq_va));
+
+ kmem_free(kernel_map, (vm_address_t)allocs_pcpu, PAGE_SIZE);
+
zdestroy(test_zone);
+ zdestroy(test_pcpu_zone);
} else {
printf("run_zone_test: skipping sequester test (not enabled)\n");
}
void
zone_gc_replenish_test(void)
{
- zone_gc(FALSE);
+ zone_gc(ZONE_GC_DRAIN);
}
*/
zone_index_foreach(i) {
z = &zone_array[i];
- if (z->prio_refill_count &&
- zone_elem_size(z) >= sizeof(struct data)) {
+ if (z->z_replenishes && zone_elem_size(z) >= sizeof(struct data)) {
z = &zone_array[i];
break;
}
#define zalloc_permanent_type(type_t) \
((type_t *)zalloc_permanent(sizeof(type_t), ZALIGN(type_t)))
+/*!
+ * @function zalloc_first_proc_made()
+ *
+ * @abstract
+ * Declare that the "early" allocation phase is done.
+ */
+extern void
+zalloc_first_proc_made(void);
+
#pragma mark XNU only: per-cpu allocations
/*!
ZONE_ID_PROC,
ZONE_ID_VM_MAP_COPY,
ZONE_ID_PMAP,
+ ZONE_ID_VM_MAP,
ZONE_ID__FIRST_DYNAMIC,
});
* @param zone the specified zone
* @returns the zone (sub)map this zone allocates from.
*/
+__pure2
extern vm_map_t zone_submap(
zone_t zone);
* - isn't sensitive to @c zone_t::elem_size being compromised,
* - is slightly faster as it saves one load and a multiplication.
*
+ * @warning: zones using foreign memory can't use this interface.
+ *
* @param zone_id the zone ID the address needs to belong to.
* @param elem_size the size of elements for this zone.
* @param addr the element address to check.
vm_size_t elem_size,
void *addr);
+/*!
+ * @function zone_id_require_allow_foreign
+ *
+ * @abstract
+ * Requires for a given pointer to belong to the specified zone, by ID and size.
+ *
+ * @discussion
+ * This is a version of @c zone_id_require() that works with zones allowing
+ * foreign memory.
+ */
+extern void zone_id_require_allow_foreign(
+ zone_id_t zone_id,
+ vm_size_t elem_size,
+ void *addr);
+
/*
* Zone submap indices
*
- * Z_SUBMAP_IDX_VA_RESTRICTED_MAP (LP64)
+ * Z_SUBMAP_IDX_VA_RESTRICTED (LP64)
* used to restrict VM allocations lower in the kernel VA space,
* for pointer packing
*
- * Z_SUBMAP_IDX_GENERAL_MAP
+ * Z_SUBMAP_IDX_VA_RESERVE (ILP32)
+ * used to keep a reserve of VA space for urgent allocations
+ * backing crucial VM types (fictitious pages, holes, ...)
+ *
+ * Z_SUBMAP_IDX_GENERAL
* used for unrestricted allocations
*
- * Z_SUBMAP_IDX_BAG_OF_BYTES_MAP
+ * Z_SUBMAP_IDX_BAG_OF_BYTES
* used to sequester bags of bytes from all other allocations and allow VA reuse
* within the map
*/
-#if !defined(__LP64__)
-#define Z_SUBMAP_IDX_GENERAL_MAP 0
-#define Z_SUBMAP_IDX_BAG_OF_BYTES_MAP 1
-#define Z_SUBMAP_IDX_COUNT 2
+#if defined(__LP64__)
+#define Z_SUBMAP_IDX_VA_RESTRICTED 0
#else
-#define Z_SUBMAP_IDX_VA_RESTRICTED_MAP 0
-#define Z_SUBMAP_IDX_GENERAL_MAP 1
-#define Z_SUBMAP_IDX_BAG_OF_BYTES_MAP 2
-#define Z_SUBMAP_IDX_COUNT 3
+#define Z_SUBMAP_IDX_VA_RESERVE 0
#endif
+#define Z_SUBMAP_IDX_GENERAL 1
+#define Z_SUBMAP_IDX_BAG_OF_BYTES 2
+#define Z_SUBMAP_IDX_COUNT 3
/* Change zone sub-map, to be called from the zone_create_ext() setup hook */
extern void zone_set_submap_idx(
/* Make zone as non expandable, to be called from the zone_create_ext() setup hook */
extern void zone_set_noexpand(
zone_t zone,
- vm_size_t maxsize);
+ vm_size_t max_elements);
/* Make zone exhaustible, to be called from the zone_create_ext() setup hook */
extern void zone_set_exhaustible(
zone_t zone,
- vm_size_t maxsize);
+ vm_size_t max_elements);
-/* Initially fill zone with specified number of elements */
-extern int zfill(
- zone_t zone,
- int nelem);
-
-/* Fill zone with memory */
-extern void zcram(
+/*!
+ * @function zone_fill_initially
+ *
+ * @brief
+ * Initially fill a non-collectable zone so that it holds the specified
+ * number of elements.
+ *
+ * @discussion
+ * This function must be called on a non-collectable permanent zone before
+ * the zone has been used.
+ *
+ * @param zone The zone to fill.
+ * @param nelems The number of elements the zone must be able to hold.
+ */
+extern void zone_fill_initially(
zone_t zone,
- vm_offset_t newmem,
- vm_size_t size);
+ vm_size_t nelems);
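For instance (illustrative; the zone and the element count are hypothetical):

/* Pre-fill a hypothetical permanent, non-collectable zone right after
 * creation, before anything is allocated from it. */
zone_fill_initially(my_permanent_zone, 4096);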
#pragma mark XNU only: misc & implementation details
#define __zpcpu_cast(ptr, e) ((typeof(ptr))(e))
#define __zpcpu_next(ptr) __zpcpu_cast(ptr, __zpcpu_addr(ptr) + PAGE_SIZE)
+/**
+ * @macro __zpcpu_mangle_for_boot()
+ *
+ * @discussion
+ * Per-cpu variables allocated in zones (as opposed to percpu globals) that need
+ * to function early during boot (before @c STARTUP_SUB_ZALLOC) might use static
+ * storage marked @c __startup_data and replace it with the proper allocation
+ * at the end of the @c STARTUP_SUB_ZALLOC phase (@c STARTUP_RANK_LAST).
+ *
+ * However, some devices boot from a cpu where @c cpu_number() != 0. This macro
+ * provides the proper mangling of the storage into a "fake" percpu pointer so
+ * that accesses through @c zpercpu_get() function properly.
+ *
+ * Using this macro after the @c STARTUP_SUB_ZALLOC phase has completed is invalid.
+ */
+#define __zpcpu_mangle_for_boot(ptr) ({ \
+ assert(startup_phase < STARTUP_SUB_ZALLOC); \
+ __zpcpu_cast(ptr, __zpcpu_mangle(__zpcpu_addr(ptr) - ptoa(cpu_number()))); \
+})
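A hedged sketch of the boot pattern described above; "struct my_stats", the variable names, and the migration detail are hypothetical, and the final allocation uses zalloc_percpu_permanent() as the removed zcache code did:

/* Hypothetical per-cpu counters that must work before zalloc is up. */
struct my_stats {
    uint64_t ops;
};

static struct my_stats stats_early __startup_data;
static struct my_stats *__zpercpu my_stats;

/* Runs before STARTUP_SUB_ZALLOC: point the percpu pointer at the static
 * storage, mangled so that zpercpu_get(my_stats) resolves to &stats_early
 * even when the boot CPU is not CPU 0. */
__startup_func
static void
my_stats_early_init(void)
{
    my_stats = __zpcpu_mangle_for_boot(&stats_early);
}

/* Runs at the end of STARTUP_SUB_ZALLOC: swap in the real allocation
 * (counters accumulated in stats_early would typically be migrated here). */
__startup_func
static void
my_stats_alloc(void)
{
    my_stats = zalloc_percpu_permanent(sizeof(struct my_stats),
        ZALIGN(struct my_stats));
}
STARTUP(ZALLOC, STARTUP_RANK_LAST, my_stats_alloc);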
+
extern unsigned zpercpu_count(void) __pure2;
#include <kern/locks.h>
#include <kern/btlog.h>
#include <kern/simple_lock.h>
-#include <kern/zcache_internal.h>
#include <os/atomic_private.h>
+#include <sys/queue.h>
#if KASAN
-#include <sys/queue.h>
#include <san/kasan.h>
-/*
- * Set to 0 to debug poisoning and ZC_ZFREE_CLEARMEM validation under kasan.
- * Otherwise they are double-duty with what kasan already does.
- */
-#define ZALLOC_ENABLE_POISONING 0
-#else /* !KASAN */
-#define ZALLOC_ENABLE_POISONING 1
+#include <kern/spl.h>
#endif /* !KASAN */
-#if DEBUG || DEVELOPMENT
-#define ZALLOC_DETAILED_STATS 1
-#else
-#define ZALLOC_DETAILED_STATS 0
-#endif
-
/*!
* @file <kern/zalloc_internal.h>
*
struct zone_stats {
uint64_t zs_mem_allocated;
uint64_t zs_mem_freed;
-#if ZALLOC_DETAILED_STATS
- uint64_t zs_mem_wasted;
-#endif /* ZALLOC_DETAILED_STATS */
+ uint32_t zs_poison_seqno; /* counter for poisoning every N frees */
+ uint32_t zs_alloc_rr; /* allocation rr bias */
};
+STAILQ_HEAD(zone_depot, zone_magazine);
+
struct zone {
/*
* Readonly / rarely written fields
zone_stats_t z_stats;
const char *z_name;
struct zone_view *z_views;
-#ifdef CONFIG_ZCACHE
- struct zone_cache zcache;
-#endif /* CONFIG_ZCACHE */
- uint16_t alloc_pages; /* size used for more memory in pages */
- uint16_t z_elem_size; /* size of an element */
- uint16_t pcpu_elem_size;
- uint16_t prio_refill_count; /* if !=0 , refill to this count */
- uint32_t page_count_max; /* how large can this zone grow */
+ struct thread *z_expander;
+ struct zone_cache *__zpercpu z_pcpu_cache;
- uint32_t page_count_hwm; /* page_count high watermark */
- uint32_t page_count; /* number of pages used by this zone */
- uint32_t countavail; /* Number of elements available */
+ uint16_t z_chunk_pages; /* size used for more memory in pages */
+ uint16_t z_chunk_elems; /* count of allocations per chunk */
+ uint16_t z_elems_rsv; /* maintain a free reserve of elements */
+ uint16_t z_elem_size; /* size of an element */
uint64_t
/*
* Lifecycle state (Mutable after creation)
*/
- destroyed :1, /* zone is (being) destroyed */
- expanding_no_vm_priv:1, /* zone expanding via a non-vm_privileged thread */
- expanding_vm_priv :1, /* zone expanding via a vm_privileged thread */
- async_pending :1, /* asynchronous allocation pending? */
- waiting :1, /* is thread waiting for expansion? */
- zone_replenishing :1,
+ z_destroyed :1, /* zone is (being) destroyed */
+ z_async_refilling :1, /* asynchronous allocation pending? */
+ z_replenish_wait :1, /* someone is waiting on the replenish thread */
+ z_expanding_wait :1, /* is thread waiting for expansion? */
+ z_expander_vm_priv :1, /* a vm privileged thread is expanding */
/*
* Security sensitive configuration bits
*/
- allows_foreign :1, /* allow non-zalloc space */
- destructible :1, /* zone can be zdestroy()ed */
+ z_allows_foreign :1, /* allow non-zalloc space */
+ z_destructible :1, /* zone can be zdestroy()ed */
kalloc_heap :2, /* zone_kheap_id_t when part of a kalloc heap */
- noencrypt :1, /* do not encrypt pages when hibernating */
- submap_idx :2, /* a Z_SUBMAP_IDX_* value */
- va_sequester :1, /* page sequester: no VA reuse with other zones */
- zfree_clear_mem :1, /* clear memory of elements on free and assert on alloc */
+ z_noencrypt :1, /* do not encrypt pages when hibernating */
+ z_submap_idx :2, /* a Z_SUBMAP_IDX_* value */
+ z_va_sequester :1, /* page sequester: no VA reuse with other zones */
+ z_free_zeroes :1, /* clear memory of elements on free and assert on alloc */
/*
* Behavior configuration bits
*/
+ z_percpu :1, /* the zone is percpu */
+ z_permanent :1, /* the zone allocations are permanent */
+ z_replenishes :1, /* uses the async replenish mechanism for VM */
+ z_nocaching :1, /* disallow zone caching for this zone */
collectable :1, /* garbage collect empty pages */
- cpu_cache_enabled :1,
- permanent :1, /* the zone allocations are permanent */
exhaustible :1, /* merely return if empty? */
expandable :1, /* expand zone (with message)? */
no_callout :1,
- percpu :1, /* the zone is percpu */
_reserved :26,
* often mutated fields
*/
- decl_simple_lock_data(, lock);
+ lck_spin_t z_lock;
+ struct zone_depot z_recirc;
+
+ /*
+ * Page accounting (wired / VA)
+ *
+ * Those numbers are unscaled for z_percpu zones
+ * (zone_scale_for_percpu() needs to be used to find the true value).
+ */
+ uint32_t z_wired_max; /* how large can this zone grow */
+ uint32_t z_wired_hwm; /* z_wired_cur high watermark */
+ uint32_t z_wired_cur; /* number of pages used by this zone */
+ uint32_t z_wired_empty; /* pages collectable by GC */
+ uint32_t z_va_cur; /* amount of VA used by this zone */
/*
* list of metadata structs, which maintain per-page free element lists
* Note: Due to the index packing in page metadata,
* these pointers can't be at the beginning of the zone struct.
*/
- zone_pva_t pages_any_free_foreign; /* foreign pages crammed into zone */
- zone_pva_t pages_all_used_foreign;
- zone_pva_t pages_all_free;
- zone_pva_t pages_intermediate;
- zone_pva_t pages_all_used;
- zone_pva_t pages_sequester; /* sequestered pages - allocated VA with no populated pages */
-
- uint32_t zp_count; /* counter for poisoning every N frees */
- uint32_t countfree; /* Number of free elements */
- uint32_t allfree_page_count; /* Number of pages collectable by GC */
- uint32_t sequester_page_count;
+ zone_pva_t z_pageq_empty; /* populated, completely empty pages */
+ zone_pva_t z_pageq_partial;/* populated, partially filled pages */
+ zone_pva_t z_pageq_full; /* populated, completely full pages */
+ zone_pva_t z_pageq_va; /* non-populated VA pages */
+
+ /*
+ * Zone statistics
+ *
+ * z_contention_wma:
+ * weighted moving average of the number of contentions per second,
+ * in Z_CONTENTION_WMA_UNIT units (fixed point decimal).
+ *
+ * z_contention_cur:
+ * count of recorded contentions that will be folded into z_contention_wma
+ * at the next period.
+ *
+ * z_recirc_cur:
+ * number of magazines in the recirculation depot.
+ *
+ * z_elems_free:
+ * number of free elements in the zone.
+ *
+ * z_elems_{min,max}:
+ * tracks the low/high watermark of z_elems_free for the current
+ * weighted moving average period.
+ *
+ * z_elems_free_wss:
+ * weighted moving average of the (z_elems_free_max - z_elems_free_min)
+ * amplitude, which is used by the GC for trim operations.
+ *
+ * z_elems_avail:
+ * total number of elements in the zone.
+ */
+#define Z_CONTENTION_WMA_UNIT (1u << 8)
+ uint32_t z_contention_wma;
+ uint32_t z_contention_cur;
+ uint32_t z_recirc_cur;
+ uint32_t z_elems_free_max;
+ uint32_t z_elems_free_wss;
+ uint32_t z_elems_free_min;
+ uint32_t z_elems_free; /* Number of free elements */
+ uint32_t z_elems_avail; /* Number of elements available */
#if CONFIG_ZLEAKS
uint32_t zleak_capture; /* per-zone counter for capturing every N allocations */
gzalloc_data_t gz;
#endif
#if KASAN_ZALLOC
- vm_size_t kasan_redzone;
+ uint32_t z_kasan_redzone;
+ spl_t z_kasan_spl;
#endif
#if DEBUG || DEVELOPMENT || CONFIG_ZLEAKS
/* zone logging structure to hold stacks and element references to those stacks. */
};
extern zone_security_options_t zsecurity_options;
-extern uint32_t _Atomic num_zones;
+extern zone_id_t _Atomic num_zones;
extern uint32_t zone_view_count;
extern struct zone zone_array[];
-extern lck_grp_t zone_locks_grp;
extern const char * const kalloc_heap_names[KHEAP_ID_COUNT];
+extern bool panic_include_zprint;
+#if CONFIG_ZLEAKS
+extern bool panic_include_ztrace;
+extern struct ztrace *top_ztrace;
+#endif
+extern mach_memory_info_t *panic_kext_memory_info;
+extern vm_size_t panic_kext_memory_size;
+extern unsigned int zone_map_jetsam_limit;
#define zone_index_foreach(i) \
- for (uint32_t i = 1, num_zones_##i = os_atomic_load(&num_zones, acquire); \
+ for (zone_id_t i = 1, num_zones_##i = os_atomic_load(&num_zones, acquire); \
i < num_zones_##i; i++)
+#define zone_foreach(z) \
+ for (zone_t z = &zone_array[1], \
+ last_zone_##z = &zone_array[os_atomic_load(&num_zones, acquire)]; \
+ z < last_zone_##z; z++)
+
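A small usage sketch for the new iterator (illustrative; the printf is for demonstration only, and reading z_elems_free without the zone lock is a statistics-grade approximation):

/* Walk every created zone and log a rough free-element count. */
zone_foreach(z) {
    printf("zone %s: %u free\n", z->z_name, z->z_elems_free);
}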
+struct zone_map_range {
+ vm_offset_t min_address;
+ vm_offset_t max_address;
+} __attribute__((aligned(2 * sizeof(vm_offset_t))));
+
__pure2
static inline vm_offset_t
zone_elem_size(zone_t zone)
static inline uint32_t
zone_count_allocated(zone_t zone)
{
- return zone->countavail - zone->countfree;
+ return zone->z_elems_avail - zone->z_elems_free;
+}
+
+static inline vm_size_t
+zone_scale_for_percpu(zone_t zone, vm_size_t size)
+{
+ if (zone->z_percpu) {
+ size *= zpercpu_count();
+ }
+ return size;
}
static inline vm_size_t
 * this either requires the zone lock,
 * or is to be used for statistics purposes only.
*/
- return ptoa(os_atomic_load(&zone->page_count, relaxed));
+ vm_size_t size = ptoa(os_atomic_load(&zone->z_wired_cur, relaxed));
+ return zone_scale_for_percpu(zone, size);
}
static inline vm_size_t
zone_size_free(zone_t zone)
{
- return (vm_size_t)zone->pcpu_elem_size * zone->countfree;
+ return zone_scale_for_percpu(zone,
+ (vm_size_t)zone->z_elem_size * zone->z_elems_free);
}
static inline vm_size_t
zone_size_allocated(zone_t zone)
{
- return (vm_size_t)zone->pcpu_elem_size * zone_count_allocated(zone);
+ return zone_scale_for_percpu(zone,
+ (vm_size_t)zone->z_elem_size * zone_count_allocated(zone));
}
static inline vm_size_t
zone_size_wasted(zone_t zone)
{
- return zone_size_wired(zone) -
- (vm_size_t)zone->pcpu_elem_size * zone->countavail;
+ return zone_size_wired(zone) - zone_scale_for_percpu(zone,
+ (vm_size_t)zone->z_elem_size * zone->z_elems_avail);
}
/*
*/
extern uint64_t get_zones_collectable_bytes(void);
-/*
- * zone_gc also checks if the zone maps are getting close to full and triggers
- * jetsams if needed, provided consider_jetsams is set to TRUE.
+/*!
+ * @enum zone_gc_level_t
+ *
+ * @const ZONE_GC_TRIM
+ * Request a trimming GC: it will trim allocations in excess
+ * of the working set size estimate only.
+ *
+ * @const ZONE_GC_DRAIN
+ * Request a draining GC: this is an aggressive mode that will
+ * cause all caches to be drained and all free pages returned to the system.
+ *
+ * @const ZONE_GC_JETSAM
+ * Request to consider a jetsam, and then fallback to @c ZONE_GC_TRIM or
+ * @c ZONE_GC_DRAIN depending on the state of the zone map.
+ * To avoid deadlocks, only @c vm_pageout_garbage_collect() should ever
+ * request a @c ZONE_GC_JETSAM level.
+ */
+__enum_closed_decl(zone_gc_level_t, uint32_t, {
+ ZONE_GC_TRIM,
+ ZONE_GC_DRAIN,
+ ZONE_GC_JETSAM,
+});
+
+/*!
+ * @function zone_gc
+ *
+ * @brief
+ * Reduces memory used by zones by trimming caches and freelists.
*
- * To avoid deadlocks, we only pass a value of TRUE from within the
- * vm_pageout_garbage_collect thread.
+ * @discussion
+ * @c zone_gc() is called:
+ * - by the pageout daemon when the system needs more free pages.
+ * - by the VM when contiguous page allocation requests get stuck
+ * (see vm_page_find_contiguous()).
+ *
+ * @param level The zone GC level requested.
+ */
+extern void zone_gc(zone_gc_level_t level);
+
+extern void zone_gc_trim(void);
+extern void zone_gc_drain(void);
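A hedged sketch of how callers might pick between these entry points, given the constraint above that only the pageout garbage collector may request a jetsam-capable GC (the surrounding function and its parameters are hypothetical):

/* Illustrative only. */
static void
my_memory_pressure_response(bool from_pageout_gc, bool critical)
{
    if (from_pageout_gc) {
        zone_gc(ZONE_GC_JETSAM);   /* may consider a jetsam, then trims or drains */
    } else if (critical) {
        zone_gc_drain();           /* drain caches, return all free pages */
    } else {
        zone_gc_trim();            /* trim only above the working-set estimate */
    }
}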
+
+#define ZONE_WSS_UPDATE_PERIOD 10
+/*!
+ * @function compute_zone_working_set_size
+ *
+ * @brief
+ * Recomputes the working set size for every zone.
+ *
+ * @discussion
+ * This runs about every @c ZONE_WSS_UPDATE_PERIOD seconds (10),
+ * computing an exponential moving average with a weight of 75%,
+ * so that the history of the last minute is the dominating factor.
*/
-extern void zone_gc(boolean_t consider_jetsams);
-extern void consider_zone_gc(boolean_t consider_jetsams);
+extern void compute_zone_working_set_size(void *);
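To make the 75% weighting concrete, here is an illustrative update step (an assumption about the exact formula; only the 10-second period and the 75% weight are stated above):

/* Illustrative EMA step, run once per ZONE_WSS_UPDATE_PERIOD (10s):
 * the previous estimate keeps 75% of the weight, so after 6 periods
 * (one minute) older history retains only ~0.75^6 ~= 18% of the weight. */
static uint32_t
wss_update(uint32_t wss_prev, uint32_t sample)
{
    return (3 * wss_prev + sample) / 4;
}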
/* Debug logging for zone-map-exhaustion jetsams. */
extern void get_zone_map_size(uint64_t *current_size, uint64_t *capacity);
/* Bootstrap zone module (create zone zone) */
extern void zone_bootstrap(void);
-/*
+/*!
+ * @function zone_foreign_mem_init
+ *
+ * @brief
* Steal memory from pmap (prior to initialization of zalloc)
* for the special vm zones that allow foreign memory and store
- * the range so as to facilitate range checking in zfree/zcram.
+ * the range so as to facilitate range checking in zfree.
*/
__startup_func
-extern vm_offset_t zone_foreign_mem_init(vm_size_t size);
+extern vm_offset_t zone_foreign_mem_init(
+ vm_size_t size);
-/*
- * Returns size (greater than min_pages) that is a multiple
- * of the allocation granule for the zone.
+/*!
+ * @function zone_get_foreign_alloc_size
+ *
+ * @brief
+ * Compute the correct size (greater than @c ptoa(min_pages)) that is a multiple
+ * of the allocation granule for the zone with the given creation flags and
+ * element size.
*/
__startup_func
extern vm_size_t zone_get_foreign_alloc_size(
zone_create_flags_t flags,
uint16_t min_pages);
+/*!
+ * @function zone_cram_foreign
+ *
+ * @brief
+ * Cram memory allocated with @c zone_foreign_mem_init() into a zone.
+ *
+ * @param zone The zone to cram memory into.
+ * @param newmem The base address for the memory to cram.
+ * @param size The size of the memory to cram into the zone.
+ */
+__startup_func
+extern void zone_cram_foreign(
+ zone_t zone,
+ vm_offset_t newmem,
+ vm_size_t size);
+
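A sketch of how the foreign-memory helpers above fit together at startup ("my_zone" and the size are placeholders; in practice the size would come from zone_get_foreign_alloc_size() for the zone's creation flags and element size):

/* Illustrative only. */
__startup_func
static void
my_foreign_zone_bootstrap(zone_t my_zone, vm_size_t size)
{
    /* Steal wired memory from pmap before zalloc is fully initialized... */
    vm_offset_t mem = zone_foreign_mem_init(size);

    /* ...and cram it into the zone as its initial (range-checked) backing. */
    zone_cram_foreign(my_zone, mem, size);
}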
extern bool zone_maps_owned(
vm_address_t addr,
vm_size_t size);
vm_map_size_t *pfree,
vm_map_size_t *plargest_free);
-extern boolean_t
-is_zone_map_nearing_exhaustion(void);
+extern bool
+zone_map_nearing_exhaustion(void);
#if defined(__LP64__)
#define ZONE_POISON 0xdeadbeefdeadbeef
#define ZONE_POISON 0xdeadbeef
#endif
-/*
- * Used by zalloc_direct_locked() and zcache to mark elements that have been
- * cleared or poisoned and need to be checked.
- */
-#define ZALLOC_ELEMENT_NEEDS_VALIDATION ((vm_offset_t)1)
-
static inline vm_tag_t
zalloc_flags_get_tag(zalloc_flags_t flags)
{
extern void *zalloc_ext(
zone_t zone,
zone_stats_t zstats,
- zalloc_flags_t flags,
- vm_size_t wasted);
+ zalloc_flags_t flags);
extern void zfree_ext(
zone_t zone,
zone_stats_t zstats,
void *addr);
-/* free an element with no regard for gzalloc, zleaks, or kasan*/
-extern void zfree_direct_locked(
- zone_t zone,
- vm_offset_t elem,
- bool poison);
-
-/*
- * attempts to allocate an element with no regard for gzalloc, zleaks, or kasan
- * returns an address possibly tagged with ZALLOC_ELEMENT_NEEDS_VALIDATION.
+/*!
+ * @function zone_replenish_configure
+ *
+ * @brief
+ * Used by zones backing the VM to maintain a reserve of free elements.
+ *
+ * @discussion
+ * This function should not be used by anyone other than the VM.
*/
-extern vm_offset_t zalloc_direct_locked(
- zone_t zone,
- zalloc_flags_t flags,
- vm_size_t waste);
-
-extern uint32_t zone_poison_count_init(
- zone_t zone);
-
-extern bool zfree_clear_or_poison(
- zone_t zone,
- uint32_t *zp_count,
- vm_address_t addr);
-
-extern void zone_clear_freelist_pointers(
- zone_t zone,
- vm_offset_t addr);
-
-#if ZALLOC_ENABLE_POISONING
-extern void zalloc_validate_element(
- zone_t zone,
- vm_offset_t addr,
- vm_size_t size,
- bool validate);
-#endif
-
-extern void zone_allocated_element_validate(
- zone_t zone,
- vm_offset_t addr);
-
-extern void zone_prio_refill_configure(
+extern void zone_replenish_configure(
zone_t zone);
extern vm_size_t zone_element_size(
#endif /* VM_MAX_TAG_ZONES */
-#define lock_zone(zone) simple_lock(&(zone)->lock, &zone_locks_grp)
-#define unlock_zone(zone) simple_unlock(&(zone)->lock)
+static inline void
+zone_lock(zone_t zone)
+{
+#if KASAN_ZALLOC
+ spl_t s = 0;
+ if (zone->kasan_fakestacks) {
+ s = splsched();
+ }
+#endif /* KASAN_ZALLOC */
+ lck_spin_lock(&zone->z_lock);
+#if KASAN_ZALLOC
+ zone->z_kasan_spl = s;
+#endif /* KASAN_ZALLOC */
+}
+
+static inline void
+zone_unlock(zone_t zone)
+{
+#if KASAN_ZALLOC
+ spl_t s = zone->z_kasan_spl;
+ zone->z_kasan_spl = 0;
+#endif /* KASAN_ZALLOC */
+ lck_spin_unlock(&zone->z_lock);
+#if KASAN_ZALLOC
+ if (zone->kasan_fakestacks) {
+ splx(s);
+ }
+#endif /* KASAN_ZALLOC */
+}
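For example, a caller that wants an exact count (rather than the lock-free, statistics-only reads mentioned earlier) could bracket it with the new helpers (illustrative):

/* Illustrative only. */
static uint32_t
my_zone_exact_allocated(zone_t z)
{
    uint32_t n;

    zone_lock(z);
    n = zone_count_allocated(z);
    zone_unlock(z);
    return n;
}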
#if CONFIG_GZALLOC
void gzalloc_init(vm_size_t);
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <kern/assert.h>
-#include <kern/cpu_data.h>
-#include <mach/mach_host.h>
-#include <vm/vm_kern.h>
-#include <kern/startup.h>
-#include <kern/zalloc_internal.h>
-
-/* Size of array in magazine determined by boot-arg or default */
-TUNABLE(uint16_t, magazine_element_count, "zcc_magazine_element_count", 8);
-
-/* Size of depot lists determined by boot-arg or default */
-TUNABLE(uint16_t, depot_element_count, "zcc_depot_element_count", 8);
-
-SECURITY_READ_ONLY_LATE(zone_t) magazine_zone; /* zone to allocate zcc_magazine structs from */
-SECURITY_READ_ONLY_LATE(uintptr_t) zcache_canary; /* Canary used for the caching layer to prevent UaF attacks */
-
-/*
- * The zcc_magazine is used as a stack to store cached zone elements. These
- * sets of elements can be moved around to perform bulk operations.
- */
-struct zcc_magazine {
- uint32_t zcc_magazine_index; /* Used as a stack pointer to acess elements in the array */
- uint32_t zcc_magazine_capacity; /* Number of pointers able to be stored in the zcc_elements array */
- vm_offset_t zcc_elements[0]; /* Array of pointers to objects */
-};
-
-
-/*
- * Each CPU will use one of these to store its elements
- */
-struct zcc_per_cpu_cache {
- /* Magazine from which we will always try to allocate from and free to first */
- struct zcc_magazine *current;
- /* Dedicated magazine for a quick reload and to prevent thrashing wen we swap with the depot */
- struct zcc_magazine *previous;
- /* Zcache poisoning count */
- uint32_t zp_count;
-#if ZALLOC_DETAILED_STATS
- uint64_t zcc_allocs;
- uint64_t zcc_frees;
-#endif /* ZALLOC_DETAILED_STATS */
-};
-
-
-/* This is the basic struct to take care of cahing and is included within
- * the zone.
- */
-struct zcc_depot {
- /* marks the point in the array where empty magazines begin */
- int zcc_depot_index;
-
-#if ZALLOC_DETAILED_STATS
- uint64_t zcc_swap;
- uint64_t zcc_fill;
- uint64_t zcc_drain;
- uint64_t zcc_fail;
- uint64_t zcc_gc;
-#endif /* ZALLOC_DETAILED_STATS */
-
- /* Stores full and empty magazines in the depot layer */
- struct zcc_magazine *zcc_depot_list[0];
-};
-
-static bool zcache_mag_fill_locked(zone_t zone, struct zcc_magazine *mag);
-static void zcache_mag_drain_locked(zone_t zone, struct zcc_magazine *mag);
-static bool zcache_mag_has_space(struct zcc_magazine *mag);
-static bool zcache_mag_has_elements(struct zcc_magazine *mag);
-static void zcache_swap_magazines(struct zcc_magazine **a, struct zcc_magazine **b);
-static void zcache_mag_depot_swap_for_alloc(struct zcc_depot *depot, struct zcc_per_cpu_cache *cache);
-static void zcache_mag_depot_swap_for_free(struct zcc_depot *depot, struct zcc_per_cpu_cache *cache);
-static void zcache_canary_add(zone_t zone, vm_offset_t addr);
-#if ZALLOC_ENABLE_POISONING
-static void zcache_validate_element(zone_t zone, vm_offset_t *addr, bool poison);
-static void zcache_validate_and_clear_canary(zone_t zone, vm_offset_t *primary, vm_offset_t *backup);
-#endif
-
-/*
- * zcache_ready
- *
- * Returns whether or not the zone caches are ready to use
- *
- */
-static bool
-zcache_ready(void)
-{
- return magazine_zone != NULL;
-}
-
-/*
- * zcache_bootstrap
- *
- * Initializes zone to allocate magazines from and sets
- * magazine_element_count and depot_element_count from
- * boot-args or default values
- *
- */
-__startup_func
-static void
-zcache_bootstrap(void)
-{
- int magazine_size = sizeof(struct zcc_magazine) + magazine_element_count * sizeof(void *);
- zone_t magzone;
-
- /* Generate the canary value for zone caches */
- zcache_canary = (uintptr_t) early_random();
-
- magzone = zone_create("zcc_magazine_zone", magazine_size,
- ZC_NOCACHING | ZC_ZFREE_CLEARMEM);
-
- /*
- * This causes zcache_ready() to return true.
- */
- os_atomic_store(&magazine_zone, magzone, compiler_acq_rel);
-
- /*
- * Now that we are initialized, we can enable zone caching for zones that
- * were made before zcache_bootstrap() was called.
- *
- * The system is still single threaded so we don't need to take the lock.
- */
- zone_index_foreach(i) {
- if (zone_array[i].cpu_cache_enabled) {
- zcache_init(&zone_array[i]);
- }
- }
-}
-STARTUP(ZALLOC, STARTUP_RANK_FOURTH, zcache_bootstrap);
-
-static struct zcc_magazine *
-zcache_mag_alloc(void)
-{
- struct zcc_magazine *mag = zalloc_flags(magazine_zone, Z_WAITOK);
- mag->zcc_magazine_capacity = magazine_element_count;
- return mag;
-}
-
-
-/*
- * zcache_init
- *
- * Initializes all parts of the per-cpu caches for a given zone
- *
- * Parameters:
- * zone pointer to zone on which to iniitalize caching
- *
- */
-void
-zcache_init(zone_t zone)
-{
- struct zcc_per_cpu_cache *pcpu_caches;
- struct zcc_depot *depot;
- vm_size_t size;
-
- /*
- * If zcache hasn't been initialized yet, remember our decision,
- *
- * zcache_init() will be called again by zcache_bootstrap(),
- * while the system is still single threaded, to build the missing caches.
- */
- if (!zcache_ready()) {
- zone->cpu_cache_enabled = true;
- return;
- }
-
- /* Allocate chunk of memory for all structs */
- size = sizeof(struct zcc_depot) + (depot_element_count * sizeof(void *));
- depot = zalloc_permanent(size, ZALIGN_PTR);
-
- size = sizeof(struct zcc_per_cpu_cache);
- pcpu_caches = zalloc_percpu_permanent(size, ZALIGN_PTR);
-
- /* Initialize a cache for every CPU */
- zpercpu_foreach(cache, pcpu_caches) {
- cache->current = zcache_mag_alloc();
- cache->previous = zcache_mag_alloc();
- cache->zp_count = zone_poison_count_init(zone);
- }
-
- /* Initialize empty magazines in the depot list */
- for (int i = 0; i < depot_element_count; i++) {
- depot->zcc_depot_list[i] = zcache_mag_alloc();
- }
-
- lock_zone(zone);
- if (zone->zcache.zcc_depot) {
- panic("allocating caches for zone %s twice", zone->z_name);
- }
-
- /* Make the initialization of the per-cpu magazines visible. */
- os_atomic_thread_fence(release);
-
- zone->zcache.zcc_depot = depot;
- zone->zcache.zcc_pcpu = pcpu_caches;
- zone->cpu_cache_enabled = true;
- unlock_zone(zone);
-}
-
-/*
- * zcache_drain_depot
- *
- * Frees all the full magazines from the depot layer to the zone allocator as part
- * of zone_gc(). The routine assumes that only one zone_gc() is in progress (zone_gc_lock
- * ensures that)
- *
- * Parameters:
- * zone pointer to zone for which the depot layer needs to be drained
- *
- * Returns: None
- *
- */
-void
-zcache_drain_depot(zone_t zone)
-{
- struct zcc_depot *depot;
- int drain_depot_index = 0;
-
- lock_zone(zone);
- depot = zone->zcache.zcc_depot;
- drain_depot_index = depot->zcc_depot_index;
- for (int i = 0; i < drain_depot_index; i++) {
- zcache_mag_drain_locked(zone, depot->zcc_depot_list[i]);
- }
-#if ZALLOC_DETAILED_STATS
- depot->zcc_gc += drain_depot_index;
-#endif /* ZALLOC_DETAILED_STATS */
- depot->zcc_depot_index = 0;
- unlock_zone(zone);
-}
-
-__attribute__((noinline))
-static void
-zcache_free_to_cpu_cache_slow(zone_t zone, struct zcc_per_cpu_cache *per_cpu_cache)
-{
- struct zcc_depot *depot;
-
- lock_zone(zone);
- depot = zone->zcache.zcc_depot;
- if (depot->zcc_depot_index < depot_element_count) {
- /* If able, rotate in a new empty magazine from the depot and retry */
- zcache_mag_depot_swap_for_free(depot, per_cpu_cache);
- } else {
- /* Free an entire magazine of elements */
- zcache_mag_drain_locked(zone, per_cpu_cache->current);
-#if ZALLOC_DETAILED_STATS
- depot->zcc_drain++;
-#endif /* ZALLOC_DETAILED_STATS */
- }
- unlock_zone(zone);
-}
-
-
-void
-zcache_free_to_cpu_cache(zone_t zone, zone_stats_t zstats, vm_offset_t addr)
-{
- struct zcc_per_cpu_cache *per_cpu_cache;
- vm_offset_t elem = addr;
- int cpu;
-
- zone_allocated_element_validate(zone, elem);
-
- /*
- * This is racy but we don't need zp_count to be accurate.
- * This allows us to do the poisoning with preemption enabled.
- */
- per_cpu_cache = zpercpu_get(zone->zcache.zcc_pcpu);
- if (zfree_clear_or_poison(zone, &per_cpu_cache->zp_count, elem)) {
- addr |= ZALLOC_ELEMENT_NEEDS_VALIDATION;
- } else {
- zcache_canary_add(zone, elem);
- }
-
-#if KASAN_ZALLOC
- kasan_poison_range(elem, zone_elem_size(zone), ASAN_HEAP_FREED);
-#endif
-
- disable_preemption();
- cpu = cpu_number();
- per_cpu_cache = zpercpu_get_cpu(zone->zcache.zcc_pcpu, cpu);
-
- if (zcache_mag_has_space(per_cpu_cache->current)) {
- /* If able, free into current magazine */
- } else if (zcache_mag_has_space(per_cpu_cache->previous)) {
- /* If able, swap current and previous magazine and retry */
- zcache_swap_magazines(&per_cpu_cache->previous, &per_cpu_cache->current);
- } else {
- zcache_free_to_cpu_cache_slow(zone, per_cpu_cache);
- }
-
- struct zcc_magazine *mag = per_cpu_cache->current;
- mag->zcc_elements[mag->zcc_magazine_index++] = addr;
- zpercpu_get_cpu(zstats, cpu)->zs_mem_freed += zone_elem_size(zone);
-#if ZALLOC_DETAILED_STATS
- per_cpu_cache->zcc_frees++;
-#endif /* ZALLOC_DETAILED_STATS */
-
- enable_preemption();
-}
-
-__attribute__((noinline))
-static bool
-zcache_alloc_from_cpu_cache_slow(zone_t zone, struct zcc_per_cpu_cache *per_cpu_cache)
-{
- struct zcc_depot *depot;
-
- lock_zone(zone);
- depot = zone->zcache.zcc_depot;
- if (depot->zcc_depot_index > 0) {
- /* If able, rotate in a full magazine from the depot */
- zcache_mag_depot_swap_for_alloc(depot, per_cpu_cache);
- } else if (zcache_mag_fill_locked(zone, per_cpu_cache->current)) {
-#if ZALLOC_DETAILED_STATS
- depot->zcc_fill++;
-#endif /* ZALLOC_DETAILED_STATS */
- } else {
-#if ZALLOC_DETAILED_STATS
- depot->zcc_fail++;
-#endif /* ZALLOC_DETAILED_STATS */
- /* If unable to allocate from cache return NULL and fall through to zalloc */
- unlock_zone(zone);
- enable_preemption();
- return false;
- }
- unlock_zone(zone);
-
- return true;
-}
-
-vm_offset_t
-zcache_alloc_from_cpu_cache(zone_t zone, zone_stats_t zstats, vm_size_t waste)
-{
- struct zcc_per_cpu_cache *per_cpu_cache;
- int cpu;
-
- disable_preemption();
- cpu = cpu_number();
- per_cpu_cache = zpercpu_get_cpu(zone->zcache.zcc_pcpu, cpu);
-
- if (zcache_mag_has_elements(per_cpu_cache->current)) {
- /* If able, allocate from current magazine */
- } else if (zcache_mag_has_elements(per_cpu_cache->previous)) {
- /* If able, swap current and previous magazine and retry */
- zcache_swap_magazines(&per_cpu_cache->previous, &per_cpu_cache->current);
- } else if (!zcache_alloc_from_cpu_cache_slow(zone, per_cpu_cache)) {
- return (vm_offset_t)NULL;
- }
-
- struct zcc_magazine *mag = per_cpu_cache->current;
- vm_offset_t elem_size = zone_elem_size(zone);
- uint32_t index = --mag->zcc_magazine_index;
- vm_offset_t addr = mag->zcc_elements[index];
- mag->zcc_elements[index] = 0;
- zpercpu_get_cpu(zstats, cpu)->zs_mem_allocated += elem_size;
-#if ZALLOC_DETAILED_STATS
- if (waste) {
- zpercpu_get_cpu(zstats, cpu)->zs_mem_wasted += waste;
- }
- per_cpu_cache->zcc_allocs++;
-#else
- (void)waste;
-#endif /* ZALLOC_DETAILED_STATS */
-
- enable_preemption();
-
-#if ZALLOC_ENABLE_POISONING
- bool validate = addr & ZALLOC_ELEMENT_NEEDS_VALIDATION;
-#endif /* ZALLOC_ENABLE_POISONING */
-
- addr &= ~ZALLOC_ELEMENT_NEEDS_VALIDATION;
-
-#if KASAN_ZALLOC
- kasan_poison_range(addr, elem_size, ASAN_VALID);
-#endif
-#if ZALLOC_ENABLE_POISONING
- if (!validate) {
- vm_offset_t backup = addr + elem_size - sizeof(vm_offset_t);
- zcache_validate_and_clear_canary(zone, (vm_offset_t *)addr,
- (vm_offset_t *)backup);
- }
- zalloc_validate_element(zone, addr, elem_size, validate);
-#endif /* ZALLOC_ENABLE_POISONING */
-
- return addr;
-}
-
-
-/*
- * zcache_mag_fill_locked
- *
- * Fills a magazine with as many elements as the zone can give
- * without blocking to carve out more memory
- *
- * Parameters:
- * zone zone from which to allocate
- * mag pointer to magazine to fill
- *
- * Return: True if able to allocate elements, false is mag is still empty
- */
-static bool
-zcache_mag_fill_locked(zone_t zone, struct zcc_magazine *mag)
-{
- uint32_t i = mag->zcc_magazine_index;
- uint32_t end = mag->zcc_magazine_capacity;
- vm_offset_t elem, addr;
-
- while (i < end && zone->countfree) {
- addr = zalloc_direct_locked(zone, Z_NOWAIT, 0);
- elem = addr & ~ZALLOC_ELEMENT_NEEDS_VALIDATION;
- if (addr & ZALLOC_ELEMENT_NEEDS_VALIDATION) {
- zone_clear_freelist_pointers(zone, elem);
- } else {
- zcache_canary_add(zone, elem);
- }
-#if KASAN_ZALLOC
- kasan_poison_range(elem, zone_elem_size(zone), ASAN_HEAP_FREED);
-#endif
- mag->zcc_elements[i++] = addr;
- }
-
- mag->zcc_magazine_index = i;
-
- return i != 0;
-}
-
-/*
- * zcache_mag_drain_locked
- *
- * Frees all elements in a magazine
- *
- * Parameters:
- * zone zone to which elements will be freed
- * mag pointer to magazine to empty
- *
- */
-static void
-zcache_mag_drain_locked(zone_t zone, struct zcc_magazine *mag)
-{
- vm_offset_t elem, addr;
- bool poison;
-
- for (uint32_t i = 0, end = mag->zcc_magazine_index; i < end; i++) {
- addr = mag->zcc_elements[i];
- poison = addr & ZALLOC_ELEMENT_NEEDS_VALIDATION;
- elem = addr & ~ZALLOC_ELEMENT_NEEDS_VALIDATION;
-
-#if ZALLOC_ENABLE_POISONING
- zcache_validate_element(zone, (vm_offset_t *)elem, poison);
-#endif /* ZALLOC_ENABLE_POISONING */
- zfree_direct_locked(zone, elem, poison);
- mag->zcc_elements[i] = 0;
- }
- mag->zcc_magazine_index = 0;
-}
-
-
-/*
- * zcache_mag_has_space
- *
- * Checks if magazine still has capacity
- *
- * Parameters:
- * mag pointer to magazine to check
- *
- * Returns: true if magazine is full
- *
- */
-static bool
-zcache_mag_has_space(struct zcc_magazine *mag)
-{
- return mag->zcc_magazine_index < mag->zcc_magazine_capacity;
-}
-
-
-/*
- * zcache_mag_has_elements
- *
- * Checks if magazine is empty
- *
- * Parameters:
- * mag pointer to magazine to check
- *
- * Returns: true if magazine has no elements
- *
- */
-static bool
-zcache_mag_has_elements(struct zcc_magazine *mag)
-{
- return mag->zcc_magazine_index > 0;
-}
-
-
-/*
- * zcache_swap_magazines
- *
- * Function which swaps two pointers of any type
- *
- * Parameters:
- * a pointer to first pointer
- * b pointer to second pointer
- */
-static void
-zcache_swap_magazines(struct zcc_magazine **a, struct zcc_magazine **b)
-{
- struct zcc_magazine *temp = *a;
- *a = *b;
- *b = temp;
-}
-
-
-/*
- * zcache_mag_depot_swap_for_alloc
- *
- * Swaps a full magazine into the current position
- *
- * Parameters:
- * depot pointer to the depot
- * cache pointer to the current per-cpu cache
- *
- * Precondition: Check that the depot list has full elements
- */
-static void
-zcache_mag_depot_swap_for_alloc(struct zcc_depot *depot, struct zcc_per_cpu_cache *cache)
-{
- /* Loads a full magazine from which we can allocate */
- assert(depot->zcc_depot_index > 0);
- depot->zcc_depot_index--;
-#if ZALLOC_DETAILED_STATS
- depot->zcc_swap++;
-#endif /* ZALLOC_DETAILED_STATS */
- zcache_swap_magazines(&cache->current, &depot->zcc_depot_list[depot->zcc_depot_index]);
-}
-
-
-/*
- * zcache_mag_depot_swap_for_free
- *
- * Swaps an empty magazine into the current position
- *
- * Parameters:
- * depot pointer to the depot
- * cache pointer to the current per-cpu cache
- *
- * Precondition: Check that the depot list has empty elements
- */
-static void
-zcache_mag_depot_swap_for_free(struct zcc_depot *depot, struct zcc_per_cpu_cache *cache)
-{
- /* Loads an empty magazine into which we can free */
- assert(depot->zcc_depot_index < depot_element_count);
- zcache_swap_magazines(&cache->current, &depot->zcc_depot_list[depot->zcc_depot_index]);
-#if ZALLOC_DETAILED_STATS
- depot->zcc_swap++;
-#endif /* ZALLOC_DETAILED_STATS */
- depot->zcc_depot_index++;
-}
-
-/*
- * zcache_canary_add
- *
- * Adds a canary to an element by putting zcache_canary at the first
- * and last location of the element
- *
- * Parameters:
- * zone zone for the element
- * addr element address to add canary to
- */
-static void
-zcache_canary_add(zone_t zone, vm_offset_t element)
-{
-#if ZALLOC_ENABLE_POISONING
- vm_offset_t *primary = (vm_offset_t *)element;
- vm_offset_t *backup = (vm_offset_t *)((vm_offset_t)primary +
- zone_elem_size(zone) - sizeof(vm_offset_t));
- *primary = *backup = (zcache_canary ^ (uintptr_t)element);
-#else
-#pragma unused(zone, element)
-#endif
-}
-
-#if ZALLOC_ENABLE_POISONING
-__abortlike static void
-zcache_validation_panic(zone_t zone, vm_offset_t *primary, vm_offset_t *backup,
- vm_offset_t permutation)
-{
- vm_offset_t primary_value = 0;
- vm_offset_t backup_value = 0;
-
- if (permutation == zcache_canary) {
- primary_value = *primary ^ (vm_offset_t)primary;
- backup_value = *backup ^ (vm_offset_t)primary;
- permutation = permutation ^ (vm_offset_t)primary;
- } else {
- primary_value = *primary;
- backup_value = *backup;
- }
- if (primary_value != permutation) {
- panic("Zone cache element was used after free! Element %p was corrupted at "
- "beginning; Expected 0x%lx but found 0x%lx; canary 0x%lx; zone %p (%s%s)",
- primary, (uintptr_t) permutation, (uintptr_t) *primary, zcache_canary, zone,
- zone_heap_name(zone), zone->z_name);
- } else {
- panic("Zone cache element was used after free! Element %p was corrupted at end; "
- "Expected 0x%lx but found 0x%lx; canary 0x%lx; zone %p (%s%s)",
- primary, (uintptr_t) permutation, (uintptr_t) *backup, zcache_canary, zone,
- zone_heap_name(zone), zone->z_name);
- }
-}
-
-/*
- * zcache_validate_and_clear_canary
- *
- * Validates an element of the zone cache to make sure it still contains the zone
- * caching canary and clears it.
- *
- * Parameters:
- * zone zone for the element
- * primary addr of canary placed in front
- * backup addr of canary placed at the back
- */
-static void
-zcache_validate_and_clear_canary(zone_t zone, vm_offset_t *primary, vm_offset_t *backup)
-{
- vm_offset_t primary_value = (*primary ^ (uintptr_t)primary);
- vm_offset_t backup_value = (*backup ^ (uintptr_t)primary);
-
- if (primary_value == zcache_canary && backup_value == zcache_canary) {
- *primary = *backup = ZONE_POISON;
- } else {
- zcache_validation_panic(zone, primary, backup, zcache_canary);
- }
-}
-
-/*
- * zcache_validate_element
- *
- * Validates the first and last pointer size of the element to ensure
- * that they haven't been altered. This function is used when an
- * element moves from cache to zone, therefore only validing the
- * first and last pointer size (location of future freelist pointers).
- *
- * Parameters:
- * zone zone for the element
- * element addr of element to validate
- * poison has the element been poisoned
- */
-static void
-zcache_validate_element(zone_t zone, vm_offset_t *element, bool poison)
-{
- vm_offset_t *primary = (vm_offset_t *)element;
- vm_offset_t *backup = (vm_offset_t *)((vm_offset_t)primary +
- zone_elem_size(zone) - sizeof(vm_offset_t));
-
- if (zone->zfree_clear_mem) {
- if (*primary == 0 && *backup == 0) {
- return;
- } else {
- zcache_validation_panic(zone, primary, backup, 0);
- }
- }
-
- if (__probable(!poison)) {
- zcache_validate_and_clear_canary(zone, primary, backup);
- } else {
- if (*primary == ZONE_POISON && *backup == ZONE_POISON) {
- return;
- } else {
- zcache_validation_panic(zone, primary, backup, ZONE_POISON);
- }
- }
-}
-#endif /* ZALLOC_ENABLE_POISONING */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Below is a diagram of the caching system. This design is based of the
- * paper "Magazines and Vmem: Extending the Slab Allocator to Many CPUs and
- * Arbitrary Resources" by Jeff Bonwick and Jonathan Adams. It is divided into 3
- * layers: the Per-cpu Layer, the Depot Layer, and the Zone Allocator. The
- * Per-CPU and Depot layers store elements using arrays we call magazines.
- *
- * Magazines function like a stack (we push and pop elements) and can be
- * moved around for bulk operations.
- * _________ _________ _________
- * | CPU 1 | | CPU 2 | | CPU 3 |
- * | _ _ | | _ _ | | _ _ |
- * | |#| | | | | | | |#| | | |#| |#| | Per-CPU Layer
- * | |#| |_| | | |_| |#| | | |#| |#| |
- * |_________| |_________| |_________|
- *
- * ______________________________________________
- * | _ _ _ _ _ _ |
- * | |#| |#| |#| | | | | | | | Depot Layer
- * | |#| |#| |#| |_| |_| |_| |
- * |______________________________________________|
- *
- * _______________________________________________
- * | # | # | # | # | # | # | # | # | # | # | # | # | Zone Allocator
- * |_______________________________________________|
- *
- * The top layer is the per-cpu cache and consists of a current and
- * previous magazine for each CPU. The current magazine is the one we always try
- * to allocate from and free to first. Only if we are unable, do we check the
- * previous magazine. If the previous magazine can satisfy the allocate or free,
- * then we switch the two and allocate from the new current magazine. This layer
- * requires no locking, so we can access multiple CPU's caches concurrently.
- * This is the main source of the speedup.
- *
- * We have two magazines here to prevent thrashing when swapping magazines
- * with the depot layer. If a certain pattern of alloc and free are called we
- * can waste a lot of time swapping magazines to and from the depot layer. We
- * prevent this by dividing the per-cpu cache into two separate magazines.
- *
- * The middle layer is the magazine depot. This layer consists of a
- * collection of full and empty magazines. These are used to reload the per-cpu
- * caches when needed. This is implemented as an array of magazines which are
- * initially all empty and as we fill up magazines we increment the index to
- * point at the first empty magazine. Since this layer is per-zone, it allows us
- * to balance the cache between cpus, but does require taking a lock.
- *
- * When neither the current nor previous magazine for a given CPU can
- * satisfy the free or allocation, we look to the depot layer. If there are
- * magazines in the depot that can satisfy the free or allocation we swap
- * that magazine into the current position. In the example below, to allocate on
- * the given CPU we must lock the depot layer and swap magazine A with magazine
- * B and decrement the depot index.
- *
- * _____________________ _______________________________________
- * | Per-CPU Cache | | Depot Layer |
- * | | | |
- * | A___ ____ | | ____ B___ ____ ____ |
- * | | | | | | | | ## | | ## | | | | | |
- * | | | | | | | | ## | | ## | | | | | |
- * | | | | | | | | ## | | ## | | | | | |
- * | | | | | | | | ## | | ## | | | | | |
- * | |____| |____| | | |_##_| |_##_| |____| |____| |
- * | Current Previous | | |
- * |_____________________| |_______________________________________|
- *
- * The bottom layer is the Zone Allocator. This is already implemented in
- * XNU and will remain mostly unchanged. Implementation for this can be found
- * in zalloc.c and zalloc.h. We will only use the zone if all other layers are
- * unable to satisfy the allocation or free. When we do use the zone, we will
- * try to allocate an entire magazine of elements or free an entire magazine of
- * elements at once.
- *
- * Caching must be enabled explicitly, by calling zone_create() with the
- * ZC_CACHING flag, for every zone you want to cache elements for. Zones
- * which are good candidates for this are ones with highly contended zone locks.
- *
- * Some good potential candidates are kalloc.16, kalloc.48, Vm objects, VM map
- * entries, ipc vouchers, and ipc ports.
- *
- *
- * Some factors can be tuned by boot-arg:
- * zcc_enable_for_zone_name name of a single zone to enable caching for
- * (replace space characters with '.')
- *
- * zcc_magazine_element_count integer value for magazine size used for all
- * zones (default 8 is used if not specified)
- *
- * zcc_depot_element_count integer value for how many full and empty
- * magazines to store in the depot, if N specified
- * depot will have N full and N empty magazines
- * (default 16 used if not specified)
- */
-
-#ifndef _KERN_ZCACHE_H_
-#define _KERN_ZCACHE_H_
-
-#include <kern/kern_types.h>
-#include <kern/zalloc.h> /* zone_stats_t */
-#include <vm/vm_kern.h>
-
-#if CONFIG_ZCACHE
-#pragma GCC visibility push(hidden)
-
-__BEGIN_DECLS
-
-struct zone_cache {
- struct zcc_per_cpu_cache *__zpercpu zcc_pcpu;
- struct zcc_depot *zcc_depot;
-};
-
-/**
- * @function zcache_init
- *
- * @abstract
- * Initializes all parts of the per-cpu caches for a given zone
- *
- * @param zone pointer to zone on which to iniitalize caching
- *
- */
-extern void zcache_init(
- zone_t zone);
-
-
-/**
- * @function zcache_free_to_cpu_cache()
- *
- * @abstract
- * Checks per-cpu caches to free element there if possible.
- *
- * @discussion
- * The caller is responsible for checking that caching is enabled for zone.
- *
- * @param zone pointer to zone for which element comes from
- * @param zstats pointer to the per-cpu statistics to maintain
- * @param addr adddress of the element to free
- */
-extern void zcache_free_to_cpu_cache(
- zone_t zone,
- zone_stats_t zstats,
- vm_offset_t addr);
-
-
-/**
- * @function zcache_alloc_from_cpu_cache
- *
- * @abstract
- * Checks per-cpu caches to allocate element from there if possible
- *
- * @discussion
- * The caller is responsible for checking that caching is enabled for zone.
- *
- * @param zone pointer to zone for which element will comes from
- * @param zstats pointer to the per-cpu statistics to maintain
- * @param waste amount of waste of this allocation (or 0)
- *
- * @return pointer to usable element
- */
-extern vm_offset_t zcache_alloc_from_cpu_cache(
- zone_t zone,
- zone_stats_t zstats,
- vm_size_t waste);
-
-/**
- * @function zcache_drain_depot
- *
- * @abstract
- * Frees all the full magazines from the depot layer to the zone allocator
- * Invoked by zone_gc()
- *
- * @param zone pointer to zone for which the depot layer needs to be drained
- */
-extern void zcache_drain_depot(
- zone_t zone);
-
-__END_DECLS
-
-#pragma GCC visibility pop
-#endif /* CONFIG_ZCACHE */
-#endif /* _KERN_ZCACHE_H_ */
sysdiagnose_notification.defs \
upl.defs \
vfs_nspace.defs \
- vm32_map.defs
+ vm32_map.defs \
+ iocompressionstats_notification.defs
#
# MIG-generated headers that are traditionally used by user
task_access_server.h \
telemetry_notification_server.h \
sysdiagnose_notification_server.h \
+ iocompressionstats_notification_server.h \
vfs_nspace_server.h
MIG_UUHDRS = \
coalition.h \
coalition_notification.defs \
fairplayd_notification.defs \
+ iocompressionstats_notification.defs \
arcade_upcall.defs \
host_info.h \
ktrace_background.defs \
resource_notify_user.c \
task_access_user.c \
telemetry_notification_user.c \
+ iocompressionstats_notification_user.c \
upl_user.c \
vfs_nspace_user.c \
vm_map_user.c \
#define MACH_ARM_TRAP_ABSTIME -3
#define MACH_ARM_TRAP_CONTTIME -4
+
#include <mach/port.h>
#include <mach/thread_status.h>
#include <mach/machine/vm_types.h>
+#include <mach_debug/ipc_info.h>
/*
* Exported types
*/
typedef exception_behavior_t *exception_behavior_array_t;
typedef thread_state_flavor_t *exception_flavor_array_t;
typedef mach_port_t *exception_port_array_t;
+typedef ipc_info_port_t *exception_port_info_array_t;
typedef mach_exception_data_type_t mach_exception_code_t;
typedef mach_exception_data_type_t mach_exception_subcode_t;
#define HOST_SYSPOLICYD_PORT (22 + HOST_MAX_SPECIAL_KERNEL_PORT)
#define HOST_FILECOORDINATIOND_PORT (23 + HOST_MAX_SPECIAL_KERNEL_PORT)
#define HOST_FAIRPLAYD_PORT (24 + HOST_MAX_SPECIAL_KERNEL_PORT)
+#define HOST_IOCOMPRESSIONSTATS_PORT (25 + HOST_MAX_SPECIAL_KERNEL_PORT)
-#define HOST_MAX_SPECIAL_PORT HOST_FAIRPLAYD_PORT
-/* MAX = last since rdar://35861175 */
+#define HOST_MAX_SPECIAL_PORT HOST_IOCOMPRESSIONSTATS_PORT
+/* MAX = last since rdar://59872249 */
/* obsolete name */
#define HOST_CHUD_PORT HOST_LAUNCHCTL_PORT
#define host_set_fairplayd_port(host, port) \
(host_set_special_port((host), HOST_FAIRPLAYD_PORT, (port)))
+#define host_get_iocompressionstats_port(host, port) \
+ (host_get_special_port((host), \
+ HOST_LOCAL_NODE, HOST_IOCOMPRESSIONSTATS_PORT, (port)))
+#define host_set_iocompressionstats_port(host, port) \
+ (host_set_special_port((host), HOST_IOCOMPRESSIONSTATS_PORT, (port)))
+
+
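A hedged sketch of using the new convenience macro from a privileged registrant (the function, its arguments, and how the rights were obtained are hypothetical):

/* Illustrative only: register a port as the I/O compression stats port.
 * Expands to host_set_special_port(host_priv, HOST_IOCOMPRESSIONSTATS_PORT, port). */
static kern_return_t
register_iocompressionstats_port(mach_port_t host_priv, mach_port_t port)
{
    return host_set_iocompressionstats_port(host_priv, port);
}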
/* HOST_RESOURCE_NOTIFY_PORT doesn't #define these conveniences.
* All lookups go through send_resource_violation()
*/
--- /dev/null
+/*
+ * Copyright (c) 2020, Apple Inc. All rights reserved.
+ */
+
+ /*
+ * Interface definition for the I/O compression stats notification facility.
+ */
+
+subsystem
+#if KERNEL_USER
+ KernelUser
+#endif /* KERNEL_USER */
+ iocompressionstats_notification 5600;
+
+#include <mach/std_types.defs>
+#include <mach/mach_types.defs>
+
+simpleroutine iocompressionstats_notification(
+ RequestPort iocompressionstats_port : mach_port_t;
+ in flags : uint32_t);
/* Denied by security policy
*/
+#define KERN_MISSING_KC 54
+/* The KC on which the function is operating is missing
+ */
+
+#define KERN_INVALID_KC 55
+/* The KC on which the function is operating is invalid
+ */
+
#define KERN_RETURN_MAX 0x100
/* Maximum return value allowable
*/
mach_vm_size_t size
);
+extern kern_return_t task_dyld_process_info_notify_get(
+ mach_port_name_array_t names_addr,
+ natural_t *names_count_addr
+ );
+
extern kern_return_t _kernelrpc_mach_vm_protect_trap(
mach_port_name_t target,
mach_vm_address_t address,
extern kern_return_t _kernelrpc_mach_vm_deallocate_trap(
struct _kernelrpc_mach_vm_deallocate_args *args);
+struct task_dyld_process_info_notify_get_trap_args {
+ PAD_ARG_(mach_vm_address_t, names_addr); /* 2 words */
+ PAD_ARG_(mach_vm_address_t, names_count_addr); /* 2 words */
+}; /* Total: 4 */
+
+extern kern_return_t task_dyld_process_info_notify_get_trap(
+ struct task_dyld_process_info_notify_get_trap_args *args);
+
struct _kernelrpc_mach_vm_protect_args {
PAD_ARG_(mach_port_name_t, target); /* 1 word */
PAD_ARG_(mach_vm_address_t, address); /* 2 words */
#endif /* KERNEL_SERVER */
;
+type task_id_token_t = mach_port_t
+#if KERNEL_SERVER
+ intran: task_id_token_t convert_port_to_task_id_token(mach_port_t)
+ outtran: mach_port_t convert_task_id_token_to_port(task_id_token_t)
+ destructor: task_id_token_release(task_id_token_t)
+#endif /* KERNEL_SERVER */
+ ;
+
type thread_t = mach_port_t
#if KERNEL_SERVER
intran: thread_t convert_port_to_thread(mach_port_t)
type exception_handler_t = mach_port_t;
+type exception_handler_info_t = struct[2] of natural_t;
+
type exception_handler_array_t =
array[*:32] of exception_handler_t;
+type exception_handler_info_array_t =
+ array[*:32] of exception_handler_info_t;
+
type exception_behavior_array_t =
array[*:32] of exception_behavior_t;
#ifdef MACH_KERNEL_PRIVATE
simport <ipc/ipc_voucher.h>; /* for voucher conversions */
simport <kern/ipc_kobject.h>; /* for null conversion */
-simport <kern/ipc_tt.h>; /* for task/thread conversion */
-simport <kern/ipc_host.h>; /* for host/processor/pset conversions */
+simport <kern/ipc_tt.h>; /* for task/thread conversion */
+simport <kern/ipc_host.h>; /* for host/processor/pset conversions */
simport <kern/ipc_sync.h>; /* for lock_set and semaphore conversions */
-simport <kern/ledger.h>; /* for ledger conversions */
-simport <kern/processor.h>; /* for processor conversions */
-simport <kern/sync_lock.h>; /* for lock-set conversions */
-simport <kern/sync_sema.h>; /* for semaphore conversions */
+simport <kern/ledger.h>; /* for ledger conversions */
+simport <kern/processor.h>; /* for processor conversions */
+simport <kern/sync_lock.h>; /* for lock-set conversions */
+simport <kern/sync_sema.h>; /* for semaphore conversions */
simport <ipc/ipc_eventlink.h>; /* for eventlink conversions */
simport <vm/memory_object.h>; /* for memory object type conversions */
-simport <vm/vm_map.h>; /* for vm_map conversions */
+simport <vm/vm_map.h>; /* for vm_map conversions */
#if CONFIG_ARCADE
-simport <kern/arcade.h>; /* for arcade_register conversions */
+simport <kern/arcade.h>; /* for arcade_register conversions */
#endif
#endif /* MACH_KERNEL_PRIVATE */
-simport <kern/ipc_mig.h>; /* pick up kernel-specific MIG things */
+simport <kern/ipc_mig.h>; /* pick up kernel-specific MIG things */
simport <kern/suid_cred.h>;
+simport <kern/task_ident.h>; /* for task_id_token conversions */
#endif /* KERNEL_SERVER */
import <mach/mig.h>;
typedef struct ipc_eventlink *ipc_eventlink_t;
typedef struct ipc_port *eventlink_port_pair_t[2];
typedef struct suid_cred *suid_cred_t;
+typedef struct task_id_token *task_id_token_t;
/*
* OBSOLETE: lock_set interfaces are obsolete.
typedef mach_port_t ipc_eventlink_t;
typedef mach_port_t eventlink_port_pair_t[2];
typedef mach_port_t suid_cred_t;
+typedef mach_port_t task_id_token_t;
#endif /* KERNEL */
typedef mach_port_t UNDServerRef;
typedef mach_port_t mach_eventlink_t;
+typedef ipc_info_port_t exception_handler_info_t;
+
/*
* Mig doesn't translate the components of an array.
* For example, Mig won't use the thread_t translations
#define MACH_EVENTLINK_NULL ((mach_eventlink_t) 0)
#define IPC_EVENTLINK_NULL ((ipc_eventlink_t) NULL)
#define SUID_CRED_NULL ((suid_cred_t) NULL)
+#define TASK_ID_TOKEN_NULL ((task_id_token_t) NULL)
#else
#define TASK_NULL ((task_t) 0)
#define TASK_NAME_NULL ((task_name_t) 0)
#define MACH_EVENTLINK_NULL ((mach_eventlink_t) 0)
#define IPC_EVENTLINK_NULL ((ipc_eventlink_t) 0)
#define SUID_CRED_NULL ((suid_cred_t) 0)
+#define TASK_ID_TOKEN_NULL ((task_id_token_t) 0)
#endif
/* capability strictly _DECREASING_.
* to be closest to the itk_lock. see task.h.
*/
typedef unsigned int mach_task_flavor_t;
-#define TASK_FLAVOR_CONTROL 0 /* a task_t */
+#define TASK_FLAVOR_CONTROL 0 /* a task_t */
#define TASK_FLAVOR_READ 1 /* a task_read_t */
#define TASK_FLAVOR_INSPECT 2 /* a task_inspect_t */
#define TASK_FLAVOR_NAME 3 /* a task_name_t */
/* capability strictly _DECREASING_ */
typedef unsigned int mach_thread_flavor_t;
-#define THREAD_FLAVOR_CONTROL 0 /* a thread_t */
+#define THREAD_FLAVOR_CONTROL 0 /* a thread_t */
#define THREAD_FLAVOR_READ 1 /* a thread_read_t */
#define THREAD_FLAVOR_INSPECT 2 /* a thread_inspect_t */
/* DEPRECATED */
-typedef natural_t ledger_item_t;
+typedef natural_t ledger_item_t;
#define LEDGER_ITEM_INFINITY ((ledger_item_t) (~0))
typedef int64_t ledger_amount_t;
skip;
#endif
+/*
+ * Map a portion of a task's address space; {max, cur}_protection is in/out.
+ */
+#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_)
+routine PREFIX(KERNEL_SERVER_SUFFIX(mach_vm_remap_new)) (
+#else
+routine PREFIX(KERNEL_SERVER_SUFFIX(vm_remap_new)) (
+#endif
+ target_task : vm_map_t;
+ inout target_address : mach_vm_address_t;
+ size : mach_vm_size_t;
+ mask : mach_vm_offset_t;
+ flags : int;
+#ifdef KERNEL_SERVER
+ src_tport : mach_port_t;
+#else
+ src_task : vm_map_read_t;
+#endif
+ src_address : mach_vm_address_t;
+ copy : boolean_t;
+ inout cur_protection : vm_prot_t;
+ inout max_protection : vm_prot_t;
+ inheritance : vm_inherit_t);
+
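From user space, the generated stub for the routine above would be called roughly as follows (a sketch; it assumes the stub is exposed as mach_vm_remap_new() via <mach/mach_vm.h>, and the flags and protections shown are illustrative):

/* Illustrative only: map a range from src_task into the caller, letting the
 * kernel pick the address; cur/max protections are in-out for the _new call. */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
share_range(mach_port_t src_task, mach_vm_address_t src_addr,
    mach_vm_size_t size, mach_vm_address_t *out_addr)
{
    vm_prot_t cur = VM_PROT_READ | VM_PROT_WRITE;
    vm_prot_t max = VM_PROT_READ | VM_PROT_WRITE;

    *out_addr = 0;
    return mach_vm_remap_new(mach_task_self(), out_addr, size, 0 /* mask */,
        VM_FLAGS_ANYWHERE, src_task, src_addr, FALSE /* copy */,
        &cur, &max, VM_INHERIT_NONE);
}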
/****************************** Legacy section ***************************/
/* The following definitions exist to provide compatibility with */
/* the legacy APIs. They are no different. We just need to produce */
/* extract a recipe array to reconstitute all the key values in a future voucher */
routine mach_voucher_debug_info(
- task : ipc_space_t;
+ task : ipc_space_read_t;
voucher_name: mach_port_name_t;
out recipes : mach_voucher_attr_raw_recipe_array_t, CountInOut);
#include <sys/cdefs.h>
+#if XNU_KERNEL_PRIVATE
+#include <os/refcnt.h>
+#if __LP64__
+#define MEMORY_OBJECT_HAS_REFCOUNT 1
+#else
+#define MEMORY_OBJECT_HAS_REFCOUNT 0
+#endif
+#endif /* XNU_KERNEL_PRIVATE */
+
#define VM_64_BIT_DATA_OBJECTS
typedef unsigned long long memory_object_offset_t;
struct memory_object_pager_ops; /* forward declaration */
+typedef struct vm_object *memory_object_control_t;
/*
- * "memory_object" and "memory_object_control" types used to be Mach ports
- * in user space and can be passed as such to some kernel APIs.
- * Their first field must match the "io_bits" field of a
- * "struct ipc_object" to identify them as a "IKOT_MEMORY_OBJECT" and
- * "IKOT_MEM_OBJ_CONTROL" respectively.
+ * "memory_object" used to be a Mach port in user space and could be passed
+ * as such to some kernel APIs.
+ *
+ * Its first field must match the "io_bits" field of a
+ * "struct ipc_object" to identify them as a "IKOT_MEMORY_OBJECT".
*/
-typedef struct memory_object {
+typedef struct memory_object {
mo_ipc_object_bits_t mo_ikot; /* DO NOT CHANGE */
+#if __LP64__
+#if XNU_KERNEL_PRIVATE
+ /*
+ * On LP64 there's a 4 byte hole that is perfect for a refcount.
+ * Expose it so that all pagers can take advantage of it.
+ */
+ os_ref_atomic_t mo_ref;
+#else
+ unsigned int __mo_padding;
+#endif /* XNU_KERNEL_PRIVATE */
+#endif /* __LP64__ */
const struct memory_object_pager_ops *mo_pager_ops;
- struct memory_object_control *mo_control;
+ memory_object_control_t mo_control;
} *memory_object_t;
-typedef struct memory_object_control {
- mo_ipc_object_bits_t moc_ikot; /* DO NOT CHANGE */
- struct vm_object *moc_object;
-} *memory_object_control_t;
-
typedef const struct memory_object_pager_ops {
void (*memory_object_reference)(
memory_object_t mem_obj);
#else /* KERNEL_PRIVATE */
typedef mach_port_t memory_object_t;
+/*
+ * Vestigial, maintained for source compatibility;
+ * no MIG interface will accept or return non-NULL
+ * objects of this type.
+ */
typedef mach_port_t memory_object_control_t;
#endif /* KERNEL_PRIVATE */
#define MAX_UPL_TRANSFER_BYTES (1024 * 1024)
#define MAX_UPL_SIZE_BYTES (1024 * 1024 * 64)
-#ifndef CONFIG_EMBEDDED
#define MAX_UPL_SIZE (MAX_UPL_SIZE_BYTES / PAGE_SIZE)
#define MAX_UPL_TRANSFER (MAX_UPL_TRANSFER_BYTES / PAGE_SIZE)
-#endif
struct upl_page_info {
ppnum_t phys_addr; /* physical page index number */
#define IPC_PORT_NULL ((ipc_port_t) NULL)
#define IPC_PORT_DEAD ((ipc_port_t)~0UL)
-#define IPC_PORT_VALID(port) \
- ((port) != IPC_PORT_NULL && (port) != IPC_PORT_DEAD)
+#define IPC_PORT_VALID(port) ipc_port_valid(port)
+
+static inline boolean_t
+ipc_port_valid(ipc_port_t port)
+{
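+ /* "valid" means neither IPC_PORT_NULL (i.e. NULL) nor IPC_PORT_DEAD */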
+ return port != IPC_PORT_DEAD && port;
+}
typedef ipc_port_t mach_port_t;
#define MACH_PORT_TYPE_DEAD_NAME MACH_PORT_TYPE(MACH_PORT_RIGHT_DEAD_NAME)
#define MACH_PORT_TYPE_LABELH MACH_PORT_TYPE(MACH_PORT_RIGHT_LABELH) /* obsolete */
-
#ifdef MACH_KERNEL_PRIVATE
/* Holder used to have a receive right - remembered to filter exceptions */
#define MACH_PORT_TYPE_EX_RECEIVE MACH_PORT_TYPE_LABELH
kGUARD_EXC_SEND_INVALID_RIGHT = 1u << 18,
kGUARD_EXC_RCV_INVALID_NAME = 1u << 19,
kGUARD_EXC_RCV_GUARDED_DESC = 1u << 20, /* should never be fatal; for development only */
+ kGUARD_EXC_MOD_REFS_NON_FATAL = 1u << 21,
+ kGUARD_EXC_IMMOVABLE_NON_FATAL = 1u << 22,
};
-#define MAX_FATAL_kGUARD_EXC_CODE (1u << 6)
+#define MAX_FATAL_kGUARD_EXC_CODE (1u << 7)
+
+/*
+ * Mach port guard flags.
+ */
+#define MPG_FLAGS_NONE (0x00ull)
/*
* These flags are used as bits in the subcode of kGUARD_EXC_STRICT_REPLY exceptions.
#define MPG_FLAGS_STRICT_REPLY_MISMATCHED_PERSONA (0x10ull << 56)
#define MPG_FLAGS_STRICT_REPLY_MASK (0xffull << 56)
+/*
+ * These flags are used as bits in the subcode of kGUARD_EXC_MOD_REFS exceptions.
+ */
+#define MPG_FLAGS_MOD_REFS_PINNED_DEALLOC (0x01ull << 56)
+
+/*
+ * These flags are used as bits in the subcode of kGUARD_EXC_IMMOVABLE exceptions.
+ */
+#define MPG_FLAGS_IMMOVABLE_PINNED (0x01ull << 56)
+
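/*
 * Illustrative sketch (editor's addition, not part of the diff): the
 * MPG_FLAGS_* values above occupy the top byte of the 64-bit guard-exception
 * subcode, so a consumer that has already extracted the kGUARD_EXC_* flavor
 * and the subcode from an EXC_GUARD report could test for the new pinned-port
 * cases like this.
 */
static boolean_t
guard_violation_is_pinned_port(unsigned int flavor, uint64_t subcode)
{
	if (flavor == kGUARD_EXC_MOD_REFS) {
		return (subcode & MPG_FLAGS_MOD_REFS_PINNED_DEALLOC) != 0;
	}
	if (flavor == kGUARD_EXC_IMMOVABLE) {
		return (subcode & MPG_FLAGS_IMMOVABLE_PINNED) != 0;
	}
	return FALSE;
}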
/*
* Flags for mach_port_guard_with_flags. These flags extend
* the attributes associated with a guarded port.
kernel_trap(_kernelrpc_mach_vm_allocate_trap,-10,5) /* 4 args, +1 for mach_vm_size_t */
kernel_trap(_kernelrpc_mach_vm_purgable_control_trap,-11,5) /* 4 args, +1 for mach_vm_offset_t */
kernel_trap(_kernelrpc_mach_vm_deallocate_trap,-12,5) /* 3 args, +2 for mach_vm_size_t and mach_vm_address_t */
+kernel_trap(task_dyld_process_info_notify_get,-13,4) /* 2 args, +2 for mach_vm_address_t */
kernel_trap(_kernelrpc_mach_vm_protect_trap,-14,7) /* 5 args, +2 for mach_vm_address_t and mach_vm_size_t */
kernel_trap(_kernelrpc_mach_vm_map_trap,-15,9)
kernel_trap(_kernelrpc_mach_port_allocate_trap,-16,3)
#include <mach/mach_types.defs>
#include <mach_debug/mach_debug_types.defs>
+#if !KERNEL && !LIBSYSCALL_INTERFACE
+#define PREFIX(NAME) _kernelrpc_ ## NAME
+#else
+#define PREFIX(NAME) NAME
+#endif
+
/*
* Create a new task with an empty set of IPC rights,
* and having an address space constructed from the
* count for that task is non-zero.
*/
routine task_suspend(
- target_task : task_t);
+ target_task : task_read_t);
/*
* that also have non-zero suspend counts may execute.
*/
routine task_resume(
- target_task : task_t);
+ target_task : task_read_t);
/*
* Returns the current value of the selected special port
behavior : exception_behavior_t;
new_flavor : thread_state_flavor_t;
out masks : exception_mask_array_t;
- out old_handlerss : exception_handler_array_t, SameCount;
+ out old_handlers : exception_handler_array_t, SameCount;
out old_behaviors : exception_behavior_array_t, SameCount;
out old_flavors : exception_flavor_array_t, SameCount);
out old_limit : int);
routine task_suspend2(
- target_task : task_t;
+ target_task : task_read_t;
out suspend_token : task_suspension_token_t);
routine task_resume2(
inout old_voucher : ipc_voucher_t);
routine task_generate_corpse(
- task :task_t;
+ task :task_read_t;
out corpse_task_port:mach_port_t);
routine task_map_corpse_info(
uid : suid_cred_uid_t;
out delegation : suid_cred_t);
+#if KERNEL || (!KERNEL && !LIBSYSCALL_INTERFACE)
+routine PREFIX(mach_task_is_self)(
+ task : task_name_t;
+ out is_self : boolean_t);
+#else
+ /* Do not generate header, use the one in mach_init.h */
+ skip;
+#endif
+
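/*
 * Illustrative sketch (editor's addition, not part of the diff): outside the
 * kernel the routine above is generated under the _kernelrpc_ prefix (see the
 * PREFIX macro earlier) and Libsyscall re-exports it; the public prototype in
 * mach_init.h may differ in shape from the raw MIG stub assumed here.
 * task_name_port stands in for any task-name right the caller holds.
 */
boolean_t is_self = FALSE;
kern_return_t kr = _kernelrpc_mach_task_is_self(task_name_port, &is_self);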
+routine task_dyld_process_info_notify_register(
+ target_task : task_read_t;
+ notify : mach_port_make_send_t);
+
+routine task_create_identity_token(
+ task : task_t;
+ out token : task_id_token_t);
+
+routine task_identity_token_get_task_port(
+ token : task_id_token_t;
+ flavor : task_flavor_t;
+ out task_port: mach_port_t);
+
+routine task_dyld_process_info_notify_deregister(
+ target_task : task_read_t;
+ notify : mach_port_name_t);
+
+routine task_get_exception_ports_info(
+ port : mach_port_t;
+ exception_mask : exception_mask_t;
+ out masks : exception_mask_array_t;
+ out old_handlers_info : exception_handler_info_array_t, SameCount;
+ out old_behaviors : exception_behavior_array_t, SameCount;
+ out old_flavors : exception_flavor_array_t, SameCount);
+
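/*
 * Illustrative sketch (editor's addition, not part of the diff): the identity
 * token routines above let the holder of a task port derive a lesser
 * privileged port for the same task, here the read port (TASK_FLAVOR_READ).
 */
static kern_return_t
get_own_read_port(mach_port_t *read_port)
{
	task_id_token_t token = MACH_PORT_NULL;
	kern_return_t kr;

	kr = task_create_identity_token(mach_task_self(), &token);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	kr = task_identity_token_get_task_port(token, TASK_FLAVOR_READ, read_port);
	mach_port_deallocate(mach_task_self(), token);
	return kr;
}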
/* vim: set ft=c : */
task_access_port : mach_port_t;
new_pid : int32_t);
+routine check_task_access_with_flavor(
+ task_access_port : mach_port_t;
+ calling_pid : int32_t;
+ calling_gid : uint32_t;
+ target_pid : int32_t;
+ flavor : mach_task_flavor_t;
+ ServerAuditToken caller_cred : audit_token_t);
+
/* vim: set ft=c : */
#define TASK_READ_PORT 6 /* The read port for task. */
-
+/*
+ * Evolving and likely to change.
+ */
#define TASK_SEATBELT_PORT 7 /* Seatbelt compiler/DEM port for task. */
* for its task is also zero.
*/
routine thread_suspend(
- target_act : thread_act_t);
+ target_act : thread_read_t);
/*
* Decrement the suspend count for the target thread,
* if that count is not already zero.
*/
routine thread_resume(
- target_act : thread_act_t);
+ target_act : thread_read_t);
/*
* Cause any user or meta- instructions currently being
out out_state : thread_state_t, CountInOut);
#ifdef XNU_KERNEL_PRIVATE
-#endif
+ skip;
+#else
+ skip;
+#endif /* XNU_KERNEL_PRIVATE */
+
+routine thread_get_exception_ports_info(
+ port : mach_port_t;
+ exception_mask : exception_mask_t;
+ out masks : exception_mask_array_t;
+ out old_handlers_info : exception_handler_info_array_t, SameCount;
+ out old_behaviors : exception_behavior_array_t, SameCount;
+ out old_flavors : exception_flavor_array_t, SameCount);
/* vim: set ft=c : */
#define THREAD_READ_PORT 3 /* The read port for thread. */
+#define THREAD_MAX_SPECIAL_PORT THREAD_READ_PORT
/*
* Definitions for ease of use
*/
routine vm_map_exec_lockdown(
target_task : vm_map_t);
+routine PREFIX(KERNEL_SERVER_SUFFIX(vm_remap_new)) (
+ target_task : vm_map_t;
+inout target_address : vm_address_t;
+ size : vm_size_t;
+ mask : vm_address_t;
+ flags : int;
+#ifdef KERNEL_SERVER
+ src_tport : mach_port_t;
+#else
+ src_task : vm_map_read_t;
+#endif
+ src_address : vm_address_t;
+ copy : boolean_t;
+inout cur_protection : vm_prot_t;
+inout max_protection : vm_prot_t;
+ inheritance : vm_inherit_t);
/* vim: set ft=c : */
* When we need to allocate a chunk of anonymous memory over that size,
* we have to allocate more than one chunk.
*/
-#define ANON_MAX_SIZE 0xFFFFF000ULL
+#define ANON_MAX_SIZE ((1ULL << 32) - PAGE_SIZE)
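/*
 * Editor's note: with 4 KB pages the new expression still evaluates to the
 * old constant (0x100000000ULL - 0x1000 == 0xFFFFF000ULL); with 16 KB pages
 * it becomes 0xFFFFC000ULL, i.e. the largest page-multiple below 4 GB.
 */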
/*
* Work-around for <rdar://problem/6626493>
* Break large anonymous memory areas into 128MB chunks to alleviate
#define VM_FLAGS_NO_CACHE 0x0010
#define VM_FLAGS_RESILIENT_CODESIGN 0x0020
#define VM_FLAGS_RESILIENT_MEDIA 0x0040
+#define VM_FLAGS_PERMANENT 0x0080
#define VM_FLAGS_OVERWRITE 0x4000 /* delete any existing mappings first */
/*
* VM_FLAGS_SUPERPAGE_MASK
VM_FLAGS_4GB_CHUNK | \
VM_FLAGS_RANDOM_ADDR | \
VM_FLAGS_NO_CACHE | \
+ VM_FLAGS_PERMANENT | \
VM_FLAGS_OVERWRITE | \
VM_FLAGS_SUPERPAGE_MASK | \
VM_FLAGS_ALIAS_MASK)
#define VM_KERN_MEMORY_SKYWALK 26
#define VM_KERN_MEMORY_LTABLE 27
#define VM_KERN_MEMORY_HV 28
+#define VM_KERN_MEMORY_RETIRED 29
-#define VM_KERN_MEMORY_FIRST_DYNAMIC 29
+#define VM_KERN_MEMORY_FIRST_DYNAMIC 30
/* out of tags: */
#define VM_KERN_MEMORY_ANY 255
#define VM_KERN_MEMORY_COUNT 256
};
struct vm_allocation_zone_total {
- uint64_t total;
- uint64_t peak;
- uint32_t waste;
- uint32_t wastediv;
+ vm_size_t vazt_total;
+ vm_size_t vazt_peak;
};
typedef struct vm_allocation_zone_total vm_allocation_zone_total_t;
typedef ipc_info_tree_name_t *ipc_info_tree_name_array_t;
+typedef struct ipc_info_port {
+ natural_t iip_port_object; /* port object identifier */
+ natural_t iip_receiver_object; /* receiver task identifier (if any) */
+} ipc_info_port_t;
+
+typedef ipc_info_port_t *exception_handler_info_array_t;
+
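/*
 * Illustrative sketch (editor's addition, not part of the diff): a caller that
 * obtained an exception_handler_info_array_t (e.g. from
 * task_get_exception_ports_info) can match a handler entry back to its
 * receiver task by identifier.
 */
static const ipc_info_port_t *
find_handler_for_receiver(const ipc_info_port_t *info, unsigned int count,
    natural_t receiver_object)
{
	for (unsigned int i = 0; i < count; i++) {
		if (info[i].iip_receiver_object == receiver_object) {
			return &info[i];
		}
	}
	return NULL;
}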
#endif /* _MACH_DEBUG_IPC_INFO_H_ */
*/
bool ml_cpu_can_exit(int cpu_id);
-/*!
- * @function ml_cpu_init_state
- * @brief Needs to be called from schedulable context prior to using
- * the ml_cpu_*_state_transition or ml_cpu_*_loop functions.
- */
-void ml_cpu_init_state(void);
-
/*!
* @function ml_cpu_begin_state_transition
* @brief Tell the platform code that processor_start() or
<dt> <strong>TASK_KERNEL_PORT</strong>
<dd>
[task-self send right] The port used to control this task. Used
-to send messages that affect the task. This is the port returned
-by <strong>mach_task_self</strong>.
+to send messages that affect the task. This is the movable task port and is
+different from the one returned by <strong>mach_task_self</strong> (immovable).
<p>
<dt> <strong>TASK_BOOTSTRAP_PORT</strong>
<dd>
<dt> <strong>TASK_KERNEL_PORT</strong>
<dd>
[task-self send right] The task's kernel port. Used by the
-kernel to receive messages to manipulate the task. This is the
-port returned by <strong>mach_task_self</strong>. Setting this special port
-does not change the identity of the kernel port that names the
-task; this simply changes the value returned as the kernel
+kernel to receive messages to manipulate the task. This is the movable task
+port and is different from the one returned by <strong>mach_task_self</strong>
+(immovable). Setting this special port does not change the identity of the
+kernel port that names the task; this simply changes the value returned as the kernel
special port.
<p>
<dt> <strong>TASK_HOST_NAME_PORT</strong>
<dt> <strong>THREAD_KERNEL_PORT</strong>
<dd>
[thread-self send right] The port used to name the thread.
-Used to invoke operations that affect the thread. This is the
-port returned by <strong>mach_thread_self</strong>.
+Used to invoke operations that affect the thread. This is the movable port
+for the thread and is different from the one returned by <strong>mach_thread_self</strong> (immovable).
</dl>
<p>
<dt> <var>special_port</var>
<dt> <strong>THREAD_KERNEL_PORT</strong>
<dd>
[thread-self port] The thread's kernel port. Used by the kernel
-to receive messages from the thread. This is the port returned
-by <strong>mach_thread_self</strong>.
+to receive messages from the thread. This is the movable port for the thread
+and is different from the one returned by <strong>mach_thread_self</strong> (immovable).
</dl>
<p>
<dt> <var>special_port</var>
#include <kern/misc_protos.h>
#include <pexpert/pexpert.h>
#include <prng/entropy.h>
-#include <crypto/entropy/entropy_sysctl.h>
#include <machine/machine_routines.h>
#include <libkern/section_keywords.h>
#include <sys/cdefs.h>
entropy_analysis_max_sample_count = sample_count;
entropy_analysis_buffer_size = sample_count * sizeof(entropy_sample_t);
entropy_analysis_buffer = zalloc_permanent(entropy_analysis_buffer_size, ZALIGN(entropy_sample_t));
- entropy_analysis_register_sysctls();
}
__startup_func
#include <tests/xnupost.h>
#include <kern/kalloc.h>
#include <kern/bits.h>
+#include <pexpert/pexpert.h>
extern void dump_bitmap_next(bitmap_t *map, uint nbits);
extern void dump_bitmap_lsb(bitmap_t *map, uint nbits);
assert(bitmap_first(map, nbits) == -1);
assert(bitmap_lsb_first(map, nbits) == -1);
+ /* bitmap_not */
+ bitmap_not(map, map, nbits);
+ assert(bitmap_is_full(map, nbits));
+
+ bitmap_not(map, map, nbits);
+ assert(bitmap_first(map, nbits) == -1);
+ assert(bitmap_lsb_first(map, nbits) == -1);
+
+ /* bitmap_and */
+ bitmap_t *map0 = bitmap_alloc(nbits);
+ assert(bitmap_first(map0, nbits) == -1);
+
+ bitmap_t *map1 = bitmap_alloc(nbits);
+ bitmap_full(map1, nbits);
+ assert(bitmap_is_full(map1, nbits));
+
+ bitmap_and(map, map0, map1, nbits);
+ assert(bitmap_first(map, nbits) == -1);
+
+ bitmap_and(map, map1, map1, nbits);
+ assert(bitmap_is_full(map, nbits));
+
+ /* bitmap_and_not */
+ bitmap_and_not(map, map0, map1, nbits);
+ assert(bitmap_first(map, nbits) == -1);
+
+ bitmap_and_not(map, map1, map0, nbits);
+ assert(bitmap_is_full(map, nbits));
+
+ /* bitmap_equal */
+ for (uint i = 0; i < nbits; i++) {
+ bitmap_clear(map, i);
+ assert(!bitmap_equal(map, map1, nbits));
+ bitmap_set(map, i);
+ assert(bitmap_equal(map, map1, nbits));
+ }
+
+ /* bitmap_and_not_mask_first */
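+ /* clearing bit i in an otherwise full map makes i the first set bit of (map1 & ~map) */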
+ for (uint i = 0; i < nbits; i++) {
+ bitmap_clear(map, i);
+ expected_result = i;
+ int result = bitmap_and_not_mask_first(map1, map, nbits);
+ assert(result == expected_result);
+ bitmap_set(map, i);
+ result = bitmap_and_not_mask_first(map1, map, nbits);
+ assert(result == -1);
+ }
+
bitmap_free(map, nbits);
+ bitmap_free(map0, nbits);
+ bitmap_free(map1, nbits);
}
}
extern kern_return_t test_os_log(void);
extern kern_return_t test_os_log_parallel(void);
extern kern_return_t bitmap_post_test(void);
+extern kern_return_t counter_tests(void);
#ifdef __arm64__
extern kern_return_t arm64_munger_test(void);
#if __ARM_VFP__
XNUPOST_TEST_CONFIG_BASIC(vfp_state_test),
#endif
- XNUPOST_TEST_CONFIG_BASIC(vm_tests), };
+ XNUPOST_TEST_CONFIG_BASIC(vm_tests),
+ XNUPOST_TEST_CONFIG_BASIC(counter_tests)};
uint32_t kernel_post_tests_count = sizeof(kernel_post_tests) / sizeof(xnupost_test_data_t);
ZC_DESTRUCTIBLE);
T_ASSERT_NOTNULL(test_zone, NULL);
- T_ASSERT_EQ_INT(test_zone->countfree, 0, NULL);
+ T_ASSERT_EQ_INT(test_zone->z_elems_free, 0, NULL);
T_SETUPEND;
T_ASSERT_NOTNULL(test_ptr = zalloc(test_zone), NULL);
/* task_t */
ALLOC_VALIDATE_DATA_PTR(struct task, vm_map_t, map, "task.map");
- ALLOC_VALIDATE_DATA_PTR(struct task, struct ipc_port *, itk_self[0], "task.itk_self");
+ ALLOC_VALIDATE_DATA_PTR(struct task, struct ipc_port *, itk_task_ports[0], "task.itk_task_ports");
ALLOC_VALIDATE_DATA_PTR(struct task, struct ipc_port *, itk_settable_self, "task.itk_settable_self");
ALLOC_VALIDATE_DATA_PTR(struct task, struct ipc_port *, itk_host, "task.itk_host");
ALLOC_VALIDATE_DATA_PTR(struct task, struct ipc_port *, itk_bootstrap, "task.itk_bootstrap");
EXPORT_ONLY_FILES = \
memory_types.h \
pmap.h \
+ lz4.h \
+ lz4_constants.h \
+ lz4_assembly_select.h \
vm_fault.h \
vm_kern.h \
vm_map.h \
struct memory_object vn_pgr_hdr;
/* pager-specific */
- struct os_refcnt ref_count;
+#if MEMORY_OBJECT_HAS_REFCOUNT
+#define vn_pgr_hdr_ref vn_pgr_hdr.mo_ref
+#else
+ os_ref_atomic_t vn_pgr_hdr_ref;
+#endif
struct vnode *vnode_handle; /* vnode handle */
} *vnode_pager_t;
vnode_pager_t vnode_object;
vnode_object = vnode_pager_lookup(mem_obj);
- os_ref_retain(&vnode_object->ref_count);
+ os_ref_retain_raw(&vnode_object->vn_pgr_hdr_ref, NULL);
}
/*
vnode_object = vnode_pager_lookup(mem_obj);
- if (os_ref_release(&vnode_object->ref_count) == 0) {
+ if (os_ref_release_raw(&vnode_object->vn_pgr_hdr_ref, NULL) == 0) {
if (vnode_object->vnode_handle != NULL) {
vnode_pager_vrele(vnode_object->vnode_handle);
}
vnode_object->vn_pgr_hdr.mo_pager_ops = &vnode_pager_ops;
vnode_object->vn_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;
- os_ref_init(&vnode_object->ref_count, NULL);
+ os_ref_init_raw(&vnode_object->vn_pgr_hdr_ref, NULL);
vnode_object->vnode_handle = vp;
return vnode_object;
/* pager-specific data */
lck_mtx_t lock;
- struct os_refcnt ref_count; /* reference count */
device_port_t device_handle; /* device_handle */
vm_size_t size;
+#if MEMORY_OBJECT_HAS_REFCOUNT
+#define dev_pgr_hdr_ref dev_pgr_hdr.mo_ref
+#else
+ os_ref_atomic_t dev_pgr_hdr_ref;
+#endif
int flags;
boolean_t is_mapped;
} *device_pager_t;
+__header_always_inline os_ref_count_t
+device_pager_get_refcount(device_pager_t device_object)
+{
+ return os_ref_get_count_raw(&device_object->dev_pgr_hdr_ref);
+}
+
LCK_GRP_DECLARE(device_pager_lck_grp, "device_pager");
ZONE_DECLARE(device_pager_zone, "device node pager structures",
assert(mem_obj->mo_pager_ops == &device_pager_ops);
device_object = (device_pager_t)mem_obj;
- assert(os_ref_get_count(&device_object->ref_count) > 0);
+ assert(device_pager_get_refcount(device_object) > 0);
return device_object;
}
device_pager_t device_object;
device_object = device_pager_lookup(mem_obj);
- os_ref_retain(&device_object->ref_count);
+ os_ref_retain_raw(&device_object->dev_pgr_hdr_ref, NULL);
DTRACE_VM2(device_pager_reference,
device_pager_t, device_object,
- unsigned int, os_ref_get_count(&device_object->ref_count));
+ unsigned int, device_pager_get_refcount(device_object));
}
/*
{
device_pager_t device_object;
memory_object_control_t device_control;
+ os_ref_count_t ref_count;
device_object = device_pager_lookup(mem_obj);
DTRACE_VM2(device_pager_deallocate,
device_pager_t, device_object,
- unsigned int, os_ref_get_count(&device_object->ref_count));
+ unsigned int, device_pager_get_refcount(device_object));
- os_ref_count_t ref_count = os_ref_release(&device_object->ref_count);
+ ref_count = os_ref_release_raw(&device_object->dev_pgr_hdr_ref, NULL);
if (ref_count == 1) {
/*
DTRACE_VM2(device_pager_destroy,
device_pager_t, device_object,
- unsigned int, os_ref_get_count(&device_object->ref_count));
+ unsigned int, device_pager_get_refcount(device_object));
assert(device_object->is_mapped == FALSE);
if (device_object->device_handle != (device_port_t) NULL) {
*/
DTRACE_VM2(device_pager_free,
device_pager_t, device_object,
- unsigned int, os_ref_get_count(&device_object->ref_count));
+ unsigned int, device_pager_get_refcount(device_object));
+ device_control = device_object->dev_pgr_hdr.mo_control;
+
+ if (device_control != MEMORY_OBJECT_CONTROL_NULL) {
+ memory_object_control_deallocate(device_control);
+ device_object->dev_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;
+ }
device_pager_lock_destroy(device_object);
zfree(device_pager_zone, device_object);
device_object = device_pager_lookup(mem_obj);
device_pager_lock(device_object);
- assert(os_ref_get_count(&device_object->ref_count) > 0);
+ assert(device_pager_get_refcount(device_object) > 0);
if (device_object->is_mapped == FALSE) {
/*
* First mapping of this pager: take an extra reference
device_object = device_pager_lookup(mem_obj);
device_pager_lock(device_object);
- assert(os_ref_get_count(&device_object->ref_count) > 0);
+ assert(device_pager_get_refcount(device_object) > 0);
if (device_object->is_mapped) {
device_object->is_mapped = FALSE;
drop_ref = TRUE;
device_object->dev_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;
device_pager_lock_init(device_object);
- os_ref_init(&device_object->ref_count, NULL);
+ os_ref_init_raw(&device_object->dev_pgr_hdr_ref, NULL);
device_object->is_mapped = FALSE;
DTRACE_VM2(device_pager_create,
device_pager_t, device_object,
- unsigned int, os_ref_get_count(&device_object->ref_count));
+ unsigned int, device_pager_get_refcount(device_object));
return device_object;
}
#include "lz4_assembly_select.h"
#include "lz4_constants.h"
+#if CONFIG_IO_COMPRESSION_STATS
+#include <string.h>
+#else
#define memcpy __builtin_memcpy
+#endif
#pragma mark - Building blocks
vm_object_reference(object);
named_entry_unlock(named_entry);
} else if (ip_kotype(port) == IKOT_MEM_OBJ_CONTROL) {
- memory_object_control_t control;
- control = (memory_object_control_t) port;
- if (control == NULL) {
- return KERN_INVALID_ARGUMENT;
- }
- object = memory_object_control_to_vm_object(control);
- if (object == VM_OBJECT_NULL) {
- return KERN_INVALID_ARGUMENT;
- }
- vm_object_reference(object);
+ panic("unexpected IKOT_MEM_OBJ_CONTROL: %p", port);
} else {
return KERN_INVALID_ARGUMENT;
}
return KERN_INVALID_HOST;
}
- assert(host_priv == &realhost);
-
new_manager = *default_manager;
lck_mtx_lock(&memory_manager_default_lock);
current_manager = memory_manager_default;
return object->object_is_shared_cache;
}
-static ZONE_DECLARE(mem_obj_control_zone, "mem_obj_control",
- sizeof(struct memory_object_control), ZC_NOENCRYPT);
-
__private_extern__ memory_object_control_t
memory_object_control_allocate(
vm_object_t object)
{
- memory_object_control_t control;
-
- control = (memory_object_control_t)zalloc(mem_obj_control_zone);
- if (control != MEMORY_OBJECT_CONTROL_NULL) {
- control->moc_object = object;
- control->moc_ikot = IKOT_MEM_OBJ_CONTROL; /* fake ip_kotype */
- }
- return control;
+ return object;
}
__private_extern__ void
memory_object_control_collapse(
- memory_object_control_t control,
+ memory_object_control_t *control,
vm_object_t object)
{
- assert((control->moc_object != VM_OBJECT_NULL) &&
- (control->moc_object != object));
- control->moc_object = object;
+ *control = object;
}
__private_extern__ vm_object_t
memory_object_control_to_vm_object(
memory_object_control_t control)
{
- if (control == MEMORY_OBJECT_CONTROL_NULL ||
- control->moc_ikot != IKOT_MEM_OBJ_CONTROL) {
- return VM_OBJECT_NULL;
- }
-
- return control->moc_object;
+ return control;
}
__private_extern__ vm_object_t
*/
void
memory_object_control_deallocate(
- memory_object_control_t control)
+ __unused memory_object_control_t control)
{
- zfree(mem_obj_control_zone, control);
}
void
memory_object_control_disable(
- memory_object_control_t control)
+ memory_object_control_t *control)
{
- assert(control->moc_object != VM_OBJECT_NULL);
- control->moc_object = VM_OBJECT_NULL;
+ assert(*control != VM_OBJECT_NULL);
+ *control = VM_OBJECT_NULL;
}
void
__private_extern__
void memory_object_control_collapse(
- memory_object_control_t control,
+ memory_object_control_t *control,
vm_object_t object);
__private_extern__
memory_object_control_t control);
extern void memory_object_control_disable(
- memory_object_control_t control);
+ memory_object_control_t *control);
extern
memory_object_control_t convert_port_to_mo_control(
extern void *pmap_steal_freeable_memory(vm_size_t size); /* Early memory allocation */
extern uint_t pmap_free_pages(void); /* report remaining unused physical pages */
+#if defined(__arm__) || defined(__arm64__)
+extern uint_t pmap_free_pages_span(void); /* report phys address range of unused physical pages */
+#endif /* defined(__arm__) || defined(__arm64__) */
extern void pmap_startup(vm_offset_t *startp, vm_offset_t *endp); /* allocate vm_page structs */
extern uint32_t pmap_lookup_in_static_trust_cache(const uint8_t cdhash[CS_CDHASH_LEN]);
extern bool pmap_lookup_in_loaded_trust_caches(const uint8_t cdhash[CS_CDHASH_LEN]);
+extern void pmap_set_compilation_service_cdhash(const uint8_t cdhash[CS_CDHASH_LEN]);
+extern bool pmap_match_compilation_service_cdhash(const uint8_t cdhash[CS_CDHASH_LEN]);
+
extern bool pmap_in_ppl(void);
extern void *pmap_claim_reserved_ppl_page(void);
extern ledger_t pmap_ledger_alloc(void);
extern void pmap_ledger_free(ledger_t);
+extern bool pmap_is_bad_ram(ppnum_t ppn);
+extern void pmap_retire_page(ppnum_t ppn);
extern kern_return_t pmap_cs_allow_invalid(pmap_t pmap);
#if __arm64__
#include <vm/vm_protos.h>
#include <vm/vm_kern.h>
-
/*
* APPLE PROTECT MEMORY PAGER
*
*/
typedef struct apple_protect_pager {
/* mandatory generic header */
- struct memory_object ap_pgr_hdr;
+ struct memory_object ap_pgr_hdr;
/* pager-specific data */
queue_chain_t pager_queue; /* next & prev pagers */
- struct os_refcnt ref_count; /* reference count */
- boolean_t is_ready; /* is this pager ready ? */
- boolean_t is_mapped; /* is this mem_obj mapped ? */
+#if MEMORY_OBJECT_HAS_REFCOUNT
+#define ap_pgr_hdr_ref ap_pgr_hdr.mo_ref
+#else
+ os_ref_atomic_t ap_pgr_hdr_ref; /* reference count */
+#endif
+ bool is_ready; /* is this pager ready ? */
+ bool is_mapped; /* is this mem_obj mapped ? */
+ bool is_cached; /* is this pager cached ? */
vm_object_t backing_object; /* VM obj w/ encrypted data */
vm_object_offset_t backing_offset;
vm_object_offset_t crypto_backing_offset; /* for key... */
* List of memory objects managed by this EMM.
* The list is protected by the "apple_protect_pager_lock" lock.
*/
-int apple_protect_pager_count = 0; /* number of pagers */
-int apple_protect_pager_count_mapped = 0; /* number of unmapped pagers */
+unsigned int apple_protect_pager_count = 0; /* number of pagers */
+unsigned int apple_protect_pager_count_mapped = 0; /* number of mapped pagers */
queue_head_t apple_protect_pager_queue = QUEUE_HEAD_INITIALIZER(apple_protect_pager_queue);
LCK_GRP_DECLARE(apple_protect_pager_lck_grp, "apple_protect");
LCK_MTX_DECLARE(apple_protect_pager_lock, &apple_protect_pager_lck_grp);
/*
* Maximum number of unmapped pagers we're willing to keep around.
*/
-int apple_protect_pager_cache_limit = 20;
+unsigned int apple_protect_pager_cache_limit = 20;
/*
* Statistics & counters.
*/
-int apple_protect_pager_count_max = 0;
-int apple_protect_pager_count_unmapped_max = 0;
-int apple_protect_pager_num_trim_max = 0;
-int apple_protect_pager_num_trim_total = 0;
+unsigned int apple_protect_pager_count_max = 0;
+unsigned int apple_protect_pager_count_unmapped_max = 0;
+unsigned int apple_protect_pager_num_trim_max = 0;
+unsigned int apple_protect_pager_num_trim_total = 0;
vm_object_offset_t crypto_backing_offset,
struct pager_crypt_info *crypt_info,
vm_object_offset_t crypto_start,
- vm_object_offset_t crypto_end);
+ vm_object_offset_t crypto_end,
+ boolean_t cache_pager);
apple_protect_pager_t apple_protect_pager_lookup(memory_object_t mem_obj);
void apple_protect_pager_dequeue(apple_protect_pager_t pager);
void apple_protect_pager_deallocate_internal(apple_protect_pager_t pager,
pager = apple_protect_pager_lookup(mem_obj);
assert(pager->is_ready);
- assert(os_ref_get_count(&pager->ref_count) > 1); /* pager is alive and mapped */
+ assert(os_ref_get_count_raw(&pager->ap_pgr_hdr_ref) > 1); /* pager is alive and mapped */
PAGER_DEBUG(PAGER_PAGEIN, ("apple_protect_pager_data_request: %p, %llx, %x, %x, pager %p\n", mem_obj, offset, length, protection_required, pager));
retval = kr;
goto done;
}
- dst_object = mo_control->moc_object;
+ dst_object = memory_object_control_to_vm_object(mo_control);
assert(dst_object != VM_OBJECT_NULL);
/*
pager = apple_protect_pager_lookup(mem_obj);
lck_mtx_lock(&apple_protect_pager_lock);
- os_ref_retain_locked(&pager->ref_count);
+ os_ref_retain_locked_raw(&pager->ap_pgr_hdr_ref, NULL);
lck_mtx_unlock(&apple_protect_pager_lock);
}
boolean_t locked)
{
boolean_t needs_trimming;
- int count_unmapped;
+ unsigned int count_unmapped;
+ os_ref_count_t ref_count;
if (!locked) {
lck_mtx_lock(&apple_protect_pager_lock);
}
/* drop a reference on this pager */
- os_ref_count_t ref_count = os_ref_release_locked(&pager->ref_count);
+ ref_count = os_ref_release_locked_raw(&pager->ap_pgr_hdr_ref, NULL);
if (ref_count == 1) {
/*
lck_mtx_lock(&apple_protect_pager_lock);
assert(pager->is_ready);
- assert(os_ref_get_count(&pager->ref_count) > 0); /* pager is alive */
+ assert(os_ref_get_count_raw(&pager->ap_pgr_hdr_ref) > 0); /* pager is alive */
if (pager->is_mapped == FALSE) {
/*
* First mapping of this pager: take an extra reference
* are removed.
*/
pager->is_mapped = TRUE;
- os_ref_retain_locked(&pager->ref_count);
+ os_ref_retain_locked_raw(&pager->ap_pgr_hdr_ref, NULL);
apple_protect_pager_count_mapped++;
}
lck_mtx_unlock(&apple_protect_pager_lock);
memory_object_t mem_obj)
{
apple_protect_pager_t pager;
- int count_unmapped;
+ unsigned int count_unmapped;
PAGER_DEBUG(PAGER_ALL,
("apple_protect_pager_last_unmap: %p\n", mem_obj));
assert(mem_obj->mo_pager_ops == &apple_protect_pager_ops);
pager = (apple_protect_pager_t)(uintptr_t) mem_obj;
- assert(os_ref_get_count(&pager->ref_count) > 0);
+ assert(os_ref_get_count_raw(&pager->ap_pgr_hdr_ref) > 0);
return pager;
}
vm_object_offset_t crypto_backing_offset,
struct pager_crypt_info *crypt_info,
vm_object_offset_t crypto_start,
- vm_object_offset_t crypto_end)
+ vm_object_offset_t crypto_end,
+ boolean_t cache_pager)
{
apple_protect_pager_t pager, pager2;
memory_object_control_t control;
pager->ap_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;
pager->is_ready = FALSE;/* not ready until it has a "name" */
- os_ref_init_count(&pager->ref_count, NULL, 2); /* existence reference (for the cache) and another for the caller */
+ /* one reference for the caller */
+ os_ref_init_count_raw(&pager->ap_pgr_hdr_ref, NULL, 1);
pager->is_mapped = FALSE;
+ if (cache_pager) {
+ /* extra reference for the cache */
+ os_ref_retain_locked_raw(&pager->ap_pgr_hdr_ref, NULL);
+ pager->is_cached = true;
+ } else {
+ pager->is_cached = false;
+ }
pager->backing_object = backing_object;
pager->backing_offset = backing_offset;
pager->crypto_backing_offset = crypto_backing_offset;
vm_object_offset_t crypto_backing_offset,
struct pager_crypt_info *crypt_info,
vm_object_offset_t crypto_start,
- vm_object_offset_t crypto_end)
+ vm_object_offset_t crypto_end,
+ boolean_t cache_pager)
{
apple_protect_pager_t pager;
struct pager_crypt_info *old_crypt_info, *new_crypt_info;
crypt_info_deallocate(old_crypt_info);
assert(old_crypt_info->crypt_refcnt > 0);
/* give extra reference on pager to the caller */
- os_ref_retain_locked(&pager->ref_count);
+ os_ref_retain_locked_raw(&pager->ap_pgr_hdr_ref, NULL);
break;
}
}
crypto_backing_offset,
new_crypt_info,
crypto_start,
- crypto_end);
+ crypto_end,
+ cache_pager);
}
if (pager == APPLE_PROTECT_PAGER_NULL) {
/* could not create a new pager */
{
apple_protect_pager_t pager, prev_pager;
queue_head_t trim_queue;
- int num_trim;
- int count_unmapped;
+ unsigned int num_trim;
+ unsigned int count_unmapped;
lck_mtx_lock(&apple_protect_pager_lock);
prev_pager = (apple_protect_pager_t)
queue_prev(&pager->pager_queue);
- if (os_ref_get_count(&pager->ref_count) == 2 &&
+ if (pager->is_cached &&
+ os_ref_get_count_raw(&pager->ap_pgr_hdr_ref) == 2 &&
pager->is_ready &&
!pager->is_mapped) {
/* this pager can be trimmed */
pager,
apple_protect_pager_t,
pager_queue);
+ assert(pager->is_cached);
+ pager->is_cached = false;
pager->pager_queue.next = NULL;
pager->pager_queue.prev = NULL;
/*
* has already been dequeued, but we still need to remove
* a reference.
*/
- os_ref_count_t __assert_only count = os_ref_release_locked(&pager->ref_count);
+ os_ref_count_t __assert_only count;
+ count = os_ref_release_locked_raw(&pager->ap_pgr_hdr_ref, NULL);
assert(count == 1);
apple_protect_pager_terminate_internal(pager);
}
* the boot-arg & device-tree code.
*/
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
#if CONFIG_FREEZE
int vm_compressor_mode = VM_PAGER_FREEZER_DEFAULT;
int vm_compressor_mode = VM_PAGER_NOT_CONFIGURED;
#endif /* CONFIG_FREEZE */
-#else /* CONFIG_EMBEDDED */
+#else /* !XNU_TARGET_OS_OSX */
int vm_compressor_mode = VM_PAGER_COMPRESSOR_WITH_SWAP;
-#endif /* CONFIG_EMBEDDED */
+#endif /* !XNU_TARGET_OS_OSX */
TUNABLE(uint32_t, vm_compression_limit, "vm_compression_limit", 0);
int vm_compressor_is_active = 0;
static void vm_compressor_compact_and_swap(boolean_t);
static void vm_compressor_age_swapped_in_segments(boolean_t);
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
static void vm_compressor_take_paging_space_action(void);
-#endif
+#endif /* XNU_TARGET_OS_OSX */
void compute_swapout_target_age(void);
TUNABLE(bool, kill_on_no_paging_space, "-kill_on_no_paging_space", false);
#endif /* DEVELOPMENT || DEBUG */
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
static uint32_t no_paging_space_action_in_progress = 0;
extern void memorystatus_send_low_swap_note(void);
}
}
}
-#endif /* !CONFIG_EMBEDDED */
+#endif /* XNU_TARGET_OS_OSX */
void
assert((C_SEGMENTS_PER_PAGE * sizeof(union c_segu)) == PAGE_SIZE);
-#ifdef CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
vm_compressor_minorcompact_threshold_divisor = 20;
vm_compressor_majorcompact_threshold_divisor = 30;
vm_compressor_unthrottle_threshold_divisor = 40;
vm_compressor_catchup_threshold_divisor = 60;
-#else
+#else /* !XNU_TARGET_OS_OSX */
if (max_mem <= (3ULL * 1024ULL * 1024ULL * 1024ULL)) {
vm_compressor_minorcompact_threshold_divisor = 11;
vm_compressor_majorcompact_threshold_divisor = 13;
vm_compressor_unthrottle_threshold_divisor = 35;
vm_compressor_catchup_threshold_divisor = 50;
}
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
queue_init(&c_bad_list_head);
queue_init(&c_age_list_head);
compressor_pool_max_size = C_SEG_MAX_LIMIT;
compressor_pool_max_size *= C_SEG_BUFSIZE;
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
if (vm_compression_limit == 0) {
if (max_mem <= (4ULL * 1024ULL * 1024ULL * 1024ULL)) {
{
int old_state = c_seg->c_state;
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
#if DEVELOPMENT || DEBUG
if (new_state != C_IS_FILLING) {
LCK_MTX_ASSERT(&c_seg->c_lock, LCK_MTX_ASSERT_OWNED);
}
LCK_MTX_ASSERT(c_list_lock, LCK_MTX_ASSERT_OWNED);
#endif
-#endif /* !CONFIG_EMBEDDED */
+#endif /* XNU_TARGET_OS_OSX */
switch (old_state) {
case C_IS_EMPTY:
assert(new_state == C_IS_FILLING || new_state == C_IS_FREE);
goto check_if_low_space;
}
}
+
+#if (XNU_TARGET_OS_OSX && __arm64__)
+ /*
+ * Thrashing detection disabled.
+ */
+#else /* (XNU_TARGET_OS_OSX && __arm64__) */
+
compute_swapout_target_age();
if (swapout_target_age) {
if (swapout_target_age) {
should_swap = TRUE;
}
+#endif /* (XNU_TARGET_OS_OSX && __arm64__) */
check_if_low_space:
VM_DEBUG_CONSTANT_EVENT(vm_compressor_do_delayed_compactions, VM_COMPRESSOR_DO_DELAYED_COMPACTIONS, DBG_FUNC_START, c_minor_count, flush_all, 0, 0);
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
LCK_MTX_ASSERT(c_list_lock, LCK_MTX_ASSERT_OWNED);
-#endif /* !CONFIG_EMBEDDED */
+#endif /* XNU_TARGET_OS_OSX */
while (!queue_empty(&c_minor_list_head) && needs_to_swap == FALSE) {
c_seg = (c_segment_t)queue_first(&c_minor_list_head);
int min_needed;
int size_to_populate;
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
if (vm_compressor_low_on_space()) {
vm_compressor_take_paging_space_action();
}
-#endif /* !CONFIG_EMBEDDED */
+#endif /* XNU_TARGET_OS_OSX */
if ((c_seg = *current_chead) == NULL) {
uint32_t c_segno;
vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE);
}
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
if ((c_minor_count && COMPRESSOR_NEEDS_TO_MINOR_COMPACT()) || vm_compressor_needs_to_major_compact()) {
vm_wake_compactor_swapper();
}
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
return retval;
}
#define VM_PAGE_COMPRESSOR_SWAP_CATCHUP_THRESHOLD (((AVAILABLE_MEMORY) * 10) / (vm_compressor_catchup_threshold_divisor ? vm_compressor_catchup_threshold_divisor : 10))
#define VM_PAGE_COMPRESSOR_HARD_THROTTLE_THRESHOLD (((AVAILABLE_MEMORY) * 9) / (vm_compressor_catchup_threshold_divisor ? vm_compressor_catchup_threshold_divisor : 9))
-#ifdef CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
#define AVAILABLE_NON_COMPRESSED_MIN 20000
#define COMPRESSOR_NEEDS_TO_SWAP() (((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_SWAP_THRESHOLD) || \
(AVAILABLE_NON_COMPRESSED_MEMORY < AVAILABLE_NON_COMPRESSED_MIN)) ? 1 : 0)
-#else
+#else /* !XNU_TARGET_OS_OSX */
#define COMPRESSOR_NEEDS_TO_SWAP() ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_SWAP_THRESHOLD) ? 1 : 0)
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
#define HARD_THROTTLE_LIMIT_REACHED() ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_HARD_THROTTLE_THRESHOLD) ? 1 : 0)
#define SWAPPER_NEEDS_TO_UNTHROTTLE() ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) ? 1 : 0)
#define COMPRESSOR_NEEDS_TO_MINOR_COMPACT() ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0)
-#ifdef CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
#define COMPRESSOR_FREE_RESERVED_LIMIT 28
-#else
+#else /* !XNU_TARGET_OS_OSX */
#define COMPRESSOR_FREE_RESERVED_LIMIT 128
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
uint32_t vm_compressor_get_encode_scratch_size(void) __pure2;
uint32_t vm_compressor_get_decode_scratch_size(void) __pure2;
boolean_t vm_swap_force_defrag = FALSE, vm_swap_force_reclaim = FALSE;
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
/*
* For CONFIG_FREEZE, we scale the c_segments_limit based on the
((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf) ((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)
-#else /* CONFIG_EMBEDDED */
+#else /* !XNU_TARGET_OS_OSX */
#define VM_MAX_SWAP_FILE_NUM 100
#define VM_SWAPFILE_DELAYED_TRIM_MAX 128
((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf) ((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)
-#endif /* CONFIG_EMBEDDED */
+#endif /* !XNU_TARGET_OS_OSX */
#define VM_SWAP_SHOULD_RECLAIM() (((vm_swap_force_reclaim == TRUE) || ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= SWAPFILE_RECLAIM_THRESHOLD_SEGS)) ? 1 : 0)
#define VM_SWAP_SHOULD_ABORT_RECLAIM() (((vm_swap_force_reclaim == FALSE) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= SWAPFILE_RECLAIM_MINIMUM_SEGS)) ? 1 : 0)
proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
/*
* dummy value until the swap file gets created
* when we drive the first c_segment_t to the
* know the true size we have to work with
*/
c_overage_swapped_limit = 16;
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
vm_num_swap_files_config = VM_MAX_SWAP_FILE_NUM;
+#if DEVELOPMENT || DEBUG
+ typeof(vm_num_swap_files_config) parsed_vm_max_num_swap_files = 0;
+ if (PE_parse_boot_argn("vm_max_num_swap_files", &parsed_vm_max_num_swap_files, sizeof(parsed_vm_max_num_swap_files))) {
+ if (parsed_vm_max_num_swap_files > 0) {
+ vm_num_swap_files_config = parsed_vm_max_num_swap_files;
+ } else {
+ printf("WARNING: Ignoring vm_max_num_swap_files=%d boot-arg. Value must be > 0\n", parsed_vm_max_num_swap_files);
+ }
+ }
+#endif
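+ /* On DEVELOPMENT/DEBUG kernels the default cap set above can be overridden */
+ /* with the vm_max_num_swap_files boot-arg; non-positive values are ignored. */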
+ printf("Maximum number of VM swap files: %d\n", vm_num_swap_files_config);
printf("VM Swap Subsystem is ON\n");
}
vm_compressor_catchup_threshold_divisor = 30;
}
}
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
vnode_setswapmount(vp);
vm_swappin_avail = vnode_getswappin_avail(vp);
if (vm_swappin_avail) {
vm_swappin_enabled = TRUE;
}
-#endif
+#endif /* XNU_TARGET_OS_OSX */
vm_swapfile_close((uint64_t)pathname, vp);
}
kheap_free(KHEAP_TEMP, pathname, namelen);
c_seg->c_store.c_swap_handle = f_offset;
- VM_STAT_INCR_BY(swapouts, size >> PAGE_SHIFT);
+ counter_add(&vm_statistics_swapouts, size >> PAGE_SHIFT);
if (c_seg->c_bytes_used) {
OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
lck_mtx_unlock(&vm_swap_data_lock);
thread_wakeup((event_t) &vm_num_swap_files);
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
if (vm_num_swap_files == 1) {
c_overage_swapped_limit = (uint32_t)size / C_SEG_BUFSIZE;
c_overage_swapped_limit /= 2;
}
}
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
break;
} else {
size = size / 2;
C_SEG_WRITE_PROTECT(c_seg);
#endif
if (retval == 0) {
- VM_STAT_INCR_BY(swapins, size >> PAGE_SHIFT);
+ counter_add(&vm_statistics_swapins, size >> PAGE_SHIFT);
} else {
vm_swap_get_failures++;
}
vnode_put(swf->swp_vp);
}
- VM_STAT_INCR_BY(swapins, c_size >> PAGE_SHIFT);
+ counter_add(&vm_statistics_swapins, c_size >> PAGE_SHIFT);
if (vm_swap_put(addr, &f_offset, c_size, c_seg, NULL)) {
vm_offset_t c_buffer;
goto swap_io_failed;
}
- VM_STAT_INCR_BY(swapouts, c_size >> PAGE_SHIFT);
+ counter_add(&vm_statistics_swapouts, c_size >> PAGE_SHIFT);
lck_mtx_lock_spin_always(&c_seg->c_lock);
#include <libkern/crypto/aes.h>
#include <kern/host_statistics.h>
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
#define MIN_SWAP_FILE_SIZE (64 * 1024 * 1024ULL)
#define MAX_SWAP_FILE_SIZE (128 * 1024 * 1024ULL)
-#else /* CONFIG_EMBEDDED */
+#else /* !XNU_TARGET_OS_OSX */
#define MIN_SWAP_FILE_SIZE (256 * 1024 * 1024ULL)
#define MAX_SWAP_FILE_SIZE (1 * 1024 * 1024 * 1024ULL)
-#endif /* CONFIG_EMBEDDED */
+#endif /* !XNU_TARGET_OS_OSX */
#define COMPRESSED_SWAP_CHUNK_SIZE (C_SEG_BUFSIZE)
/* pager-specific data */
lck_mtx_t cpgr_lock;
- unsigned int cpgr_references;
+#if MEMORY_OBJECT_HAS_REFCOUNT
+#define cpgr_references cpgr_hdr.mo_ref
+#else
+ os_ref_atomic_t cpgr_references;
+#endif
unsigned int cpgr_num_slots;
unsigned int cpgr_num_slots_occupied;
union {
}
compressor_pager_lock(pager);
- assert(pager->cpgr_references > 0);
- pager->cpgr_references++;
+ os_ref_retain_locked_raw(&pager->cpgr_references, NULL);
compressor_pager_unlock(pager);
}
}
compressor_pager_lock(pager);
- if (--pager->cpgr_references > 0) {
+ if (os_ref_release_locked_raw(&pager->cpgr_references, NULL) > 0) {
compressor_pager_unlock(pager);
return;
}
}
compressor_pager_lock_init(pager);
- pager->cpgr_references = 1;
+ os_ref_init_raw(&pager->cpgr_references, NULL);
pager->cpgr_num_slots = (uint32_t)(new_size / PAGE_SIZE);
pager->cpgr_num_slots_occupied = 0;
sizeof(struct compressor_pager), ZC_NOENCRYPT,
ZONE_ID_ANY, ^(zone_t z){
#if defined(__LP64__)
- zone_set_submap_idx(z, Z_SUBMAP_IDX_VA_RESTRICTED_MAP);
+ zone_set_submap_idx(z, Z_SUBMAP_IDX_VA_RESTRICTED);
#else
(void)z;
#endif /* defined(__LP64__) */
compressor_slots_zones_names[idx],
compressor_slots_zones_sizes[idx], ZC_NONE,
ZONE_ID_ANY, ^(zone_t z){
- zone_set_submap_idx(z, Z_SUBMAP_IDX_VA_RESTRICTED_MAP);
+ zone_set_submap_idx(z, Z_SUBMAP_IDX_VA_RESTRICTED);
});
}
#endif /* defined(__LP64__) */
#include <kern/kern_types.h>
#include <kern/host_statistics.h>
-#include <kern/counters.h>
+#include <kern/counter.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
uint64_t vm_hard_throttle_threshold;
+#if DEBUG || DEVELOPMENT
+static bool vmtc_panic_instead = false;
+#endif /* DEBUG || DEVELOPMENT */
OS_ALWAYS_INLINE
boolean_t
#define VM_STAT_DECOMPRESSIONS() \
MACRO_BEGIN \
- VM_STAT_INCR(decompressions); \
+ counter_inc(&vm_statistics_decompressions); \
current_thread()->decompressions++; \
MACRO_END
PE_parse_boot_argn("vm_protect_privileged_from_untrusted",
&vm_protect_privileged_from_untrusted,
sizeof(vm_protect_privileged_from_untrusted));
+
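+ /* The "text_corruption_panic" boot-arg (DEBUG/DEVELOPMENT only) turns the quiet text-page invalidation done by revalidate_text_page() into a panic. */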
+#if DEBUG || DEVELOPMENT
+ (void)PE_parse_boot_argn("text_corruption_panic", &vmtc_panic_instead, sizeof(vmtc_panic_instead));
+#endif /* DEBUG || DEVELOPMENT */
}
__startup_func
} else {
vm_page_zero_fill(m);
- VM_STAT_INCR(zero_fill_count);
+ counter_inc(&vm_statistics_zero_fill_count);
DTRACE_VM2(zfod, int, 1, (uint64_t *), NULL);
}
assert(!m->vmp_laundry);
#endif
wait_result = PAGE_SLEEP(object, m, interruptible);
- counter(c_vm_fault_page_block_busy_kernel++);
-
if (wait_result != THREAD_AWAKENED) {
vm_fault_cleanup(object, first_m);
thread_interrupt_level(interruptible_state);
vm_fault_cleanup(object, first_m);
- counter(c_vm_fault_page_block_backoff_kernel++);
vm_object_lock(object);
assert(object->ref_count > 0);
*/
vm_object_reference_locked(object);
vm_fault_cleanup(object, first_m);
- counter(c_vm_fault_page_block_backoff_kernel++);
vm_object_lock(object);
assert(object->ref_count > 0);
vm_fault_cleanup(object, first_m);
- counter(c_vm_fault_page_block_backoff_kernel++);
-
vm_object_lock(object);
assert(object->ref_count > 0);
vm_object_unlock(object);
my_fault = DBG_COW_FAULT;
- VM_STAT_INCR(cow_faults);
+ counter_inc(&vm_statistics_cow_faults);
DTRACE_VM2(cow_fault, int, 1, (uint64_t *), NULL);
current_task()->cow_faults++;
vm_object_reference_locked(copy_object);
vm_object_unlock(copy_object);
vm_fault_cleanup(object, first_m);
- counter(c_vm_fault_page_block_backoff_kernel++);
vm_object_lock(copy_object);
assert(copy_object->ref_count > 0);
- VM_OBJ_RES_DECR(copy_object);
vm_object_lock_assert_exclusive(copy_object);
copy_object->ref_count--;
assert(copy_object->ref_count > 0);
if (copy_m == VM_PAGE_NULL) {
RELEASE_PAGE(m);
- VM_OBJ_RES_DECR(copy_object);
vm_object_lock_assert_exclusive(copy_object);
copy_object->ref_count--;
assert(copy_object->ref_count > 0);
copy_object->ref_count--;
assert(copy_object->ref_count > 0);
- VM_OBJ_RES_DECR(copy_object);
vm_object_unlock(copy_object);
break;
fault_type = (change_wiring ? VM_PROT_NONE : caller_prot);
- VM_STAT_INCR(faults);
- current_task()->faults++;
+ counter_inc(&vm_statistics_faults);
+ counter_inc(&current_task()->faults);
original_fault_type = fault_type;
need_copy = FALSE;
if (result == THREAD_WAITING) {
result = thread_block(THREAD_CONTINUE_NULL);
-
- counter(c_vm_fault_page_block_busy_kernel++);
}
if (result == THREAD_AWAKENED || result == THREAD_RESTART) {
goto RetryFault;
vm_fault_collapse_total++;
type_of_fault = DBG_COW_FAULT;
- VM_STAT_INCR(cow_faults);
+ counter_inc(&vm_statistics_cow_faults);
DTRACE_VM2(cow_fault, int, 1, (uint64_t *), NULL);
current_task()->cow_faults++;
* lock across the zero fill.
*/
vm_page_zero_fill(m);
- VM_STAT_INCR(zero_fill_count);
+ counter_inc(&vm_statistics_zero_fill_count);
DTRACE_VM2(zfod, int, 1, (uint64_t *), NULL);
}
if (page_needs_data_sync) {
vm_map_offset_t fault_phys_offset;
struct vm_object_fault_info fault_info = {};
- VM_STAT_INCR(faults);
+ counter_inc(&vm_statistics_faults);
if (thread != THREAD_NULL && thread->task != TASK_NULL) {
- thread->task->faults++;
+ counter_inc(&thread->task->faults);
}
/*
}
}
-void
-vm_page_validate_cs(
- vm_page_t page,
- vm_map_size_t fault_page_size,
- vm_map_offset_t fault_phys_offset)
+static void
+vm_page_map_and_validate_cs(
+ vm_object_t object,
+ vm_page_t page)
{
- vm_object_t object;
vm_object_offset_t offset;
vm_map_offset_t koffset;
vm_map_size_t ksize;
boolean_t busy_page;
boolean_t need_unmap;
- object = VM_PAGE_OBJECT(page);
- vm_object_lock_assert_held(object);
-
- if (vm_page_validate_cs_fast(page, fault_page_size, fault_phys_offset)) {
- return;
- }
vm_object_lock_assert_exclusive(object);
assert(object->code_signed);
vm_object_paging_end(object);
}
+void
+vm_page_validate_cs(
+ vm_page_t page,
+ vm_map_size_t fault_page_size,
+ vm_map_offset_t fault_phys_offset)
+{
+ vm_object_t object;
+
+ object = VM_PAGE_OBJECT(page);
+ vm_object_lock_assert_held(object);
+
+ if (vm_page_validate_cs_fast(page, fault_page_size, fault_phys_offset)) {
+ return;
+ }
+ vm_page_map_and_validate_cs(object, page);
+}
+
void
vm_page_validate_cs_mapped_chunk(
vm_page_t page,
*vmrtfrv = numextracted;
return early_exit;
}
+
+/*
+ * Only allow one diagnosis to be in flight at a time, to avoid
+ * creating too much additional memory usage.
+ */
+static volatile uint_t vmtc_diagnosing;
+unsigned int vmtc_total;
+unsigned int vmtc_undiagnosed;
+unsigned int vmtc_not_eligible;
+unsigned int vmtc_copyin_fail;
+unsigned int vmtc_not_found;
+unsigned int vmtc_one_bit_flip;
+unsigned int vmtc_byte_counts[MAX_TRACK_POWER2 + 1];
+
+#if DEVELOPMENT || DEBUG
+/*
+ * Keep around the last diagnosed corruption buffers to aid in debugging.
+ */
+static size_t vmtc_last_buffer_size;
+static uint64_t *vmtc_last_before_buffer = NULL;
+static uint64_t *vmtc_last_after_buffer = NULL;
+#endif /* DEVELOPMENT || DEBUG */
+
+/*
+ * Set things up so we can diagnose a potential text page corruption.
+ */
+static uint64_t *
+vmtc_text_page_diagnose_setup(
+ vm_map_offset_t code_addr)
+{
+ uint64_t *buffer;
+ size_t size = MIN(vm_map_page_size(current_map()), PAGE_SIZE);
+
+ (void)OSAddAtomic(1, &vmtc_total);
+
+ /*
+ * If another is being diagnosed, skip this one.
+ */
+ if (!OSCompareAndSwap(0, 1, &vmtc_diagnosing)) {
+ (void)OSAddAtomic(1, &vmtc_undiagnosed);
+ return NULL;
+ }
+
+ /*
+ * Get the contents of the corrupt page.
+ */
+ buffer = kheap_alloc(KHEAP_DEFAULT, size, Z_WAITOK);
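+ /* size is a power of two, so (size - 1) is the page-mask argument that vm_map_trunc_page() expects */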
+ if (copyin((user_addr_t)vm_map_trunc_page(code_addr, size - 1), buffer, size) != 0) {
+ /* copyin error, so undo things */
+ kheap_free(KHEAP_DEFAULT, buffer, size);
+ (void)OSAddAtomic(1, &vmtc_undiagnosed);
+ ++vmtc_copyin_fail;
+ if (!OSCompareAndSwap(1, 0, &vmtc_diagnosing)) {
+ panic("Bad compare and swap in setup!");
+ }
+ return NULL;
+ }
+ return buffer;
+}
+
+/*
+ * Diagnose the text page by comparing its contents with
+ * the one we've previously saved.
+ */
+static void
+vmtc_text_page_diagnose(
+ vm_map_offset_t code_addr,
+ uint64_t *old_code_buffer)
+{
+ uint64_t *new_code_buffer;
+ size_t size = MIN(vm_map_page_size(current_map()), PAGE_SIZE);
+ uint_t count = (uint_t)size / sizeof(uint64_t);
+ uint_t diff_count = 0;
+ bool bit_flip = false;
+ uint_t b;
+ uint64_t *new;
+ uint64_t *old;
+
+ new_code_buffer = kheap_alloc(KHEAP_DEFAULT, size, Z_WAITOK);
+ if (copyin((user_addr_t)vm_map_trunc_page(code_addr, size - 1), new_code_buffer, size) != 0) {
+ /* copyin error, so undo things */
+ (void)OSAddAtomic(1, &vmtc_undiagnosed);
+ ++vmtc_copyin_fail;
+ goto done;
+ }
+
+ new = new_code_buffer;
+ old = old_code_buffer;
+ for (; count-- > 0; ++new, ++old) {
+ if (*new == *old) {
+ continue;
+ }
+
+ /*
+ * On first diff, check for a single bit flip
+ */
+ if (diff_count == 0) {
+ uint64_t x = (*new ^ *old);
+ assert(x != 0);
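+ /* x is nonzero here, so (x & (x - 1)) == 0 exactly when the two words differ in a single bit */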
+ if ((x & (x - 1)) == 0) {
+ bit_flip = true;
+ ++diff_count;
+ continue;
+ }
+ }
+
+ /*
+ * count up the number of different bytes.
+ */
+ for (b = 0; b < sizeof(uint64_t); ++b) {
+ char *n = (char *)new;
+ char *o = (char *)old;
+ if (n[b] != o[b]) {
+ ++diff_count;
+ }
+ }
+
+ /* quit counting when too many */
+ if (diff_count > (1 << MAX_TRACK_POWER2)) {
+ break;
+ }
+ }
+
+ if (diff_count > 1) {
+ bit_flip = false;
+ }
+
+ if (diff_count == 0) {
+ ++vmtc_not_found;
+ } else if (bit_flip) {
+ ++vmtc_one_bit_flip;
+ ++vmtc_byte_counts[0];
+ } else {
+ for (b = 0; b <= MAX_TRACK_POWER2; ++b) {
+ if (diff_count <= (1 << b)) {
+ ++vmtc_byte_counts[b];
+ break;
+ }
+ }
+ if (diff_count > (1 << MAX_TRACK_POWER2)) {
+ ++vmtc_byte_counts[MAX_TRACK_POWER2];
+ }
+ }
+
+done:
+ /*
+ * Free up the code copy buffers, but save the last
+ * set on development / debug kernels in case they
+ * can provide evidence for debugging memory stomps.
+ */
+#if DEVELOPMENT || DEBUG
+ if (vmtc_last_before_buffer != NULL) {
+ kheap_free(KHEAP_DEFAULT, vmtc_last_before_buffer, vmtc_last_buffer_size);
+ }
+ if (vmtc_last_after_buffer != NULL) {
+ kheap_free(KHEAP_DEFAULT, vmtc_last_after_buffer, vmtc_last_buffer_size);
+ }
+ vmtc_last_before_buffer = old_code_buffer;
+ vmtc_last_after_buffer = new_code_buffer;
+ vmtc_last_buffer_size = size;
+#else /* DEVELOPMENT || DEBUG */
+ kheap_free(KHEAP_DEFAULT, new_code_buffer, size);
+ kheap_free(KHEAP_DEFAULT, old_code_buffer, size);
+#endif /* DEVELOPMENT || DEBUG */
+
+ /*
+ * We're finished, so clear the diagnosing flag.
+ */
+ if (!OSCompareAndSwap(1, 0, &vmtc_diagnosing)) {
+ panic("Bad compare and swap in diagnose!");
+ }
+}
+
+/*
+ * For the given map and virtual address, find the object, offset, and page.
+ * This has to look up the map entry, verify protections, and walk any shadow chains.
+ * If found, returns with the object locked.
+ */
+static kern_return_t
+vmtc_revalidate_lookup(
+ vm_map_t map,
+ vm_map_offset_t vaddr,
+ vm_object_t *ret_object,
+ vm_object_offset_t *ret_offset,
+ vm_page_t *ret_page)
+{
+ vm_object_t object;
+ vm_object_offset_t offset;
+ vm_page_t page;
+ kern_return_t kr = KERN_SUCCESS;
+ uint8_t object_lock_type = OBJECT_LOCK_EXCLUSIVE;
+ vm_map_version_t version;
+ boolean_t wired;
+ struct vm_object_fault_info fault_info = {};
+ vm_map_t real_map = NULL;
+ vm_prot_t prot;
+ vm_object_t shadow;
+
+ /*
+ * Find the object/offset for the given location/map.
+ * Note this returns with the object locked.
+ */
+restart:
+ vm_map_lock_read(map);
+ object = VM_OBJECT_NULL; /* in case we come around the restart path */
+ kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
+ object_lock_type, &version, &object, &offset, &prot, &wired,
+ &fault_info, &real_map, NULL);
+ vm_map_unlock_read(map);
+ if (real_map != NULL && real_map != map) {
+ vm_map_unlock(real_map);
+ }
+
+ /*
+ * If there's no mapping here, or if we fail because the page
+ * wasn't mapped executable, we can ignore this.
+ */
+ if (kr != KERN_SUCCESS ||
+ object == NULL ||
+ !(prot & VM_PROT_EXECUTE)) {
+ kr = KERN_FAILURE;
+ goto done;
+ }
+
+ /*
+ * Chase down any shadow chains to find the actual page.
+ */
+ for (;;) {
+ /*
+ * See if the page is on the current object.
+ */
+ page = vm_page_lookup(object, vm_object_trunc_page(offset));
+ if (page != NULL) {
+ /* restart the lookup */
+ if (page->vmp_restart) {
+ vm_object_unlock(object);
+ goto restart;
+ }
+
+ /*
+ * If this page is busy, we need to wait for it.
+ */
+ if (page->vmp_busy) {
+ PAGE_SLEEP(object, page, TRUE);
+ vm_object_unlock(object);
+ goto restart;
+ }
+ break;
+ }
+
+ /*
+ * If the object doesn't have the page and
+ * has no shadow, then we can quit.
+ */
+ shadow = object->shadow;
+ if (shadow == NULL) {
+ kr = KERN_FAILURE;
+ goto done;
+ }
+
+ /*
+ * Move to the next object
+ */
+ offset += object->vo_shadow_offset;
+ vm_object_lock(shadow);
+ vm_object_unlock(object);
+ object = shadow;
+ shadow = VM_OBJECT_NULL;
+ }
+ *ret_object = object;
+ *ret_offset = vm_object_trunc_page(offset);
+ *ret_page = page;
+
+done:
+ if (kr != KERN_SUCCESS && object != NULL) {
+ vm_object_unlock(object);
+ }
+ return kr;
+}
+
+/*
+ * Check if a page is wired, needs extra locking.
+ */
+static bool
+is_page_wired(vm_page_t page)
+{
+ bool result;
+ vm_page_lock_queues();
+ result = VM_PAGE_WIRED(page);
+ vm_page_unlock_queues();
+ return result;
+}
+
+/*
+ * A fatal process error has occurred in the given task.
+ * Recheck the code signing of the text page at the given
+ * address to check for text page corruption.
+ *
+ * Returns KERN_FAILURE if a page was found to be corrupt
+ * by failing to match its code signature. KERN_SUCCESS
+ * means the page is either valid or we don't have the
+ * information to say it's corrupt.
+ */
+kern_return_t
+revalidate_text_page(task_t task, vm_map_offset_t code_addr)
+{
+ kern_return_t kr;
+ vm_map_t map;
+ vm_object_t object = NULL;
+ vm_object_offset_t offset;
+ vm_page_t page = NULL;
+ struct vnode *vnode;
+ bool do_invalidate = false;
+ uint64_t *diagnose_buffer = NULL;
+
+ map = task->map;
+ if (task->map == NULL) {
+ return KERN_SUCCESS;
+ }
+
+ kr = vmtc_revalidate_lookup(map, code_addr, &object, &offset, &page);
+ if (kr != KERN_SUCCESS) {
+ goto done;
+ }
+
+ /*
+ * The object needs to have a pager.
+ */
+ if (object->pager == NULL) {
+ goto done;
+ }
+
+ /*
+ * Needs to be a vnode backed page to have a signature.
+ */
+ vnode = vnode_pager_lookup_vnode(object->pager);
+ if (vnode == NULL) {
+ goto done;
+ }
+
+ /*
+ * Object checks to see if we should proceed.
+ */
+ if (!object->code_signed || /* no code signature to check */
+ object->internal || /* internal objects aren't signed */
+ object->terminating || /* the object and its pages are already going away */
+ !object->pager_ready) { /* this shouldn't happen, but the check doesn't hurt */
+ goto done;
+ }
+
+ /*
+ * Check the code signature of the page in question.
+ */
+ vm_page_map_and_validate_cs(object, page);
+
+ /*
+ * At this point:
+ * vmp_cs_validated |= validated (set if a code signature exists)
+ * vmp_cs_tainted |= tainted (set if code signature violation)
+ * vmp_cs_nx |= nx; ??
+ *
+ * if vmp_pmapped then have to pmap_disconnect..
+ * other flags to check on object or page?
+ */
+ if (page->vmp_cs_tainted != VMP_CS_ALL_FALSE) {
+#if DEBUG || DEVELOPMENT
+ /*
+ * On development builds, a boot-arg can be used to cause
+ * a panic, instead of a quiet repair.
+ */
+ if (vmtc_panic_instead) {
+ panic("Text page corruption detected: vm_page_t 0x%llx\n", (long long)(uintptr_t)page);
+ }
+#endif /* DEBUG || DEVELOPMENT */
+
+ /*
+ * We're going to invalidate this page. Mark it as busy so we can
+ * drop the object lock and use copyin() to save its contents.
+ */
+ do_invalidate = true;
+ assert(!page->vmp_busy);
+ page->vmp_busy = TRUE;
+ vm_object_unlock(object);
+ diagnose_buffer = vmtc_text_page_diagnose_setup(code_addr);
+ }
+
+done:
+ if (do_invalidate) {
+ vm_object_lock(object);
+ assert(page->vmp_busy);
+ assert(VM_PAGE_OBJECT(page) == object); /* Since the page was busy, this shouldn't change */
+ assert(page->vmp_offset == offset);
+ PAGE_WAKEUP_DONE(page); /* make no longer busy */
+
+ /*
+ * Invalidate, i.e. toss, the corrupted page.
+ */
+ if (!page->vmp_cleaning &&
+ !page->vmp_laundry &&
+ !page->vmp_fictitious &&
+ !page->vmp_precious &&
+ !page->vmp_absent &&
+ !page->vmp_error &&
+ !page->vmp_dirty &&
+ !is_page_wired(page)) {
+ if (page->vmp_pmapped) {
+ int refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(page));
+ if (refmod & VM_MEM_MODIFIED) {
+ SET_PAGE_DIRTY(page, FALSE);
+ }
+ if (refmod & VM_MEM_REFERENCED) {
+ page->vmp_reference = TRUE;
+ }
+ }
+ /* If the page seems intentionally modified, don't trash it. */
+ if (!page->vmp_dirty) {
+ VM_PAGE_FREE(page);
+ } else {
+ (void)OSAddAtomic(1, &vmtc_not_eligible);
+ }
+ } else {
+ (void)OSAddAtomic(1, &vmtc_not_eligible);
+ }
+ vm_object_unlock(object);
+
+ /*
+ * Now try to diagnose the type of failure by faulting
+ * in a new copy and diff'ing it with what we saved.
+ */
+ if (diagnose_buffer) {
+ vmtc_text_page_diagnose(code_addr, diagnose_buffer);
+ }
+ return KERN_FAILURE;
+ }
+
+ if (object != NULL) {
+ vm_object_unlock(object);
+ }
+ return KERN_SUCCESS;
+}
+
+#if DEBUG || DEVELOPMENT
+/*
+ * For implementing unit tests - ask the pmap to corrupt a text page.
+ * We have to find the page to get the physical address, then invoke
+ * the pmap.
+ */
+extern kern_return_t vm_corrupt_text_addr(uintptr_t);
+
+kern_return_t
+vm_corrupt_text_addr(uintptr_t va)
+{
+ task_t task = current_task();
+ vm_map_t map;
+ kern_return_t kr = KERN_SUCCESS;
+ vm_object_t object = VM_OBJECT_NULL;
+ vm_object_offset_t offset;
+ vm_page_t page = NULL;
+ pmap_paddr_t pa;
+
+ map = task->map;
+ if (task->map == NULL) {
+ printf("corrupt_text_addr: no map\n");
+ return KERN_FAILURE;
+ }
+
+ kr = vmtc_revalidate_lookup(map, (vm_map_offset_t)va, &object, &offset, &page);
+ if (kr != KERN_SUCCESS) {
+ printf("corrupt_text_addr: page lookup failed\n");
+ return kr;
+ }
+ /* get the physical address to use */
+ pa = ptoa(VM_PAGE_GET_PHYS_PAGE(page)) + (va - vm_object_trunc_page(va));
+
+ /*
+ * Check we have something we can work with.
+ * Due to racing with pageout as we enter the sysctl,
+ * it's theoretically possible to have the page disappear, just
+ * before the lookup.
+ *
+ * That's unlikely to happen often, but it can. I've filed radar 72857482
+ * to bubble up the error here to the sysctl result and have the
+ * test not FAIL in that case.
+ */
+ if (page->vmp_busy) {
+ printf("corrupt_text_addr: vmp_busy\n");
+ kr = KERN_FAILURE;
+ }
+ if (page->vmp_cleaning) {
+ printf("corrupt_text_addr: vmp_cleaning\n");
+ kr = KERN_FAILURE;
+ }
+ if (page->vmp_laundry) {
+ printf("corrupt_text_addr: vmp_cleaning\n");
+ kr = KERN_FAILURE;
+ }
+ if (page->vmp_fictitious) {
+ printf("corrupt_text_addr: vmp_fictitious\n");
+ kr = KERN_FAILURE;
+ }
+ if (page->vmp_precious) {
+ printf("corrupt_text_addr: vmp_precious\n");
+ kr = KERN_FAILURE;
+ }
+ if (page->vmp_absent) {
+ printf("corrupt_text_addr: vmp_absent\n");
+ kr = KERN_FAILURE;
+ }
+ if (page->vmp_error) {
+ printf("corrupt_text_addr: vmp_error\n");
+ kr = KERN_FAILURE;
+ }
+ if (page->vmp_dirty) {
+ printf("corrupt_text_addr: vmp_dirty\n");
+ kr = KERN_FAILURE;
+ }
+ if (is_page_wired(page)) {
+ printf("corrupt_text_addr: wired\n");
+ kr = KERN_FAILURE;
+ }
+ if (!page->vmp_pmapped) {
+ printf("corrupt_text_addr: !vmp_pmapped\n");
+ kr = KERN_FAILURE;
+ }
+
+ if (kr == KERN_SUCCESS) {
+ printf("corrupt_text_addr: using physaddr 0x%llx\n", (long long)pa);
+ kr = pmap_test_text_corruption(pa);
+ if (kr != KERN_SUCCESS) {
+ printf("corrupt_text_addr: pmap error %d\n", kr);
+ }
+ } else {
+ printf("corrupt_text_addr: object %p\n", object);
+ printf("corrupt_text_addr: offset 0x%llx\n", (uint64_t)offset);
+ printf("corrupt_text_addr: va 0x%llx\n", (uint64_t)va);
+ printf("corrupt_text_addr: vm_object_trunc_page(va) 0x%llx\n", (uint64_t)vm_object_trunc_page(va));
+ printf("corrupt_text_addr: vm_page_t %p\n", page);
+ printf("corrupt_text_addr: ptoa(PHYS_PAGE) 0x%llx\n", (uint64_t)ptoa(VM_PAGE_GET_PHYS_PAGE(page)));
+ printf("corrupt_text_addr: using physaddr 0x%llx\n", (uint64_t)pa);
+ }
+
+ if (object != VM_OBJECT_NULL) {
+ vm_object_unlock(object);
+ }
+ return kr;
+}
+#endif /* DEBUG || DEVELOPMENT */
/* pager-specific data */
queue_chain_t pager_queue; /* next & prev pagers */
- unsigned int ref_count; /* reference count */
- int is_ready; /* is this pager ready ? */
- int is_mapped; /* is this mem_obj mapped ? */
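+ /* pager reference count: aliased to the memory object header's mo_ref when available */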
+#if MEMORY_OBJECT_HAS_REFCOUNT
+#define fourk_pgr_hdr_ref fourk_pgr_hdr.mo_ref
+#else
+ os_ref_atomic_t fourk_pgr_hdr_ref;
+#endif
+ bool is_ready; /* is this pager ready ? */
+ bool is_mapped; /* is this mem_obj mapped ? */
struct fourk_pager_backing slots[FOURK_PAGER_SLOTS]; /* backing for each
* 4K-chunk */
} *fourk_pager_t;
pager = fourk_pager_lookup(mem_obj);
lck_mtx_lock(&fourk_pager_lock);
- assert(pager->ref_count > 0);
- pager->ref_count++;
+ os_ref_retain_locked_raw(&pager->fourk_pgr_hdr_ref, NULL);
lck_mtx_unlock(&fourk_pager_lock);
}
{
boolean_t needs_trimming;
int count_unmapped;
+ os_ref_count_t ref_count;
if (!locked) {
lck_mtx_lock(&fourk_pager_lock);
}
/* drop a reference on this pager */
- pager->ref_count--;
+ ref_count = os_ref_release_locked_raw(&pager->fourk_pgr_hdr_ref, NULL);
- if (pager->ref_count == 1) {
+ if (ref_count == 1) {
/*
* Only the "named" reference is left, which means that
* no one is really holding on to this pager anymore.
/* the pager is all ours: no need for the lock now */
lck_mtx_unlock(&fourk_pager_lock);
fourk_pager_terminate_internal(pager);
- } else if (pager->ref_count == 0) {
+ } else if (ref_count == 0) {
/*
* Dropped the existence reference; the memory object has
* been terminated. Do some final cleanup and release the
lck_mtx_lock(&fourk_pager_lock);
assert(pager->is_ready);
- assert(pager->ref_count > 0); /* pager is alive */
+ assert(os_ref_get_count_raw(&pager->fourk_pgr_hdr_ref) > 0); /* pager is alive */
if (pager->is_mapped == FALSE) {
/*
* First mapping of this pager: take an extra reference
* are removed.
*/
pager->is_mapped = TRUE;
- pager->ref_count++;
+ os_ref_retain_locked_raw(&pager->fourk_pgr_hdr_ref, NULL);
fourk_pager_count_mapped++;
}
lck_mtx_unlock(&fourk_pager_lock);
assert(mem_obj->mo_pager_ops == &fourk_pager_ops);
pager = (fourk_pager_t) mem_obj;
- assert(pager->ref_count > 0);
+ assert(os_ref_get_count_raw(&pager->fourk_pgr_hdr_ref) > 0);
return pager;
}
prev_pager = (fourk_pager_t)
queue_prev(&pager->pager_queue);
- if (pager->ref_count == 2 &&
+ if (os_ref_get_count_raw(&pager->fourk_pgr_hdr_ref) == 2 &&
pager->is_ready &&
!pager->is_mapped) {
/* this pager can be trimmed */
pager_queue);
pager->pager_queue.next = NULL;
pager->pager_queue.prev = NULL;
- assert(pager->ref_count == 2);
+ assert(os_ref_get_count_raw(&pager->fourk_pgr_hdr_ref) == 2);
/*
* We can't call deallocate_internal() because the pager
* has already been dequeued, but we still need to remove
* a reference.
*/
- pager->ref_count--;
+ (void)os_ref_release_locked_raw(&pager->fourk_pgr_hdr_ref, NULL);
fourk_pager_terminate_internal(pager);
}
}
return VM_OBJECT_NULL;
}
- assert(pager->ref_count > 0);
+ assert(os_ref_get_count_raw(&pager->fourk_pgr_hdr_ref) > 0);
assert(pager->fourk_pgr_hdr.mo_control != MEMORY_OBJECT_CONTROL_NULL);
object = memory_object_control_to_vm_object(pager->fourk_pgr_hdr.mo_control);
assert(object != VM_OBJECT_NULL);
pager->fourk_pgr_hdr.mo_pager_ops = &fourk_pager_ops;
pager->fourk_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;
- pager->ref_count = 2; /* existence + setup reference */
- pager->is_ready = FALSE;/* not ready until it has a "name" */
+ os_ref_init_count_raw(&pager->fourk_pgr_hdr_ref, NULL, 2); /* existence + setup reference */
+ pager->is_ready = FALSE; /* not ready until it has a "name" */
pager->is_mapped = FALSE;
for (i = 0; i < FOURK_PAGER_SLOTS; i++) {
pager = fourk_pager_lookup(mem_obj);
assert(pager->is_ready);
- assert(pager->ref_count > 1); /* pager is alive and mapped */
+ assert(os_ref_get_count_raw(&pager->fourk_pgr_hdr_ref) > 1); /* pager is alive and mapped */
PAGER_DEBUG(PAGER_PAGEIN, ("fourk_pager_data_request: %p, %llx, %x, %x, pager %p\n", mem_obj, offset, length, protection_required, pager));
retval = kr;
goto done;
}
- dst_object = mo_control->moc_object;
+ dst_object = memory_object_control_to_vm_object(mo_control);
assert(dst_object != VM_OBJECT_NULL);
#if __x86_64__ || __arm__ || __arm64__
return KERN_INVALID_ARGUMENT;
}
- assert(pager->ref_count > 0);
+ assert(os_ref_get_count_raw(&pager->fourk_pgr_hdr_ref) > 0);
assert(pager->fourk_pgr_hdr.mo_control != MEMORY_OBJECT_CONTROL_NULL);
if (index < 0 || index > FOURK_PAGER_SLOTS) {
TUNABLE(bool, iokit_iomd_setownership_enabled,
"iokit_iomd_setownership_enabled", true);
-vm_offset_t kmapoff_kaddr;
-unsigned int kmapoff_pgcnt;
-
static inline void
vm_mem_bootstrap_log(const char *message)
{
void
vm_mem_bootstrap(void)
{
- vm_offset_t start, end;
+ vm_offset_t start, end, kmapoff_kaddr;
/*
* Initializes resident memory structures.
vm_mem_bootstrap_log("vm_object_bootstrap");
vm_object_bootstrap();
+ vm_retire_boot_pages();
+
kernel_startup_initialize_upto(STARTUP_SUB_VM_KERNEL);
vm_mem_bootstrap_log("vm_map_init");
* pointer packing schemes using KERNEL_PMAP_HEAP_RANGE_START as a base
* do not admit this address to be part of any zone submap.
*/
- kmapoff_pgcnt = (early_random() & 0x1ff) + 1; /* 9 bits */
- if (vm_allocate_kernel(kernel_map, &kmapoff_kaddr,
- kmapoff_pgcnt * PAGE_SIZE_64, VM_FLAGS_ANYWHERE, VM_KERN_MEMORY_OSFMK) != KERN_SUCCESS) {
- panic("cannot vm_allocate %u kernel_map pages", kmapoff_pgcnt);
+ uint32_t kmapoff_pgcnt = (early_random() & 0x1ff) + 1; /* 9 bits */
+ if (kernel_memory_allocate(kernel_map, &kmapoff_kaddr,
+ ptoa(kmapoff_pgcnt), 0, KMA_KOBJECT | KMA_PERMANENT | KMA_VAONLY,
+ VM_KERN_MEMORY_OSFMK) != KERN_SUCCESS) {
+ panic("cannot kernel_memory_allocate %u pages", kmapoff_pgcnt);
}
vm_mem_bootstrap_log("pmap_init");
vm_offset_t mask,
ppnum_t max_pnum,
ppnum_t pnum_mask,
- int flags,
+ kma_flags_t flags,
vm_tag_t tag)
{
vm_object_t object;
vm_offset_t *addrp,
vm_size_t size,
vm_offset_t mask,
- int flags,
- vm_tag_t tag)
+ kma_flags_t flags,
+ vm_tag_t tag)
{
vm_object_t object;
vm_object_offset_t offset;
vm_page_t wired_page_list = NULL;
int guard_page_count = 0;
int wired_page_count = 0;
- int page_grab_count = 0;
- int i;
int vm_alloc_flags;
vm_map_kernel_flags_t vmk_flags;
vm_prot_t kma_prot;
-#if DEVELOPMENT || DEBUG
- task_t task = current_task();
-#endif /* DEVELOPMENT || DEBUG */
if (startup_phase < STARTUP_SUB_KMEM) {
panic("kernel_memory_allocate: VM is not ready");
assert(wired_page_count * PAGE_SIZE_64 == fill_size);
#if DEBUG || DEVELOPMENT
- VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START, size, 0, 0, 0);
+ VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
+ size, 0, 0, 0);
#endif
- for (i = 0; i < guard_page_count; i++) {
- for (;;) {
- mem = vm_page_grab_guard();
-
- if (mem != VM_PAGE_NULL) {
- break;
- }
- if (flags & KMA_NOPAGEWAIT) {
- kr = KERN_RESOURCE_SHORTAGE;
- goto out;
- }
- vm_page_more_fictitious();
+ for (int i = 0; i < guard_page_count; i++) {
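+ /* pass "may wait" unless the caller requested KMA_NOPAGEWAIT */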
+ mem = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
+ if (mem == VM_PAGE_NULL) {
+ kr = KERN_RESOURCE_SHORTAGE;
+ goto out;
}
mem->vmp_snext = guard_page_list;
guard_page_list = mem;
}
if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
- for (i = 0; i < wired_page_count; i++) {
- for (;;) {
- if (flags & KMA_LOMEM) {
- mem = vm_page_grablo();
- } else {
- mem = vm_page_grab();
- }
-
- if (mem != VM_PAGE_NULL) {
- break;
- }
-
- if (flags & KMA_NOPAGEWAIT) {
- kr = KERN_RESOURCE_SHORTAGE;
- goto out;
- }
- if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) {
- kr = KERN_RESOURCE_SHORTAGE;
- goto out;
- }
-
- /* VM privileged threads should have waited in vm_page_grab() and not get here. */
- assert(!(current_thread()->options & TH_OPT_VMPRIV));
-
- uint64_t unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE;
- if (unavailable > max_mem || map_size > (max_mem - unavailable)) {
- kr = KERN_RESOURCE_SHORTAGE;
- goto out;
- }
- VM_PAGE_WAIT();
- }
- page_grab_count++;
- if (KMA_ZERO & flags) {
- vm_page_zero_fill(mem);
- }
- mem->vmp_snext = wired_page_list;
- wired_page_list = mem;
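+ /* grab all the wired pages in one call; the KMA_LOMEM, KMA_ZERO and KMA_NOPAGEWAIT flags are passed through to vm_page_alloc_list() */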
+ kr = vm_page_alloc_list(wired_page_count, flags,
+ &wired_page_list);
+ if (kr != KERN_SUCCESS) {
+ goto out;
}
}
}
#if DEBUG || DEVELOPMENT
- VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
- if (task != NULL) {
- ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
- }
+ VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
+ wired_page_count, 0, 0, 0);
#endif
-
/*
* Return the memory, not zeroed.
*/
}
#if DEBUG || DEVELOPMENT
- VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
- if (task != NULL && kr == KERN_SUCCESS) {
- ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
- }
+ VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
+ wired_page_count, 0, 0, 0);
#endif
-
return kr;
}
-kern_return_t
-kernel_memory_populate(
+void
+kernel_memory_populate_with_pages(
vm_map_t map,
vm_offset_t addr,
vm_size_t size,
- int flags,
+ vm_page_t page_list,
+ kma_flags_t flags,
vm_tag_t tag)
{
- vm_object_t object;
- vm_object_offset_t offset, pg_offset;
- kern_return_t kr, pe_result;
- vm_page_t mem;
- vm_page_t page_list = NULL;
- int page_count = 0;
- int page_grab_count = 0;
- int i;
-
-#if DEBUG || DEVELOPMENT
- task_t task = current_task();
- VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START, size, 0, 0, 0);
-#endif
-
- page_count = (int) (size / PAGE_SIZE_64);
-
- assert((flags & (KMA_COMPRESSOR | KMA_KOBJECT)) != (KMA_COMPRESSOR | KMA_KOBJECT));
+ vm_object_t object;
+ kern_return_t pe_result;
+ vm_page_t mem;
+ int page_count = atop_64(size);
if (flags & KMA_COMPRESSOR) {
- pg_offset = page_count * PAGE_SIZE_64;
-
- do {
- for (;;) {
- mem = vm_page_grab();
-
- if (mem != VM_PAGE_NULL) {
- break;
- }
-
- VM_PAGE_WAIT();
- }
- page_grab_count++;
- if (KMA_ZERO & flags) {
- vm_page_zero_fill(mem);
- }
- mem->vmp_snext = page_list;
- page_list = mem;
-
- pg_offset -= PAGE_SIZE_64;
-
- kr = pmap_enter_options(kernel_pmap,
- addr + pg_offset, VM_PAGE_GET_PHYS_PAGE(mem),
- VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE,
- PMAP_OPTIONS_INTERNAL, NULL);
- assert(kr == KERN_SUCCESS);
- } while (pg_offset);
-
- offset = addr;
- object = compressor_object;
-
- vm_object_lock(object);
-
- for (pg_offset = 0;
- pg_offset < size;
- pg_offset += PAGE_SIZE_64) {
- mem = page_list;
- page_list = mem->vmp_snext;
- mem->vmp_snext = NULL;
-
- vm_page_insert(mem, object, offset + pg_offset);
- assert(mem->vmp_busy);
-
- mem->vmp_busy = FALSE;
- mem->vmp_pmapped = TRUE;
- mem->vmp_wpmapped = TRUE;
- mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
- }
- vm_object_unlock(object);
-
-#if KASAN
- if (map == compressor_map) {
- kasan_notify_address_nopoison(addr, size);
- } else {
- kasan_notify_address(addr, size);
- }
-#endif
-
-#if DEBUG || DEVELOPMENT
- VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
- if (task != NULL) {
- ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
- }
-#endif
- return KERN_SUCCESS;
+ panic("%s(%p,0x%llx,0x%llx,0x%x): KMA_COMPRESSOR", __func__,
+ map, (uint64_t) addr, (uint64_t) size, flags);
}
- for (i = 0; i < page_count; i++) {
- for (;;) {
- if (flags & KMA_LOMEM) {
- mem = vm_page_grablo();
- } else {
- mem = vm_page_grab();
- }
-
- if (mem != VM_PAGE_NULL) {
- break;
- }
-
- if (flags & KMA_NOPAGEWAIT) {
- kr = KERN_RESOURCE_SHORTAGE;
- goto out;
- }
- if ((flags & KMA_LOMEM) &&
- (vm_lopage_needed == TRUE)) {
- kr = KERN_RESOURCE_SHORTAGE;
- goto out;
- }
- VM_PAGE_WAIT();
- }
- page_grab_count++;
- if (KMA_ZERO & flags) {
- vm_page_zero_fill(mem);
- }
- mem->vmp_snext = page_list;
- page_list = mem;
- }
if (flags & KMA_KOBJECT) {
- offset = addr;
object = kernel_object;
vm_object_lock(object);
* take reference on object;
* unlock map;
*/
- panic("kernel_memory_populate(%p,0x%llx,0x%llx,0x%x): "
- "!KMA_KOBJECT",
+ panic("%s(%p,0x%llx,0x%llx,0x%x): !KMA_KOBJECT", __func__,
map, (uint64_t) addr, (uint64_t) size, flags);
}
- for (pg_offset = 0;
+ for (vm_object_offset_t pg_offset = 0;
pg_offset < size;
pg_offset += PAGE_SIZE_64) {
if (page_list == NULL) {
- panic("kernel_memory_populate: page_list == NULL");
+ panic("%s: page_list too short", __func__);
}
mem = page_list;
assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
mem->vmp_q_state = VM_PAGE_IS_WIRED;
mem->vmp_wire_count++;
- if (__improbable(mem->vmp_wire_count == 0)) {
- panic("kernel_memory_populate(%p): wire_count overflow", mem);
+ if (mem->vmp_wire_count == 0) {
+ panic("%s(%p): wire_count overflow", __func__, mem);
}
- vm_page_insert_wired(mem, object, offset + pg_offset, tag);
+ vm_page_insert_wired(mem, object, addr + pg_offset, tag);
mem->vmp_busy = FALSE;
mem->vmp_pmapped = TRUE;
assert(pe_result == KERN_SUCCESS);
if (flags & KMA_NOENCRYPT) {
- bzero(CAST_DOWN(void *, (addr + pg_offset)), PAGE_SIZE);
+ __nosan_bzero(CAST_DOWN(void *, (addr + pg_offset)), PAGE_SIZE);
pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
}
}
+ if (page_list) {
+ panic("%s: page_list too long", __func__);
+ }
vm_object_unlock(object);
vm_page_lockspin_queues();
vm_page_wire_count += page_count;
vm_page_unlock_queues();
- vm_tag_update_size(tag, ptoa_64(page_count));
-
-#if DEBUG || DEVELOPMENT
- VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
- if (task != NULL) {
- ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
- }
-#endif
+ vm_tag_update_size(tag, size);
#if KASAN
if (map == compressor_map) {
kasan_notify_address(addr, size);
}
#endif
- return KERN_SUCCESS;
+}
-out:
- if (page_list) {
- vm_page_free_list(page_list, FALSE);
- }
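+/*
+ * kernel_memory_populate:
+ * Allocate pages and wire them into the given range.
+ * The KMA_COMPRESSOR case is handled inline below; otherwise the
+ * pages come from vm_page_alloc_list() and are handed to
+ * kernel_memory_populate_with_pages().
+ */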
+kern_return_t
+kernel_memory_populate(
+ vm_map_t map,
+ vm_offset_t addr,
+ vm_size_t size,
+ kma_flags_t flags,
+ vm_tag_t tag)
+{
+ vm_object_t object;
+ vm_object_offset_t offset, pg_offset;
+ kern_return_t kr = KERN_SUCCESS;
+ vm_page_t mem;
+ vm_page_t page_list = NULL;
+ int page_count = atop_64(size);
#if DEBUG || DEVELOPMENT
- VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
- if (task != NULL && kr == KERN_SUCCESS) {
- ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
- }
+ VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
+ size, 0, 0, 0);
#endif
+ assert((flags & (KMA_COMPRESSOR | KMA_KOBJECT)) != (KMA_COMPRESSOR | KMA_KOBJECT));
+
+ if (flags & KMA_COMPRESSOR) {
+ pg_offset = page_count * PAGE_SIZE_64;
+
+ do {
+ for (;;) {
+ mem = vm_page_grab();
+
+ if (mem != VM_PAGE_NULL) {
+ break;
+ }
+
+ VM_PAGE_WAIT();
+ }
+ if (KMA_ZERO & flags) {
+ vm_page_zero_fill(mem);
+ }
+ mem->vmp_snext = page_list;
+ page_list = mem;
+
+ pg_offset -= PAGE_SIZE_64;
+
+ kr = pmap_enter_options(kernel_pmap,
+ addr + pg_offset, VM_PAGE_GET_PHYS_PAGE(mem),
+ VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE,
+ PMAP_OPTIONS_INTERNAL, NULL);
+ assert(kr == KERN_SUCCESS);
+ } while (pg_offset);
+
+ offset = addr;
+ object = compressor_object;
+
+ vm_object_lock(object);
+
+ for (pg_offset = 0;
+ pg_offset < size;
+ pg_offset += PAGE_SIZE_64) {
+ mem = page_list;
+ page_list = mem->vmp_snext;
+ mem->vmp_snext = NULL;
+
+ vm_page_insert(mem, object, offset + pg_offset);
+ assert(mem->vmp_busy);
+
+ mem->vmp_busy = FALSE;
+ mem->vmp_pmapped = TRUE;
+ mem->vmp_wpmapped = TRUE;
+ mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
+ }
+ vm_object_unlock(object);
+
+#if KASAN
+ if (map == compressor_map) {
+ kasan_notify_address_nopoison(addr, size);
+ } else {
+ kasan_notify_address(addr, size);
+ }
+#endif
+
+#if DEBUG || DEVELOPMENT
+ task_t task = current_task();
+ if (task != NULL) {
+ ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_count);
+ }
+#endif
+ } else {
+ kr = vm_page_alloc_list(page_count, flags, &page_list);
+ if (kr == KERN_SUCCESS) {
+ kernel_memory_populate_with_pages(map, addr, size,
+ page_list, flags, tag);
+ }
+ }
+
+#if DEBUG || DEVELOPMENT
+ VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
+ page_count, 0, 0, 0);
+#endif
return kr;
}
vm_map_t map,
vm_offset_t addr,
vm_size_t size,
- int flags,
+ kma_flags_t flags,
vm_tag_t tag)
{
vm_object_t object;
vm_offset_t *addrp,
vm_size_t size,
vm_tag_t tag,
- int flags)
+ kma_flags_t flags)
{
kern_return_t kr = kernel_memory_allocate(map, addrp, size, 0, flags, tag);
if (kr == KERN_SUCCESS) {
return kr;
}
+/*
+ * Routine: copyoutmap_atomic{32, 64}
+ * Purpose:
+ * Like copyoutmap, except that the operation is atomic.
+ * Takes in value rather than *fromdata pointer.
+ */
+kern_return_t
+copyoutmap_atomic32(
+ vm_map_t map,
+ uint32_t value,
+ vm_map_address_t toaddr)
+{
+ kern_return_t kr = KERN_SUCCESS;
+ vm_map_t oldmap;
+
+ if (vm_map_pmap(map) == pmap_kernel()) {
+ /* assume a correct toaddr */
+ *(uint32_t *)toaddr = value;
+ } else if (current_map() == map) {
+ if (copyout_atomic32(value, toaddr) != 0) {
+ kr = KERN_INVALID_ADDRESS;
+ }
+ } else {
+ vm_map_reference(map);
+ oldmap = vm_map_switch(map);
+ if (copyout_atomic32(value, toaddr) != 0) {
+ kr = KERN_INVALID_ADDRESS;
+ }
+ vm_map_switch(oldmap);
+ vm_map_deallocate(map);
+ }
+ return kr;
+}
+
+kern_return_t
+copyoutmap_atomic64(
+ vm_map_t map,
+ uint64_t value,
+ vm_map_address_t toaddr)
+{
+ kern_return_t kr = KERN_SUCCESS;
+ vm_map_t oldmap;
+
+ if (vm_map_pmap(map) == pmap_kernel()) {
+ /* assume a correct toaddr */
+ *(uint64_t *)toaddr = value;
+ } else if (current_map() == map) {
+ if (copyout_atomic64(value, toaddr) != 0) {
+ kr = KERN_INVALID_ADDRESS;
+ }
+ } else {
+ vm_map_reference(map);
+ oldmap = vm_map_switch(map);
+ if (copyout_atomic64(value, toaddr) != 0) {
+ kr = KERN_INVALID_ADDRESS;
+ }
+ vm_map_switch(oldmap);
+ vm_map_deallocate(map);
+ }
+ return kr;
+}
+
/*
*
* The following two functions are to be used when exposing kernel
#include <kern/locks.h>
+struct vm_page;
+
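+/*
+ * Flags for kernel_memory_allocate() and related routines,
+ * previously loose KMA_* #defines, now a typed option set.
+ */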
+__options_decl(kma_flags_t, uint32_t, {
+ KMA_NONE = 0x00000000,
+ KMA_HERE = 0x00000001,
+ KMA_NOPAGEWAIT = 0x00000002,
+ KMA_KOBJECT = 0x00000004,
+ KMA_LOMEM = 0x00000008,
+ KMA_GUARD_FIRST = 0x00000010,
+ KMA_GUARD_LAST = 0x00000020,
+ KMA_PERMANENT = 0x00000040,
+ KMA_NOENCRYPT = 0x00000080,
+ KMA_KSTACK = 0x00000100,
+ KMA_VAONLY = 0x00000200,
+ /*
+ * Pages belonging to the compressor are not on the paging queues,
+ * nor are they counted as wired.
+ */
+ KMA_COMPRESSOR = 0x00000400,
+ KMA_ATOMIC = 0x00000800,
+ KMA_ZERO = 0x00001000,
+ KMA_PAGEABLE = 0x00002000,
+ KMA_KHEAP = 0x00004000, /* Pages belonging to zones backing one of kalloc_heap. */
+});
+
extern kern_return_t kernel_memory_allocate(
vm_map_t map,
vm_offset_t *addrp,
vm_size_t size,
vm_offset_t mask,
- int flags,
+ kma_flags_t flags,
vm_tag_t tag);
-/* flags for kernel_memory_allocate */
-#define KMA_HERE 0x01
-#define KMA_NOPAGEWAIT 0x02
-#define KMA_KOBJECT 0x04
-#define KMA_LOMEM 0x08
-#define KMA_GUARD_FIRST 0x10
-#define KMA_GUARD_LAST 0x20
-#define KMA_PERMANENT 0x40
-#define KMA_NOENCRYPT 0x80
-#define KMA_KSTACK 0x100
-#define KMA_VAONLY 0x200
-#define KMA_COMPRESSOR 0x400 /* Pages belonging to the compressor are not on the paging queues, nor are they counted as wired. */
-#define KMA_ATOMIC 0x800
-#define KMA_ZERO 0x1000
-#define KMA_PAGEABLE 0x2000
-#define KMA_KHEAP 0x4000 /* Pages belonging to zones backing one of kalloc_heap. */
-
extern kern_return_t kmem_alloc(
vm_map_t map,
vm_offset_t *addrp,
vm_offset_t mask,
ppnum_t max_pnum,
ppnum_t pnum_mask,
- int flags,
+ kma_flags_t flags,
vm_tag_t tag);
extern kern_return_t kmem_alloc_flags(
vm_offset_t *addrp,
vm_size_t size,
vm_tag_t tag,
- int flags);
+ kma_flags_t flags);
extern kern_return_t kmem_alloc_pageable(
vm_map_t map,
vm_size_t size,
vm_tag_t tag) __XNU_INTERNAL(kmem_alloc_kobject);
+extern void kernel_memory_populate_with_pages(
+ vm_map_t map,
+ vm_offset_t addr,
+ vm_size_t size,
+ struct vm_page *page_list,
+ kma_flags_t flags,
+ vm_tag_t tag);
+
extern kern_return_t kernel_memory_populate(
vm_map_t map,
vm_offset_t addr,
vm_size_t size,
- int flags,
+ kma_flags_t flags,
vm_tag_t tag);
extern void kernel_memory_depopulate(
vm_map_t map,
vm_offset_t addr,
vm_size_t size,
- int flags,
+ kma_flags_t flags,
vm_tag_t tag);
extern kern_return_t memory_object_iopl_request(
#if VM_MAX_TAG_ZONES
extern void vm_allocation_zones_init(void);
-extern void vm_tag_will_update_zone(vm_tag_t tag, uint32_t zidx);
-extern void vm_tag_update_zone_size(vm_tag_t tag, uint32_t zidx, int64_t delta, int64_t dwaste);
-
-extern vm_allocation_zone_total_t ** vm_allocation_zone_totals;
+extern vm_tag_t vm_tag_will_update_zone(vm_tag_t tag, uint32_t zidx, uint32_t zflags);
+extern void vm_tag_update_zone_size(vm_tag_t tag, uint32_t zidx, long delta);
#endif /* VM_MAX_TAG_ZONES */
vm_map_offset_t toaddr,
vm_size_t length);
+extern kern_return_t copyoutmap_atomic32(
+ vm_map_t map,
+ uint32_t value,
+ vm_map_offset_t toaddr);
+
+extern kern_return_t copyoutmap_atomic64(
+ vm_map_t map,
+ uint64_t value,
+ vm_map_offset_t toaddr);
+
extern kern_return_t kmem_alloc_external(
vm_map_t map,
vm_offset_t *addrp,
* Virtual memory mapping module.
*/
-#include <task_swapper.h>
#include <mach_assert.h>
#include <vm/vm_options.h>
#include <kern/assert.h>
#include <kern/backtrace.h>
-#include <kern/counters.h>
+#include <kern/counter.h>
#include <kern/exc_guard.h>
#include <kern/kalloc.h>
#include <kern/zalloc_internal.h>
vm_map_t map,
vm_map_offset_t addr,
vm_map_size_t size,
- vm_prot_t required_protection,
boolean_t copy,
struct vm_map_header *map_header,
vm_prot_t *cur_protection,
}
/*
- * Placeholder object for submap operations. This object is dropped
- * into the range by a call to vm_map_find, and removed when
- * vm_map_submap creates the submap.
+ * vm_map_require:
+ *
+ * Ensures that the argument is memory allocated from the genuine
+ * vm map zone. (See zone_id_require_allow_foreign).
*/
-
-vm_object_t vm_submap_object;
+void
+vm_map_require(vm_map_t map)
+{
+ zone_id_require_allow_foreign(ZONE_ID_VM_MAP, sizeof(struct _vm_map), map);
+}
static __startup_data vm_offset_t map_data;
static __startup_data vm_size_t map_data_size;
vm_object_offset_t crypto_start, crypto_end;
int vm_flags;
vm_map_kernel_flags_t vmk_flags;
+ boolean_t cache_pager;
vm_flags = 0;
vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
crypto_backing_offset = VME_OFFSET(&tmp_entry);
}
+ cache_pager = TRUE;
+#if XNU_TARGET_OS_OSX
+ if (vm_map_is_alien(map)) {
+ cache_pager = FALSE;
+ }
+#endif /* XNU_TARGET_OS_OSX */
+
/*
* Lookup (and create if necessary) the protected memory object
* matching that VM object.
crypto_backing_offset,
crypt_info,
crypto_start,
- crypto_end);
+ crypto_end,
+ cache_pager);
/* release extra ref on protected object */
vm_object_deallocate(protected_object);
sizeof(debug4k_filter));
#endif /* MACH_ASSERT */
- vm_map_zone = zone_create(VM_MAP_ZONE_NAME, sizeof(struct _vm_map),
- VM_MAP_ZFLAGS);
+ vm_map_zone = zone_create_ext(VM_MAP_ZONE_NAME, sizeof(struct _vm_map),
+ VM_MAP_ZFLAGS, ZONE_ID_VM_MAP, NULL);
vm_map_entry_zone = zone_create(mez_name, sizeof(struct vm_map_entry),
ZC_NOENCRYPT | ZC_NOGZALLOC | ZC_NOCALLOUT);
*/
vm_map_entry_reserved_zone = zone_create_ext(VME_RESERVED_ZONE_NAME,
sizeof(struct vm_map_entry), VM_MAP_RESERVED_ZFLAGS,
- ZONE_ID_ANY, ^(zone_t z) {
- zone_set_noexpand(z, 64 * kentry_data_size);
- });
+ ZONE_ID_ANY, NULL);
vm_map_copy_zone = zone_create_ext("VM map copies", sizeof(struct vm_map_copy),
ZC_NOENCRYPT | ZC_CACHING, ZONE_ID_VM_MAP_COPY, NULL);
/*
* Add the stolen memory to zones, adjust zone size and stolen counts.
*/
- zcram(vm_map_zone, map_data, map_data_size);
- zcram(vm_map_entry_reserved_zone, kentry_data, kentry_data_size);
- zcram(vm_map_holes_zone, map_holes_data, map_holes_data_size);
+ zone_cram_foreign(vm_map_zone, map_data, map_data_size);
+ zone_cram_foreign(vm_map_entry_reserved_zone, kentry_data, kentry_data_size);
+ zone_cram_foreign(vm_map_holes_zone, map_holes_data, map_holes_data_size);
/*
* Since these are covered by zones, remove them from stolen page accounting.
vm_map_steal_memory(void)
{
uint16_t kentry_initial_pages;
+ uint16_t zone_foreign_pages;
map_data_size = zone_get_foreign_alloc_size(VM_MAP_ZONE_NAME,
sizeof(struct _vm_map), VM_MAP_ZFLAGS, 1);
* scheme is activated and/or entries are available from the general
* map entry pool.
*/
-#if defined(__LP64__)
- kentry_initial_pages = 10;
+#if defined(__LP64__)
+ kentry_initial_pages = (uint16_t)atop(16 * 4096);
#else
kentry_initial_pages = 6;
#endif
kentry_initial_pages *= 1024;
}
#endif
+ if (PE_parse_boot_argn("zone_foreign_pages", &zone_foreign_pages,
+ sizeof(zone_foreign_pages))) {
+ kentry_initial_pages = zone_foreign_pages;
+ }
kentry_data_size = zone_get_foreign_alloc_size(VME_RESERVED_ZONE_NAME,
sizeof(struct vm_map_entry), VM_MAP_RESERVED_ZFLAGS,
void
vm_kernel_reserved_entry_init(void)
{
- zone_prio_refill_configure(vm_map_entry_reserved_zone);
+ zone_replenish_configure(vm_map_entry_reserved_zone);
/*
* Once we have our replenish thread set up, we can start using the vm_map_holes zone.
*/
- zone_prio_refill_configure(vm_map_holes_zone);
+ zone_replenish_configure(vm_map_holes_zone);
vm_map_supports_hole_optimization = TRUE;
}
result->vmmap_high_start = 0;
#endif
os_ref_init_count(&result->map_refcnt, &map_refgrp, 1);
-#if TASK_SWAPPER
- result->res_count = 1;
- result->sw_state = MAP_SW_IN;
-#endif /* TASK_SWAPPER */
result->pmap = pmap;
result->min_offset = min;
result->max_offset = max;
result->jit_entry_exists = FALSE;
result->is_alien = FALSE;
result->reserved_regions = FALSE;
+ result->single_jit = FALSE;
/* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
#define vm_map_copy_entry_unlink(copy, entry) \
_vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
-#if MACH_ASSERT && TASK_SWAPPER
-/*
- * vm_map_res_reference:
- *
- * Adds another valid residence count to the given map.
- *
- * Map is locked so this function can be called from
- * vm_map_swapin.
- *
- */
-void
-vm_map_res_reference(vm_map_t map)
-{
- /* assert map is locked */
- assert(map->res_count >= 0);
- assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
- if (map->res_count == 0) {
- lck_mtx_unlock(&map->s_lock);
- vm_map_lock(map);
- vm_map_swapin(map);
- lck_mtx_lock(&map->s_lock);
- ++map->res_count;
- vm_map_unlock(map);
- } else {
- ++map->res_count;
- }
-}
-
-/*
- * vm_map_reference_swap:
- *
- * Adds valid reference and residence counts to the given map.
- *
- * The map may not be in memory (i.e. zero residence count).
- *
- */
-void
-vm_map_reference_swap(vm_map_t map)
-{
- assert(map != VM_MAP_NULL);
- lck_mtx_lock(&map->s_lock);
- assert(map->res_count >= 0);
- assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
- os_ref_retain_locked(&map->map_refcnt);
- vm_map_res_reference(map);
- lck_mtx_unlock(&map->s_lock);
-}
-
-/*
- * vm_map_res_deallocate:
- *
- * Decrement residence count on a map; possibly causing swapout.
- *
- * The map must be in memory (i.e. non-zero residence count).
- *
- * The map is locked, so this function is callable from vm_map_deallocate.
- *
- */
-void
-vm_map_res_deallocate(vm_map_t map)
-{
- assert(map->res_count > 0);
- if (--map->res_count == 0) {
- lck_mtx_unlock(&map->s_lock);
- vm_map_lock(map);
- vm_map_swapout(map);
- vm_map_unlock(map);
- lck_mtx_lock(&map->s_lock);
- }
- assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
-}
-#endif /* MACH_ASSERT && TASK_SWAPPER */
-
/*
* vm_map_destroy:
*
return victim_pid;
}
-#if TASK_SWAPPER
-/*
- * vm_map_swapin/vm_map_swapout
- *
- * Swap a map in and out, either referencing or releasing its resources.
- * These functions are internal use only; however, they must be exported
- * because they may be called from macros, which are exported.
- *
- * In the case of swapout, there could be races on the residence count,
- * so if the residence count is up, we return, assuming that a
- * vm_map_deallocate() call in the near future will bring us back.
- *
- * Locking:
- * -- We use the map write lock for synchronization among races.
- * -- The map write lock, and not the simple s_lock, protects the
- * swap state of the map.
- * -- If a map entry is a share map, then we hold both locks, in
- * hierarchical order.
- *
- * Synchronization Notes:
- * 1) If a vm_map_swapin() call happens while swapout in progress, it
- * will block on the map lock and proceed when swapout is through.
- * 2) A vm_map_reference() call at this time is illegal, and will
- * cause a panic. vm_map_reference() is only allowed on resident
- * maps, since it refuses to block.
- * 3) A vm_map_swapin() call during a swapin will block, and
- * proceeed when the first swapin is done, turning into a nop.
- * This is the reason the res_count is not incremented until
- * after the swapin is complete.
- * 4) There is a timing hole after the checks of the res_count, before
- * the map lock is taken, during which a swapin may get the lock
- * before a swapout about to happen. If this happens, the swapin
- * will detect the state and increment the reference count, causing
- * the swapout to be a nop, thereby delaying it until a later
- * vm_map_deallocate. If the swapout gets the lock first, then
- * the swapin will simply block until the swapout is done, and
- * then proceed.
- *
- * Because vm_map_swapin() is potentially an expensive operation, it
- * should be used with caution.
- *
- * Invariants:
- * 1) A map with a residence count of zero is either swapped, or
- * being swapped.
- * 2) A map with a non-zero residence count is either resident,
- * or being swapped in.
- */
-
-int vm_map_swap_enable = 1;
-
-void
-vm_map_swapin(vm_map_t map)
-{
- vm_map_entry_t entry;
-
- if (!vm_map_swap_enable) { /* debug */
- return;
- }
-
- /*
- * Map is locked
- * First deal with various races.
- */
- if (map->sw_state == MAP_SW_IN) {
- /*
- * we raced with swapout and won. Returning will incr.
- * the res_count, turning the swapout into a nop.
- */
- return;
- }
-
- /*
- * The residence count must be zero. If we raced with another
- * swapin, the state would have been IN; if we raced with a
- * swapout (after another competing swapin), we must have lost
- * the race to get here (see above comment), in which case
- * res_count is still 0.
- */
- assert(map->res_count == 0);
-
- /*
- * There are no intermediate states of a map going out or
- * coming in, since the map is locked during the transition.
- */
- assert(map->sw_state == MAP_SW_OUT);
-
- /*
- * We now operate upon each map entry. If the entry is a sub-
- * or share-map, we call vm_map_res_reference upon it.
- * If the entry is an object, we call vm_object_res_reference
- * (this may iterate through the shadow chain).
- * Note that we hold the map locked the entire time,
- * even if we get back here via a recursive call in
- * vm_map_res_reference.
- */
- entry = vm_map_first_entry(map);
-
- while (entry != vm_map_to_entry(map)) {
- if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
- if (entry->is_sub_map) {
- vm_map_t lmap = VME_SUBMAP(entry);
- lck_mtx_lock(&lmap->s_lock);
- vm_map_res_reference(lmap);
- lck_mtx_unlock(&lmap->s_lock);
- } else {
- vm_object_t object = VME_OBEJCT(entry);
- vm_object_lock(object);
- /*
- * This call may iterate through the
- * shadow chain.
- */
- vm_object_res_reference(object);
- vm_object_unlock(object);
- }
- }
- entry = entry->vme_next;
- }
- assert(map->sw_state == MAP_SW_OUT);
- map->sw_state = MAP_SW_IN;
-}
-
-void
-vm_map_swapout(vm_map_t map)
-{
- vm_map_entry_t entry;
-
- /*
- * Map is locked
- * First deal with various races.
- * If we raced with a swapin and lost, the residence count
- * will have been incremented to 1, and we simply return.
- */
- lck_mtx_lock(&map->s_lock);
- if (map->res_count != 0) {
- lck_mtx_unlock(&map->s_lock);
- return;
- }
- lck_mtx_unlock(&map->s_lock);
-
- /*
- * There are no intermediate states of a map going out or
- * coming in, since the map is locked during the transition.
- */
- assert(map->sw_state == MAP_SW_IN);
-
- if (!vm_map_swap_enable) {
- return;
- }
-
- /*
- * We now operate upon each map entry. If the entry is a sub-
- * or share-map, we call vm_map_res_deallocate upon it.
- * If the entry is an object, we call vm_object_res_deallocate
- * (this may iterate through the shadow chain).
- * Note that we hold the map locked the entire time,
- * even if we get back here via a recursive call in
- * vm_map_res_deallocate.
- */
- entry = vm_map_first_entry(map);
-
- while (entry != vm_map_to_entry(map)) {
- if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
- if (entry->is_sub_map) {
- vm_map_t lmap = VME_SUBMAP(entry);
- lck_mtx_lock(&lmap->s_lock);
- vm_map_res_deallocate(lmap);
- lck_mtx_unlock(&lmap->s_lock);
- } else {
- vm_object_t object = VME_OBJECT(entry);
- vm_object_lock(object);
- /*
- * This call may take a long time,
- * since it could actively push
- * out pages (if we implement it
- * that way).
- */
- vm_object_res_deallocate(object);
- vm_object_unlock(object);
- }
- }
- entry = entry->vme_next;
- }
- assert(map->sw_state == MAP_SW_IN);
- map->sw_state = MAP_SW_OUT;
-}
-
-#endif /* TASK_SWAPPER */
/*
* vm_map_lookup_entry: [ internal use only ]
assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));
while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
- random_addr = ((vm_map_offset_t)random()) << VM_MAP_PAGE_SHIFT(map);
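+ /* use early_random() before the zalloc startup phase, when random() isn't available yet */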
+ if (startup_phase < STARTUP_SUB_ZALLOC) {
+ random_addr = (vm_map_offset_t)early_random();
+ } else {
+ random_addr = (vm_map_offset_t)random();
+ }
+ random_addr <<= VM_MAP_PAGE_SHIFT(map);
random_addr = vm_map_trunc_page(
vm_map_min(map) + (random_addr % addr_space_size),
VM_MAP_PAGE_MASK(map));
boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
boolean_t is_submap = vmk_flags.vmkf_submap;
- boolean_t permanent = vmk_flags.vmkf_permanent;
+ boolean_t permanent = (((flags & VM_FLAGS_PERMANENT) != 0) || vmk_flags.vmkf_permanent);
boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
(vm_tag_t) copy_vm_alias, /* see comment at end of vm_fault_unwire re. cast*/
copy_object,
copy_offset,
- ((copy_object == NULL) ? FALSE : copy),
+ ((copy_object == NULL)
+ ? FALSE
+ : (copy || copy_entry->needs_copy)),
cur_protection,
max_protection,
inheritance);
vm_object_lock(object);
object->ref_count++;
- vm_object_res_reference(object);
/*
* For "named" VM objects, let the pager know that the
* only.
*/
max_prot = new_prot & VM_PROT_ALL;
+ cur_prot = VM_PROT_NONE;
kflags = VM_MAP_KERNEL_FLAGS_NONE;
kflags.vmkf_remap_prot_copy = TRUE;
kflags.vmkf_overwrite_immutable = TRUE;
kr = vm_map_remap(map,
&new_start,
end - start,
- 0, /* mask */
+ 0, /* mask */
VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
kflags,
0,
map,
start,
- TRUE, /* copy-on-write remapping! */
- &cur_prot,
- &max_prot,
+ TRUE, /* copy-on-write remapping! */
+ &cur_prot, /* IN/OUT */
+ &max_prot, /* IN/OUT */
VM_INHERIT_DEFAULT);
if (kr != KERN_SUCCESS) {
return kr;
vm_map_t src_map,
vm_map_address_t src_addr,
vm_map_size_t len,
- vm_prot_t required_prot,
boolean_t do_copy,
vm_map_copy_t *copy_result, /* OUT */
- vm_prot_t *cur_prot, /* OUT */
- vm_prot_t *max_prot, /* OUT */
+ vm_prot_t *cur_prot, /* IN/OUT */
+ vm_prot_t *max_prot, /* IN/OUT */
vm_inherit_t inheritance,
vm_map_kernel_flags_t vmk_flags)
{
vm_map_copy_t copy;
kern_return_t kr;
+ vm_prot_t required_cur_prot, required_max_prot;
/*
* Check for copies of zero bytes.
DEBUG4K_SHARE("src_map %p src_addr 0x%llx src_end 0x%llx\n", src_map, (uint64_t)src_addr, (uint64_t)(src_addr + len));
}
+ required_cur_prot = *cur_prot;
+ required_max_prot = *max_prot;
+
/*
* Allocate a header element for the list.
*
kr = vm_map_remap_extract(src_map,
src_addr,
len,
- required_prot,
- do_copy, /* copy */
+ do_copy, /* copy */
©->cpy_hdr,
- cur_prot,
- max_prot,
+ cur_prot, /* IN/OUT */
+ max_prot, /* IN/OUT */
inheritance,
vmk_flags);
if (kr != KERN_SUCCESS) {
vm_map_copy_discard(copy);
return kr;
}
- assert((*cur_prot & required_prot) == required_prot);
- assert((*max_prot & required_prot) == required_prot);
+ if (required_cur_prot != VM_PROT_NONE) {
+ assert((*cur_prot & required_cur_prot) == required_cur_prot);
+ assert((*max_prot & required_max_prot) == required_max_prot);
+ }
*copy_result = copy;
return KERN_SUCCESS;
#endif /* PMAP_CREATE_FORCE_4K_PAGES */
new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
- vm_map_reference_swap(old_map);
+ vm_map_reference(old_map);
vm_map_lock(old_map);
map_create_options = 0;
return KERN_SUCCESS;
}
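+/*
+ * Counters for the copy paths taken by vm_map_lookup_locked() when
+ * resolving a copy-on-write submap mapping: copy slowly, copy
+ * strategically, or set up a shadow object.
+ */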
+uint64_t vm_map_lookup_locked_copy_slowly_count = 0;
+uint64_t vm_map_lookup_locked_copy_slowly_size = 0;
+uint64_t vm_map_lookup_locked_copy_slowly_max = 0;
+uint64_t vm_map_lookup_locked_copy_slowly_restart = 0;
+uint64_t vm_map_lookup_locked_copy_slowly_error = 0;
+uint64_t vm_map_lookup_locked_copy_strategically_count = 0;
+uint64_t vm_map_lookup_locked_copy_strategically_size = 0;
+uint64_t vm_map_lookup_locked_copy_strategically_max = 0;
+uint64_t vm_map_lookup_locked_copy_strategically_restart = 0;
+uint64_t vm_map_lookup_locked_copy_strategically_error = 0;
+uint64_t vm_map_lookup_locked_copy_shadow_count = 0;
+uint64_t vm_map_lookup_locked_copy_shadow_size = 0;
+uint64_t vm_map_lookup_locked_copy_shadow_max = 0;
/*
* vm_map_lookup_locked:
*
boolean_t mask_protections;
boolean_t force_copy;
boolean_t no_force_copy_if_executable;
+ boolean_t submap_needed_copy;
vm_prot_t original_fault_type;
vm_map_size_t fault_page_mask;
* returned locked.
*/
+ submap_needed_copy = FALSE;
submap_recurse:
if (entry->is_sub_map) {
vm_map_offset_t local_vaddr;
}
}
} else {
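+ /* remember that we went through a copy-on-write ("needs_copy") submap mapping */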
+ if (entry->needs_copy) {
+ submap_needed_copy = TRUE;
+ }
vm_map_lock_read(VME_SUBMAP(entry));
*var_map = VME_SUBMAP(entry);
/* leave map locked if it is a target */
vm_object_offset_t copy_offset;
vm_map_offset_t local_start;
vm_map_offset_t local_end;
- boolean_t copied_slowly = FALSE;
- vm_object_offset_t copied_slowly_phys_offset = 0;
+ boolean_t object_copied = FALSE;
+ vm_object_offset_t object_copied_offset = 0;
+ boolean_t object_copied_needs_copy = FALSE;
kern_return_t kr = KERN_SUCCESS;
if (vm_map_lock_read_to_write(map)) {
/* an entry in our space to the underlying */
/* object in the submap, bypassing the */
/* submap. */
-
- if (submap_entry->wired_count != 0 ||
- (sub_object->copy_strategy !=
- MEMORY_OBJECT_COPY_SYMMETRIC)) {
- if ((submap_entry->protection & VM_PROT_EXECUTE) &&
- no_force_copy_if_executable) {
-// printf("FBDP map %p entry %p start 0x%llx end 0x%llx wired %d strat %d\n", map, submap_entry, (uint64_t)local_start, (uint64_t)local_end, submap_entry->wired_count, sub_object->copy_strategy);
- if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
- vm_map_unlock(cow_sub_map_parent);
- }
- if ((*real_map != map)
- && (*real_map != cow_sub_map_parent)) {
- vm_map_unlock(*real_map);
- }
- *real_map = map;
- vm_map_lock_write_to_read(map);
- kr = KERN_PROTECTION_FAILURE;
- DTRACE_VM4(submap_no_copy_executable,
- vm_map_t, map,
- vm_object_offset_t, submap_entry_offset,
- vm_object_size_t, submap_entry_size,
- int, kr);
- return kr;
+ submap_entry_offset = VME_OFFSET(submap_entry);
+ submap_entry_size = submap_entry->vme_end - submap_entry->vme_start;
+
+ if ((submap_entry->wired_count != 0 ||
+ sub_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) &&
+ (submap_entry->protection & VM_PROT_EXECUTE) &&
+ no_force_copy_if_executable) {
+// printf("FBDP map %p entry %p start 0x%llx end 0x%llx wired %d strat %d\n", map, submap_entry, (uint64_t)local_start, (uint64_t)local_end, submap_entry->wired_count, sub_object->copy_strategy);
+ if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
+ vm_map_unlock(cow_sub_map_parent);
+ }
+ if ((*real_map != map)
+ && (*real_map != cow_sub_map_parent)) {
+ vm_map_unlock(*real_map);
}
+ *real_map = map;
+ vm_map_lock_write_to_read(map);
+ kr = KERN_PROTECTION_FAILURE;
+ DTRACE_VM4(submap_no_copy_executable,
+ vm_map_t, map,
+ vm_object_offset_t, submap_entry_offset,
+ vm_object_size_t, submap_entry_size,
+ int, kr);
+ return kr;
+ }
+ if (submap_entry->wired_count != 0) {
vm_object_reference(sub_object);
assertf(VM_MAP_PAGE_ALIGNED(VME_OFFSET(submap_entry), VM_MAP_PAGE_MASK(map)),
"submap_entry %p offset 0x%llx\n",
submap_entry, VME_OFFSET(submap_entry));
- submap_entry_offset = VME_OFFSET(submap_entry);
- submap_entry_size = submap_entry->vme_end - submap_entry->vme_start;
DTRACE_VM6(submap_copy_slowly,
vm_map_t, cow_sub_map_parent,
submap_entry_size,
FALSE,
©_object);
- copied_slowly = TRUE;
+ object_copied = TRUE;
+ object_copied_offset = 0;
/* 4k: account for extra offset in physical page */
- copied_slowly_phys_offset = submap_entry_offset - vm_object_trunc_page(submap_entry_offset);
+ object_copied_offset += submap_entry_offset - vm_object_trunc_page(submap_entry_offset);
+ object_copied_needs_copy = FALSE;
vm_object_deallocate(sub_object);
vm_map_lock(map);
vm_object_deallocate(copy_object);
copy_object = VM_OBJECT_NULL;
vm_map_lock_write_to_read(map);
- DTRACE_VM4(submap_copy_slowly,
+ DTRACE_VM4(submap_copy_error_slowly,
vm_object_t, sub_object,
vm_object_offset_t, submap_entry_offset,
vm_object_size_t, submap_entry_size,
int, kr);
+ vm_map_lookup_locked_copy_slowly_error++;
return kr;
}
vm_object_deallocate(copy_object);
copy_object = VM_OBJECT_NULL;
vm_map_lock_write_to_read(map);
+ vm_map_lookup_locked_copy_slowly_restart++;
+ goto RetrySubMap;
+ }
+ vm_map_lookup_locked_copy_slowly_count++;
+ vm_map_lookup_locked_copy_slowly_size += submap_entry_size;
+ if (submap_entry_size > vm_map_lookup_locked_copy_slowly_max) {
+ vm_map_lookup_locked_copy_slowly_max = submap_entry_size;
+ }
+ } else if (sub_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
+ submap_entry_offset = VME_OFFSET(submap_entry);
+ copy_object = VM_OBJECT_NULL;
+ object_copied_offset = submap_entry_offset;
+ object_copied_needs_copy = FALSE;
+ DTRACE_VM6(submap_copy_strategically,
+ vm_map_t, cow_sub_map_parent,
+ vm_map_offset_t, vaddr,
+ vm_map_t, map,
+ vm_object_size_t, submap_entry_size,
+ int, submap_entry->wired_count,
+ int, sub_object->copy_strategy);
+ kr = vm_object_copy_strategically(
+ sub_object,
+ submap_entry_offset,
+ submap_entry->vme_end - submap_entry->vme_start,
+ ©_object,
+ &object_copied_offset,
+ &object_copied_needs_copy);
+ if (kr == KERN_MEMORY_RESTART_COPY) {
+ old_start -= start_delta;
+ old_end += end_delta;
+ vm_object_deallocate(copy_object);
+ copy_object = VM_OBJECT_NULL;
+ vm_map_lock_write_to_read(map);
+ vm_map_lookup_locked_copy_strategically_restart++;
goto RetrySubMap;
}
+ if (kr != KERN_SUCCESS) {
+ if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
+ vm_map_unlock(cow_sub_map_parent);
+ }
+ if ((*real_map != map)
+ && (*real_map != cow_sub_map_parent)) {
+ vm_map_unlock(*real_map);
+ }
+ *real_map = map;
+ vm_object_deallocate(copy_object);
+ copy_object = VM_OBJECT_NULL;
+ vm_map_lock_write_to_read(map);
+ DTRACE_VM4(submap_copy_error_strategically,
+ vm_object_t, sub_object,
+ vm_object_offset_t, submap_entry_offset,
+ vm_object_size_t, submap_entry_size,
+ int, kr);
+ vm_map_lookup_locked_copy_strategically_error++;
+ return kr;
+ }
+ assert(copy_object != VM_OBJECT_NULL);
+ assert(copy_object != sub_object);
+ object_copied = TRUE;
+ vm_map_lookup_locked_copy_strategically_count++;
+ vm_map_lookup_locked_copy_strategically_size += submap_entry_size;
+ if (submap_entry_size > vm_map_lookup_locked_copy_strategically_max) {
+ vm_map_lookup_locked_copy_strategically_max = submap_entry_size;
+ }
} else {
/* set up shadow object */
+ object_copied = FALSE;
copy_object = sub_object;
vm_object_lock(sub_object);
vm_object_reference_locked(sub_object);
VM_MAP_PAGE_SIZE(map),
submap_entry->vme_start,
prot);
+ vm_map_lookup_locked_copy_shadow_count++;
+ vm_map_lookup_locked_copy_shadow_size += submap_entry_size;
+ if (submap_entry_size > vm_map_lookup_locked_copy_shadow_max) {
+ vm_map_lookup_locked_copy_shadow_max = submap_entry_size;
+ }
}
/*
uint64_t, (uint64_t)entry->vme_start,
uint64_t, (uint64_t)entry->vme_end,
vm_map_offset_t, vaddr,
- int, copied_slowly);
+ int, object_copied);
return KERN_INVALID_ADDRESS;
}
entry->protection &= ~VM_PROT_EXECUTE;
}
- if (copied_slowly) {
- VME_OFFSET_SET(entry, local_start - old_start + copied_slowly_phys_offset);
- entry->needs_copy = FALSE;
+ if (object_copied) {
+ VME_OFFSET_SET(entry, local_start - old_start + object_copied_offset);
+ entry->needs_copy = object_copied_needs_copy;
entry->is_shared = FALSE;
} else {
- VME_OFFSET_SET(entry, copy_offset);
+ assert(VME_OBJECT(entry) != VM_OBJECT_NULL);
+ assert(VME_OBJECT(entry)->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
assert(entry->wired_count == 0);
+ VME_OFFSET_SET(entry, copy_offset);
entry->needs_copy = TRUE;
- if (entry->inheritance == VM_INHERIT_SHARE) {
- entry->inheritance = VM_INHERIT_COPY;
- }
if (map != old_map) {
entry->is_shared = TRUE;
}
}
}
+ if (submap_needed_copy && (prot & VM_PROT_WRITE)) {
+ /*
+ * We went through a "needs_copy" submap without triggering
+ * a copy, so granting write access to the page would bypass
+ * that submap's "needs_copy".
+ */
+ assert(!(fault_type & VM_PROT_WRITE));
+ assert(!*wired);
+ assert(!force_copy);
+ // printf("FBDP %d[%s] submap_needed_copy for %p 0x%llx\n", proc_selfpid(), proc_name_address(current_task()->bsd_info), map, vaddr);
+ prot &= ~VM_PROT_WRITE;
+ }
+
/*
* Create an object if necessary.
*/
vm_region_submap_short_info_64_t short_info;
boolean_t do_region_footprint;
int effective_page_size, effective_page_shift;
+ boolean_t submap_needed_copy;
if (map == VM_MAP_NULL) {
/* no address space to work on */
user_address = *address;
user_max_depth = *nesting_depth;
+ submap_needed_copy = FALSE;
if (not_in_kdp) {
vm_map_lock_read(map);
* Get down to the next submap level.
*/
+ if (curr_entry->needs_copy) {
+ /* everything below this is effectively copy-on-write */
+ submap_needed_copy = TRUE;
+ }
+
/*
* Lock the next level and unlock the current level,
* unless we need to keep it locked to access the "next_entry"
submap_info->shadow_depth = 0;
submap_info->external_pager = 0;
submap_info->share_mode = SM_PRIVATE;
+ if (submap_needed_copy) {
+ submap_info->share_mode = SM_COW;
+ }
submap_info->is_submap = 0;
submap_info->behavior = VM_BEHAVIOR_DEFAULT;
submap_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
short_info->external_pager = 0;
short_info->shadow_depth = 0;
short_info->share_mode = SM_PRIVATE;
+ if (submap_needed_copy) {
+ short_info->share_mode = SM_COW;
+ }
short_info->ref_count = 1;
}
*nesting_depth = 0;
extended.share_mode == SM_SHARED) {
extended.share_mode = SM_PRIVATE;
}
+ if (submap_needed_copy) {
+ extended.share_mode = SM_COW;
+ }
} else {
if (curr_entry->use_pmap) {
extended.share_mode = SM_TRUESHARED;
{
vm_map_entry_t prev_entry;
- counter(c_vm_map_simplify_entry_called++);
-
prev_entry = this_entry->vme_prev;
if ((this_entry != vm_map_to_entry(map)) &&
}
vm_map_entry_dispose(map, prev_entry);
SAVE_HINT_MAP_WRITE(map, this_entry);
- counter(c_vm_map_simplified++);
}
}
vm_map_simplify_entry(map, this_entry);
vm_map_simplify_entry(map, this_entry->vme_next);
}
- counter(c_vm_map_simplify_called++);
vm_map_unlock(map);
}
object->shadow == VM_OBJECT_NULL &&
object->internal &&
object->purgable == VM_PURGABLE_DENY &&
- object->copy_strategy != MEMORY_OBJECT_COPY_DELAY &&
- !object->true_share &&
object->wimg_bits == VM_WIMG_USE_DEFAULT &&
!object->code_signed) {
return TRUE;
return new_entry;
}
-int vm_remap_old_path = 0;
-int vm_remap_new_path = 0;
/*
* Routine: vm_map_remap_extract
*
vm_map_t map,
vm_map_offset_t addr,
vm_map_size_t size,
- vm_prot_t required_protection,
boolean_t copy,
struct vm_map_header *map_header,
- vm_prot_t *cur_protection,
- vm_prot_t *max_protection,
+ vm_prot_t *cur_protection, /* IN/OUT */
+ vm_prot_t *max_protection, /* IN/OUT */
/* What, no behavior? */
vm_inherit_t inheritance,
vm_map_kernel_flags_t vmk_flags)
vm_prot_t max_prot_for_prot_copy;
vm_map_offset_t effective_page_mask;
boolean_t pageable, same_map;
+ boolean_t vm_remap_legacy;
+ vm_prot_t required_cur_prot, required_max_prot;
pageable = vmk_flags.vmkf_copy_pageable;
same_map = vmk_flags.vmkf_copy_same_map;
assert(inheritance == VM_INHERIT_NONE ||
inheritance == VM_INHERIT_COPY ||
inheritance == VM_INHERIT_SHARE);
- assert(!(required_protection & ~VM_PROT_ALL));
+ assert(!(*cur_protection & ~VM_PROT_ALL));
+ assert(!(*max_protection & ~VM_PROT_ALL));
+ assert((*cur_protection & *max_protection) == *cur_protection);
/*
* Compute start and end of region.
vm_map_store_init( map_header );
if (copy && vmk_flags.vmkf_remap_prot_copy) {
+ /*
+ * Special case for vm_map_protect(VM_PROT_COPY):
+ * we want to set the new mappings' max protection to the
+ * specified *max_protection...
+ */
max_prot_for_prot_copy = *max_protection & VM_PROT_ALL;
+ /* ... but we want to use the vm_remap() legacy mode */
+ *max_protection = VM_PROT_NONE;
+ *cur_protection = VM_PROT_NONE;
} else {
max_prot_for_prot_copy = VM_PROT_NONE;
}
- *cur_protection = VM_PROT_ALL;
- *max_protection = VM_PROT_ALL;
+
+ if (*cur_protection == VM_PROT_NONE &&
+ *max_protection == VM_PROT_NONE) {
+ /*
+ * vm_remap() legacy mode:
+ * Extract all memory regions in the specified range and
+ * collect the strictest set of protections allowed on the
+ * entire range, so the caller knows what they can do with
+ * the remapped range.
+ * We start with VM_PROT_ALL and we'll remove the protections
+ * missing from each memory region.
+ */
+ vm_remap_legacy = TRUE;
+ *cur_protection = VM_PROT_ALL;
+ *max_protection = VM_PROT_ALL;
+ required_cur_prot = VM_PROT_NONE;
+ required_max_prot = VM_PROT_NONE;
+ } else {
+ /*
+ * vm_remap_new() mode:
+ * Extract all memory regions in the specified range and
+ * ensure that they have at least the protections specified
+ * by the caller via *cur_protection and *max_protection.
+ * The resulting mapping should have these protections.
+ */
+ vm_remap_legacy = FALSE;
+ if (copy) {
+ required_cur_prot = VM_PROT_NONE;
+ required_max_prot = VM_PROT_READ;
+ } else {
+ required_cur_prot = *cur_protection;
+ required_max_prot = *max_protection;
+ }
+ }
map_address = 0;
mapped_size = 0;
vm_map_t submap;
vm_map_offset_t submap_start;
vm_map_size_t submap_size;
+ boolean_t submap_needs_copy;
/*
- * No check for "required_protection" on "src_entry"
+ * No check for "required protection" on "src_entry"
* because the protections that matter are the ones
* on the submap's VM map entry, which will be checked
* during the call to vm_map_remap_extract() below.
}
submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
submap = VME_SUBMAP(src_entry);
+ if (copy) {
+ /*
+ * The caller wants a copy-on-write re-mapping,
+ * so let's extract from the submap accordingly.
+ */
+ submap_needs_copy = TRUE;
+ } else if (src_entry->needs_copy) {
+ /*
+ * The caller wants a shared re-mapping but the
+ * submap is mapped with "needs_copy", so its
+ * contents can't be shared as is. Extract the
+ * contents of the submap as "copy-on-write".
+ * The re-mapping won't be shared with the
+ * original mapping but this is equivalent to
+ * what happened with the original "remap from
+ * submap" code.
+ * The shared region is mapped "needs_copy", for
+ * example.
+ */
+ submap_needs_copy = TRUE;
+ } else {
+ /*
+ * The caller wants a shared re-mapping and
+ * this mapping can be shared (no "needs_copy"),
+ * so let's extract from the submap accordingly.
+ * Kernel submaps are mapped without
+ * "needs_copy", for example.
+ */
+ submap_needs_copy = FALSE;
+ }
vm_map_reference(submap);
vm_map_unlock(map);
src_entry = NULL;
+ if (vm_remap_legacy) {
+ *cur_protection = VM_PROT_NONE;
+ *max_protection = VM_PROT_NONE;
+ }
+
+ DTRACE_VM7(remap_submap_recurse,
+ vm_map_t, map,
+ vm_map_offset_t, addr,
+ vm_map_size_t, size,
+ boolean_t, copy,
+ vm_map_offset_t, submap_start,
+ vm_map_size_t, submap_size,
+ boolean_t, submap_needs_copy);
+
result = vm_map_remap_extract(submap,
submap_start,
submap_size,
- required_protection,
- copy,
+ submap_needs_copy,
map_header,
cur_protection,
max_protection,
return result;
}
- if ((src_entry->protection & required_protection)
- != required_protection) {
+ if (src_entry->is_sub_map) {
+ /* protections for submap mapping are irrelevant here */
+ } else if (((src_entry->protection & required_cur_prot) !=
+ required_cur_prot) ||
+ ((src_entry->max_protection & required_max_prot) !=
+ required_max_prot)) {
if (vmk_flags.vmkf_copy_single_object &&
mapped_size != 0) {
/*
break;
}
- if (src_entry->is_sub_map &&
- VM_MAP_PAGE_SHIFT(VME_SUBMAP(src_entry)) < PAGE_SHIFT) {
+ if (src_entry->is_sub_map) {
vm_map_t submap;
vm_map_offset_t submap_start;
vm_map_size_t submap_size;
vm_map_copy_t submap_copy;
vm_prot_t submap_curprot, submap_maxprot;
-
- vm_remap_new_path++;
+ boolean_t submap_needs_copy;
/*
- * No check for "required_protection" on "src_entry"
+ * No check for "required protection" on "src_entry"
* because the protections that matter are the ones
* on the submap's VM map entry, which will be checked
* during the call to vm_map_copy_extract() below.
submap = VME_SUBMAP(src_entry);
submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
submap_size = tmp_size;
+ if (copy) {
+ /*
+ * The caller wants a copy-on-write re-mapping,
+ * so let's extract from the submap accordingly.
+ */
+ submap_needs_copy = TRUE;
+ } else if (src_entry->needs_copy) {
+ /*
+ * The caller wants a shared re-mapping but the
+ * submap is mapped with "needs_copy", so its
+ * contents can't be shared as is. Extract the
+ * contents of the submap as "copy-on-write".
+ * The re-mapping won't be shared with the
+ * original mapping but this is equivalent to
+ * what happened with the original "remap from
+ * submap" code.
+ * The shared region is mapped "needs_copy", for
+ * example.
+ */
+ submap_needs_copy = TRUE;
+ } else {
+ /*
+ * The caller wants a shared re-mapping and
+ * this mapping can be shared (no "needs_copy"),
+ * so let's extract from the submap accordingly.
+ * Kernel submaps are mapped without
+ * "needs_copy", for example.
+ */
+ submap_needs_copy = FALSE;
+ }
/* extra ref to keep submap alive */
vm_map_reference(submap);
- DTRACE_VM6(remap_submap_recurse,
+ DTRACE_VM7(remap_submap_recurse,
vm_map_t, map,
vm_map_offset_t, addr,
vm_map_size_t, size,
boolean_t, copy,
vm_map_offset_t, submap_start,
- vm_map_size_t, submap_size);
+ vm_map_size_t, submap_size,
+ boolean_t, submap_needs_copy);
/*
* The map can be safely unlocked since we
vm_map_unlock(map);
src_entry = NULL; /* not valid once map is unlocked */
+ if (vm_remap_legacy) {
+ submap_curprot = VM_PROT_NONE;
+ submap_maxprot = VM_PROT_NONE;
+ if (max_prot_for_prot_copy) {
+ submap_maxprot = max_prot_for_prot_copy;
+ }
+ } else {
+ assert(!max_prot_for_prot_copy);
+ submap_curprot = *cur_protection;
+ submap_maxprot = *max_protection;
+ }
result = vm_map_copy_extract(submap,
submap_start,
submap_size,
- required_protection,
- copy,
+ submap_needs_copy,
&submap_copy,
&submap_curprot,
&submap_maxprot,
copy_entry = vm_map_copy_first_entry(submap_copy);
assert(!copy_entry->is_sub_map);
+ object = VME_OBJECT(copy_entry);
+
+ /*
+ * Prevent kernel_object from being exposed to
+ * user space.
+ */
+ if (__improbable(object == kernel_object)) {
+ printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
+ proc_selfpid(),
+ (current_task()->bsd_info
+ ? proc_name_address(current_task()->bsd_info)
+ : "?"));
+ DTRACE_VM(extract_kernel_only);
+ result = KERN_INVALID_RIGHT;
+ vm_map_copy_discard(submap_copy);
+ submap_copy = VM_MAP_COPY_NULL;
+ vm_map_lock(map);
+ break;
+ }
+
vm_map_copy_entry_unlink(submap_copy, copy_entry);
copy_entry_size = copy_entry->vme_end - copy_entry->vme_start;
copy_entry->vme_start = map_address;
/* done with submap_copy */
vm_map_copy_discard(submap_copy);
- *cur_protection &= submap_curprot;
- *max_protection &= submap_maxprot;
+ if (vm_remap_legacy) {
+ *cur_protection &= submap_curprot;
+ *max_protection &= submap_maxprot;
+ }
/* re-acquire the map lock and continue to next entry */
vm_map_lock(map);
continue;
- } else if (src_entry->is_sub_map) {
- vm_remap_old_path++;
- DTRACE_VM4(remap_submap,
- vm_map_t, map,
- vm_map_offset_t, addr,
- vm_map_size_t, size,
- boolean_t, copy);
-
- vm_map_reference(VME_SUBMAP(src_entry));
- object = VM_OBJECT_NULL;
} else {
object = VME_OBJECT(src_entry);
+
+ /*
+ * Prevent kernel_object from being exposed to
+ * user space.
+ */
+ if (__improbable(object == kernel_object)) {
+ printf("%d[%s]: rejecting attempt to extract from kernel_object\n",
+ proc_selfpid(),
+ (current_task()->bsd_info
+ ? proc_name_address(current_task()->bsd_info)
+ : "?"));
+ DTRACE_VM(extract_kernel_only);
+ result = KERN_INVALID_RIGHT;
+ break;
+ }
+
if (src_entry->iokit_acct) {
/*
* This entry uses "IOKit accounting".
VME_OFFSET_SET(src_entry, 0);
VME_OBJECT_SET(src_entry, object);
assert(src_entry->use_pmap);
+ assert(!map->mapped_in_other_pmaps);
} else if (src_entry->wired_count ||
object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
/*
*/
object->copy_strategy =
MEMORY_OBJECT_COPY_DELAY;
+ object->true_share = TRUE;
}
vm_object_unlock(object);
}
new_entry->max_protection |= VM_PROT_WRITE;
} else {
new_entry->inheritance = inheritance;
+ if (!vm_remap_legacy) {
+ new_entry->protection = *cur_protection;
+ new_entry->max_protection = *max_protection;
+ }
}
VME_OFFSET_SET(new_entry, offset);
_vm_map_store_entry_link(map_header,
map_header->links.prev, new_entry);
- /*Protections for submap mapping are irrelevant here*/
- if (!src_entry->is_sub_map) {
+ /* protections for submap mapping are irrelevant here */
+ if (vm_remap_legacy && !src_entry->is_sub_map) {
*cur_protection &= src_entry->protection;
*max_protection &= src_entry->max_protection;
}
map->is_alien = true;
vm_map_unlock(map);
}
+
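+/*
+ * Restrict a map to a single JIT mapping: once "single_jit" is set,
+ * VM_MAP_POLICY_ALLOW_MULTIPLE_JIT() returns false for this map.
+ */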
+void
+vm_map_single_jit(
+ vm_map_t map)
+{
+ vm_map_lock(map);
+ map->single_jit = true;
+ vm_map_unlock(map);
+}
#endif /* XNU_TARGET_OS_OSX */
void vm_map_copy_to_physcopy(vm_map_copy_t copy_map, vm_map_t target_map);
vmk_flags.vmkf_copy_pageable = TRUE;
vmk_flags.vmkf_copy_same_map = TRUE;
assert(adjusted_size != 0);
+ cur_prot = VM_PROT_NONE; /* legacy mode */
+ max_prot = VM_PROT_NONE; /* legacy mode */
kr = vm_map_copy_extract(map, adjusted_start, adjusted_size,
- VM_PROT_NONE, /* required_protection: no check here */
FALSE /* copy */,
&copy_map,

&cur_prot, &max_prot, VM_INHERIT_DEFAULT,
vm_named_entry_t named_entry;
- named_entry = (vm_named_entry_t) port->ip_kobject;
+ named_entry = (vm_named_entry_t) ipc_kobject_get(port);
named_entry_lock(named_entry);
copy_map = named_entry->backing.copy;
target_copy_map = copy_map;
vm_map_t src_map,
vm_map_offset_t memory_address,
boolean_t copy,
- vm_prot_t *cur_protection,
- vm_prot_t *max_protection,
+ vm_prot_t *cur_protection, /* IN/OUT */
+ vm_prot_t *max_protection, /* IN/OUT */
vm_inherit_t inheritance)
{
kern_return_t result;
result = vm_map_copy_extract(src_map,
memory_address,
size,
- VM_PROT_NONE, /* required_protection: no check here */
copy, &copy_map,
- cur_protection,
- max_protection,
+ cur_protection, /* IN/OUT */
+ max_protection, /* IN/OUT */
inheritance,
vmk_flags);
if (result != KERN_SUCCESS) {
mach_destroy_memory_entry(port);
return VM_MAP_NULL;
}
- vm_map_reference_swap(map);
+ vm_map_reference(map);
mach_destroy_memory_entry(port);
break;
} else {
/*
* vm_map_reference:
*
- * Most code internal to the osfmk will go through a
- * macro defining this. This is always here for the
- * use of other kernel components.
+ * Takes a reference on the specified map.
*/
-#undef vm_map_reference
void
vm_map_reference(
vm_map_t map)
{
- if (map == VM_MAP_NULL) {
- return;
+ if (__probable(map != VM_MAP_NULL)) {
+ vm_map_require(map);
+ os_ref_retain(&map->map_refcnt);
}
-
- lck_mtx_lock(&map->s_lock);
-#if TASK_SWAPPER
- assert(map->res_count > 0);
- assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
- map->res_count++;
-#endif
- os_ref_retain_locked(&map->map_refcnt);
- lck_mtx_unlock(&map->s_lock);
}
/*
vm_map_deallocate(
vm_map_t map)
{
- unsigned int ref;
-
- if (map == VM_MAP_NULL) {
- return;
- }
-
- lck_mtx_lock(&map->s_lock);
- ref = os_ref_release_locked(&map->map_refcnt);
- if (ref > 0) {
- vm_map_res_deallocate(map);
- lck_mtx_unlock(&map->s_lock);
- return;
+ if (__probable(map != VM_MAP_NULL)) {
+ vm_map_require(map);
+ if (os_ref_release(&map->map_refcnt) == 0) {
+ vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
+ }
}
- assert(os_ref_get_count(&map->map_refcnt) == 0);
- lck_mtx_unlock(&map->s_lock);
-
-#if TASK_SWAPPER
- /*
- * The map residence count isn't decremented here because
- * the vm_map_delete below will traverse the entire map,
- * deleting entries, and the residence counts on objects
- * and sharing maps will go away then.
- */
-#endif
-
- vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
}
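+/*
+ * Map lifetime is now governed solely by the os_refcnt above: every
+ * vm_map_reference() must be balanced by a vm_map_deallocate(), and the
+ * release that drops the count to zero destroys the map via
+ * vm_map_destroy().
+ */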
void
#ifdef MACH_KERNEL_PRIVATE
-#include <task_swapper.h>
#include <mach_assert.h>
#include <vm/vm_object.h>
vm_map_size_t size; /* virtual size */
vm_map_size_t user_wire_limit;/* rlimit on user locked memory */
vm_map_size_t user_wire_size; /* current size of user locked memory in this map */
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
vm_map_offset_t vmmap_high_start;
-#endif
+#endif /* XNU_TARGET_OS_OSX */
union {
/*
#define first_free f_s._first_free
#define holes_list f_s._holes
- struct os_refcnt map_refcnt; /* Reference count */
-
-#if TASK_SWAPPER
- int res_count; /* Residence count (swap) */
- int sw_state; /* Swap state */
-#endif /* TASK_SWAPPER */
+ struct os_refcnt map_refcnt; /* Reference count */
unsigned int
/* boolean_t */ wait_for_space:1, /* Should callers wait for space? */
- /* boolean_t */ wiring_required:1, /* All memory wired? */
- /* boolean_t */ no_zero_fill:1, /*No zero fill absent pages */
- /* boolean_t */ mapped_in_other_pmaps:1, /*has this submap been mapped in maps that use a different pmap */
- /* boolean_t */ switch_protect:1, /* Protect map from write faults while switched */
- /* boolean_t */ disable_vmentry_reuse:1, /* All vm entries should keep using newer and higher addresses in the map */
- /* boolean_t */ map_disallow_data_exec:1, /* Disallow execution from data pages on exec-permissive architectures */
+ /* boolean_t */ wiring_required:1, /* All memory wired? */
+ /* boolean_t */ no_zero_fill:1, /* No zero fill absent pages */
+ /* boolean_t */ mapped_in_other_pmaps:1, /* has this submap been mapped in maps that use a different pmap */
+ /* boolean_t */ switch_protect:1, /* Protect map from write faults while switched */
+ /* boolean_t */ disable_vmentry_reuse:1, /* All vm entries should keep using newer and higher addresses in the map */
+ /* boolean_t */ map_disallow_data_exec:1, /* Disallow execution from data pages on exec-permissive architectures */
/* boolean_t */ holelistenabled:1,
/* boolean_t */ is_nested_map:1,
- /* boolean_t */ map_disallow_new_exec:1, /* Disallow new executable code */
+ /* boolean_t */ map_disallow_new_exec:1, /* Disallow new executable code */
/* boolean_t */ jit_entry_exists:1,
/* boolean_t */ has_corpse_footprint:1,
/* boolean_t */ terminated:1,
- /* boolean_t */ is_alien:1, /* for platform simulation, i.e. PLATFORM_IOS on OSX */
- /* boolean_t */ cs_enforcement:1, /* code-signing enforcement */
- /* boolean_t */ reserved_regions:1, /* has reserved regions. The map size that userspace sees should ignore these. */
- /* reserved */ pad:16;
+ /* boolean_t */ is_alien:1, /* for platform simulation, i.e. PLATFORM_IOS on OSX */
+ /* boolean_t */ cs_enforcement:1, /* code-signing enforcement */
+ /* boolean_t */ reserved_regions:1, /* has reserved regions. The map size that userspace sees should ignore these. */
+ /* boolean_t */ single_jit:1, /* only allow one JIT mapping */
+ /* reserved */ pad:15;
unsigned int timestamp; /* Version number */
};
#define vm_map_first_entry(map) ((map)->hdr.links.next)
#define vm_map_last_entry(map) ((map)->hdr.links.prev)
-#if TASK_SWAPPER
-/*
- * VM map swap states. There are no transition states.
- */
-#define MAP_SW_IN 1 /* map is swapped in; residence count > 0 */
-#define MAP_SW_OUT 2 /* map is out (res_count == 0 */
-#endif /* TASK_SWAPPER */
-
/*
* Type: vm_map_version_t [exported; contents invisible]
*
/* Physical map associated
* with this address map */
-/*
- * Macros/functions for map residence counts and swapin/out of vm maps
- */
-#if TASK_SWAPPER
-
-#if MACH_ASSERT
/* Gain a reference to an existing map */
extern void vm_map_reference(
vm_map_t map);
-/* Lose a residence count */
-extern void vm_map_res_deallocate(
- vm_map_t map);
-/* Gain a residence count on a map */
-extern void vm_map_res_reference(
- vm_map_t map);
-/* Gain reference & residence counts to possibly swapped-out map */
-extern void vm_map_reference_swap(
- vm_map_t map);
-
-#else /* MACH_ASSERT */
-
-#define vm_map_reference(map) \
-MACRO_BEGIN \
- vm_map_t Map = (map); \
- if (Map) { \
- lck_mtx_lock(&Map->s_lock); \
- Map->res_count++; \
- os_ref_retain(&Map->map_refcnt); \
- lck_mtx_unlock(&Map->s_lock); \
- } \
-MACRO_END
-
-#define vm_map_res_reference(map) \
-MACRO_BEGIN \
- vm_map_t Lmap = (map); \
- if (Lmap->res_count == 0) { \
- lck_mtx_unlock(&Lmap->s_lock);\
- vm_map_lock(Lmap); \
- vm_map_swapin(Lmap); \
- lck_mtx_lock(&Lmap->s_lock); \
- ++Lmap->res_count; \
- vm_map_unlock(Lmap); \
- } else \
- ++Lmap->res_count; \
-MACRO_END
-
-#define vm_map_res_deallocate(map) \
-MACRO_BEGIN \
- vm_map_t Map = (map); \
- if (--Map->res_count == 0) { \
- lck_mtx_unlock(&Map->s_lock); \
- vm_map_lock(Map); \
- vm_map_swapout(Map); \
- vm_map_unlock(Map); \
- lck_mtx_lock(&Map->s_lock); \
- } \
-MACRO_END
-
-#define vm_map_reference_swap(map) \
-MACRO_BEGIN \
- vm_map_t Map = (map); \
- lck_mtx_lock(&Map->s_lock); \
- os_ref_retain(&Map->map_refcnt);\
- vm_map_res_reference(Map); \
- lck_mtx_unlock(&Map->s_lock); \
-MACRO_END
-#endif /* MACH_ASSERT */
-
-extern void vm_map_swapin(
- vm_map_t map);
-
-extern void vm_map_swapout(
- vm_map_t map);
-
-#else /* TASK_SWAPPER */
-
-#define vm_map_reference(map) \
-MACRO_BEGIN \
- vm_map_t Map = (map); \
- if (Map) { \
- lck_mtx_lock(&Map->s_lock); \
- os_ref_retain(&Map->map_refcnt);\
- lck_mtx_unlock(&Map->s_lock); \
- } \
-MACRO_END
-
-#define vm_map_reference_swap(map) vm_map_reference(map)
-#define vm_map_res_reference(map)
-#define vm_map_res_deallocate(map)
-
-#endif /* TASK_SWAPPER */
/*
* Submap object. Must be used to create memory to be put
thread_wakeup((event_t)(&(map)->hdr))
-#define vm_map_ref_fast(map) \
- MACRO_BEGIN \
- lck_mtx_lock(&map->s_lock); \
- map->ref_count++; \
- vm_map_res_reference(map); \
- lck_mtx_unlock(&map->s_lock); \
- MACRO_END
-
-#define vm_map_dealloc_fast(map) \
- MACRO_BEGIN \
- int c; \
- \
- lck_mtx_lock(&map->s_lock); \
- c = --map->ref_count; \
- if (c > 0) \
- vm_map_res_deallocate(map); \
- lck_mtx_unlock(&map->s_lock); \
- if (c == 0) \
- vm_map_destroy(map); \
- MACRO_END
-
-
/* simplify map entries */
extern void vm_map_simplify_entry(
vm_map_t map,
extern kern_return_t vm_map_terminate(
vm_map_t map);
+extern void vm_map_require(
+ vm_map_t map);
+
#endif /* !XNU_KERNEL_PRIVATE */
/* Deallocate a region */
vm_map_t src_map,
vm_map_address_t src_addr,
vm_map_size_t len,
- vm_prot_t required_prot,
boolean_t copy,
vm_map_copy_t *copy_result, /* OUT */
vm_prot_t *cur_prot, /* OUT */
extern kern_return_t vm_map_raise_min_offset(
vm_map_t map,
vm_map_offset_t new_min_offset);
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
extern void vm_map_set_high_start(
vm_map_t map,
vm_map_offset_t high_start);
-#endif
+#endif /* XNU_TARGET_OS_OSX */
extern vm_map_offset_t vm_compute_max_offset(
boolean_t is64);
#if XNU_TARGET_OS_OSX
extern void vm_map_mark_alien(vm_map_t map);
+extern void vm_map_single_jit(vm_map_t map);
#endif /* XNU_TARGET_OS_OSX */
extern kern_return_t vm_map_page_info(
VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(
vm_map_t map __unused)
{
- if (VM_MAP_IS_ALIEN(map)) {
+ if (VM_MAP_IS_ALIEN(map) || map->single_jit) {
return false;
}
return true;
#include <debug.h>
#include <mach_pagemap.h>
-#include <task_swapper.h>
#include <mach/mach_types.h>
#include <mach/memory_object.h>
* memory object (kernel_object) to avoid wasting data structures.
*/
static struct vm_object kernel_object_store VM_PAGE_PACKED_ALIGNED;
-vm_object_t kernel_object;
+SECURITY_READ_ONLY_LATE(vm_object_t) kernel_object = &kernel_object_store;
static struct vm_object compressor_object_store VM_PAGE_PACKED_ALIGNED;
-vm_object_t compressor_object = &compressor_object_store;
+SECURITY_READ_ONLY_LATE(vm_object_t) compressor_object = &compressor_object_store;
+
+/*
+ * This object holds all pages that have been retired due to errors like ECC.
+ * The system should never use the page or look at its contents. The offset
+ * in this object is the same as the page's physical address.
+ */
+static struct vm_object retired_pages_object_store VM_PAGE_PACKED_ALIGNED;
+SECURITY_READ_ONLY_LATE(vm_object_t) retired_pages_object = &retired_pages_object_store;
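+/*
+ * For example, a retired page with physical page number "pn" is entered in
+ * this object at offset ptoa(pn); see vm_page_create_retired().
+ */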
/*
* The submap object is used as a placeholder for vm_map_submap
* here because it must be initialized here.
*/
static struct vm_object vm_submap_object_store VM_PAGE_PACKED_ALIGNED;
+SECURITY_READ_ONLY_LATE(vm_object_t) vm_submap_object = &vm_submap_object_store;
+
/*
* Virtual memory objects are initialized from
.vo_size = 0,
.memq_hint = VM_PAGE_NULL,
.ref_count = 1,
-#if TASK_SWAPPER
- .res_count = 1,
-#endif /* TASK_SWAPPER */
.resident_page_count = 0,
.wired_page_count = 0,
.reusable_page_count = 0,
ZC_NOENCRYPT | ZC_ALIGNMENT_REQUIRED,
ZONE_ID_ANY, ^(zone_t z){
#if defined(__LP64__)
- zone_set_submap_idx(z, Z_SUBMAP_IDX_VA_RESTRICTED_MAP);
+ zone_set_submap_idx(z, Z_SUBMAP_IDX_VA_RESTRICTED);
#else
(void)z;
#endif
* Initialize the "kernel object"
*/
- kernel_object = &kernel_object_store;
-
-/*
- * Note that in the following size specifications, we need to add 1 because
- * VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size.
- */
-
- _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
- kernel_object);
-
- _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
- compressor_object);
+ /*
+ * Note that in the following size specifications, we need to add 1 because
+ * VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size.
+ */
+ _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1, kernel_object);
+ _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1, compressor_object);
kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
compressor_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
kernel_object->no_tag_update = TRUE;
+ /*
+ * The object to hold retired VM pages.
+ */
+ _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1, retired_pages_object);
+ retired_pages_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
+
/*
* Initialize the "submap object". Make it as large as the
* kernel object so that no limit is imposed on submap sizes.
*/
- vm_submap_object = &vm_submap_object_store;
- _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
- vm_submap_object);
+ _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1, vm_submap_object);
vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
/*
return;
}
- if (object == kernel_object || object == compressor_object) {
+ if (object == kernel_object || object == compressor_object || object == retired_pages_object) {
vm_object_lock_shared(object);
OSAddAtomic(-1, &object->ref_count);
if (object->ref_count == 0) {
if (object == kernel_object) {
panic("vm_object_deallocate: losing kernel_object\n");
+ } else if (object == retired_pages_object) {
+ panic("vm_object_deallocate: losing retired_pages_object\n");
} else {
panic("vm_object_deallocate: losing compressor_object\n");
}
if ((object->ref_count > 1) || object->terminating) {
vm_object_lock_assert_exclusive(object);
object->ref_count--;
- vm_object_res_deallocate(object);
if (object->ref_count == 1 &&
object->shadow != VM_OBJECT_NULL) {
continue;
}
- VM_OBJ_RES_DECR(object); /* XXX ? */
/*
* Terminate this object. If it had a shadow,
* then deallocate it; otherwise, if we need
if ((p->vmp_q_state != VM_PAGE_ON_ACTIVE_Q) && p->vmp_reference == TRUE) {
vm_page_activate(p);
- VM_STAT_INCR(reactivations);
+ counter_inc(&vm_statistics_reactivations);
vm_object_page_grab_reactivations++;
}
vm_page_unlock_queues();
vm_object_lock_assert_exclusive(object);
object->ref_count--;
assert(object->ref_count > 0);
- vm_object_res_deallocate(object);
vm_object_unlock(object);
return KERN_FAILURE;
}
object->pager = MEMORY_OBJECT_NULL;
if (pager != MEMORY_OBJECT_NULL) {
- memory_object_control_disable(object->pager_control);
+ memory_object_control_disable(&object->pager_control);
}
object->ref_count--;
-#if TASK_SWAPPER
- assert(object->res_count == 0);
-#endif /* TASK_SWAPPER */
-
assert(object->ref_count == 0);
/*
pmap_flush_context_init(&pmap_flush_context_storage);
}
- vm_page_lockspin_queues();
+ vm_page_lock_queues();
next = (vm_page_t)vm_page_queue_first(&object->memq);
loop_count = BATCH_LIMIT(V_O_R_MAX_BATCH);
- vm_page_lockspin_queues();
+ vm_page_lock_queues();
}
if (reap_type == REAP_DATA_FLUSH || reap_type == REAP_TERMINATE) {
if (p->vmp_busy || p->vmp_cleaning) {
old_pager = object->pager;
object->pager = MEMORY_OBJECT_NULL;
if (old_pager != MEMORY_OBJECT_NULL) {
- memory_object_control_disable(object->pager_control);
+ memory_object_control_disable(&object->pager_control);
}
/*
assert(new_copy->ref_count > 0);
new_copy->ref_count++; /* for old_copy->shadow ref. */
-#if TASK_SWAPPER
- if (old_copy->res_count) {
- VM_OBJ_RES_INCR(new_copy);
- VM_OBJ_RES_DECR(src_object);
- }
-#endif
-
vm_object_unlock(old_copy); /* done with old_copy */
}
assert(source->copy_strategy != MEMORY_OBJECT_COPY_NONE); /* Purgeable objects shouldn't have shadow objects. */
+#if 00
+ /*
+ * The following optimization does not work in the context of submaps
+ * (the shared region, in particular).
+ * This object might have only 1 reference (in the submap) but that
+ * submap can itself be mapped multiple times, so the object is
+ * actually indirectly referenced more than once...
+ */
if (vm_object_shadow_check &&
source->vo_size == length &&
source->ref_count == 1) {
/* things changed while we were locking "source"... */
vm_object_unlock(source);
}
+#endif /* 00 */
/*
* *offset is the map entry's offset into the VM object and
object->paging_offset =
backing_object->paging_offset + backing_offset;
if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
- memory_object_control_collapse(object->pager_control,
+ memory_object_control_collapse(&object->pager_control,
object);
}
/* the backing_object has lost its pager: reset all fields */
vm_object_lock_assert_exclusive(object);
vm_object_lock_assert_exclusive(backing_object);
-#if TASK_SWAPPER
- /*
- * Do object reference in-line to
- * conditionally increment shadow's
- * residence count. If object is not
- * resident, leave residence count
- * on shadow alone.
- */
- if (backing_object->shadow != VM_OBJECT_NULL) {
- vm_object_lock(backing_object->shadow);
- vm_object_lock_assert_exclusive(backing_object->shadow);
- backing_object->shadow->ref_count++;
- if (object->res_count != 0) {
- vm_object_res_reference(backing_object->shadow);
- }
- vm_object_unlock(backing_object->shadow);
- }
-#else /* TASK_SWAPPER */
vm_object_reference(backing_object->shadow);
-#endif /* TASK_SWAPPER */
assert(!object->phys_contiguous);
assert(!backing_object->phys_contiguous);
(!backing_object->named && backing_object->ref_count > 1)) {
vm_object_lock_assert_exclusive(backing_object);
backing_object->ref_count--;
-#if TASK_SWAPPER
- if (object->res_count != 0) {
- vm_object_res_deallocate(backing_object);
- }
- assert(backing_object->ref_count > 0);
-#endif /* TASK_SWAPPER */
vm_object_unlock(backing_object);
} else {
/*
* the backing object.
*/
-#if TASK_SWAPPER
- if (object->res_count == 0) {
- /* XXX get a reference for the deallocate below */
- vm_object_res_reference(backing_object);
- }
-#endif /* TASK_SWAPPER */
/*
* vm_object_collapse (the caller of this function) is
* now called from contexts that may not guarantee that a
VM_PAGE_SET_PHYS_PAGE(m, base_page);
}
} else {
- while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL) {
- vm_page_more_fictitious();
- }
+ m = vm_page_grab_fictitious(TRUE);
/*
* private normally requires lock_queues but since we
object->named = TRUE;
vm_object_lock_assert_exclusive(object);
object->ref_count++;
- vm_object_res_reference(object);
while (!object->pager_ready) {
vm_object_sleep(object,
VM_OBJECT_EVENT_PAGER_READY,
vm_object_deallocate(object);
return KERN_SUCCESS;
}
- VM_OBJ_RES_DECR(object);
shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
if (object->ref_count == 1) {
}
-#if TASK_SWAPPER
-/*
- * vm_object_res_deallocate
- *
- * (recursively) decrement residence counts on vm objects and their shadows.
- * Called from vm_object_deallocate and when swapping out an object.
- *
- * The object is locked, and remains locked throughout the function,
- * even as we iterate down the shadow chain. Locks on intermediate objects
- * will be dropped, but not the original object.
- *
- * NOTE: this function used to use recursion, rather than iteration.
- */
-
-__private_extern__ void
-vm_object_res_deallocate(
- vm_object_t object)
-{
- vm_object_t orig_object = object;
- /*
- * Object is locked so it can be called directly
- * from vm_object_deallocate. Original object is never
- * unlocked.
- */
- assert(object->res_count > 0);
- while (--object->res_count == 0) {
- assert(object->ref_count >= object->res_count);
- vm_object_deactivate_all_pages(object);
- /* iterate on shadow, if present */
- if (object->shadow != VM_OBJECT_NULL) {
- vm_object_t tmp_object = object->shadow;
- vm_object_lock(tmp_object);
- if (object != orig_object) {
- vm_object_unlock(object);
- }
- object = tmp_object;
- assert(object->res_count > 0);
- } else {
- break;
- }
- }
- if (object != orig_object) {
- vm_object_unlock(object);
- }
-}
-
-/*
- * vm_object_res_reference
- *
- * Internal function to increment residence count on a vm object
- * and its shadows. It is called only from vm_object_reference, and
- * when swapping in a vm object, via vm_map_swap.
- *
- * The object is locked, and remains locked throughout the function,
- * even as we iterate down the shadow chain. Locks on intermediate objects
- * will be dropped, but not the original object.
- *
- * NOTE: this function used to use recursion, rather than iteration.
- */
-
-__private_extern__ void
-vm_object_res_reference(
- vm_object_t object)
-{
- vm_object_t orig_object = object;
- /*
- * Object is locked, so this can be called directly
- * from vm_object_reference. This lock is never released.
- */
- while ((++object->res_count == 1) &&
- (object->shadow != VM_OBJECT_NULL)) {
- vm_object_t tmp_object = object->shadow;
-
- assert(object->ref_count >= object->res_count);
- vm_object_lock(tmp_object);
- if (object != orig_object) {
- vm_object_unlock(object);
- }
- object = tmp_object;
- }
- if (object != orig_object) {
- vm_object_unlock(object);
- }
- assert(orig_object->ref_count >= orig_object->res_count);
-}
-#endif /* TASK_SWAPPER */
-
/*
* vm_object_reference:
*
/* "ref_count" refers to the object not its contents */
assert(object1->ref_count >= 1);
assert(object2->ref_count >= 1);
-#if TASK_SWAPPER
- /* "res_count" refers to the object not its contents */
-#endif
/* "resident_page_count" was updated above when transposing pages */
/* "wired_page_count" was updated above when transposing pages */
#if !VM_TAG_ACTIVE_UPDATE
__TRANSPOSE_FIELD(pager_control);
/* update the memory_objects' pointers back to the VM objects */
if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
- memory_object_control_collapse(object1->pager_control,
+ memory_object_control_collapse(&object1->pager_control,
object1);
}
if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
- memory_object_control_collapse(object2->pager_control,
+ memory_object_control_collapse(&object2->pager_control,
object2);
}
__TRANSPOSE_FIELD(copy_strategy);
* that could give us non-page-size aligned values if we start out with values that
* are odd multiples of PAGE_SIZE.
*/
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
unsigned int preheat_max_bytes = (1024 * 512);
-#else /* CONFIG_EMBEDDED */
+#else /* !XNU_TARGET_OS_OSX */
unsigned int preheat_max_bytes = MAX_UPL_TRANSFER_BYTES;
-#endif /* CONFIG_EMBEDDED */
+#endif /* !XNU_TARGET_OS_OSX */
unsigned int preheat_min_bytes = (1024 * 32);
min_ph_size = round_page(preheat_min_bytes);
max_ph_size = round_page(preheat_max_bytes);
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
if (isSSD) {
min_ph_size /= 2;
max_ph_size /= 8;
max_ph_size = trunc_page(max_ph_size);
}
}
-#endif /* !CONFIG_EMBEDDED */
+#endif /* XNU_TARGET_OS_OSX */
if (min_ph_size < PAGE_SIZE) {
min_ph_size = PAGE_SIZE;
vm_object_unlock(object);
}
- if (__improbable(task->task_volatile_objects != 0 ||
- task->task_nonvolatile_objects != 0 ||
- task->task_owned_objects != 0)) {
+ if (__improbable(task->task_owned_objects != 0)) {
panic("%s(%p): volatile=%d nonvolatile=%d owned=%d q=%p q_first=%p q_last=%p",
__FUNCTION__,
task,
#include <debug.h>
#include <mach_assert.h>
#include <mach_pagemap.h>
-#include <task_swapper.h>
#include <mach/kern_return.h>
#include <mach/boolean.h>
extern
vm_object_t compressor_object; /* the single compressor object */
+extern
+vm_object_t retired_pages_object; /* holds VM pages which should never be used */
+
extern
unsigned int vm_object_absent_max; /* maximum number of absent pages
* at a time for each object */
__private_extern__ void _vm_object_allocate(vm_object_size_t size,
vm_object_t object);
-#if TASK_SWAPPER
-
-__private_extern__ void vm_object_res_reference(
- vm_object_t object);
-__private_extern__ void vm_object_res_deallocate(
- vm_object_t object);
-#define VM_OBJ_RES_INCR(object) (object)->res_count++
-#define VM_OBJ_RES_DECR(object) (object)->res_count--
-
-#else /* TASK_SWAPPER */
-
-#define VM_OBJ_RES_INCR(object)
-#define VM_OBJ_RES_DECR(object)
-#define vm_object_res_reference(object)
-#define vm_object_res_deallocate(object)
-
-#endif /* TASK_SWAPPER */
-
#define vm_object_reference_locked(object) \
MACRO_BEGIN \
vm_object_t RLObject = (object); \
assert((RLObject)->ref_count > 0); \
(RLObject)->ref_count++; \
assert((RLObject)->ref_count > 1); \
- vm_object_res_reference(RLObject); \
MACRO_END
-#define vm_object_reference_shared(object) \
- MACRO_BEGIN \
- vm_object_t RLObject = (object); \
- vm_object_lock_assert_shared(object); \
- assert((RLObject)->ref_count > 0); \
+#define vm_object_reference_shared(object) \
+ MACRO_BEGIN \
+ vm_object_t RLObject = (object); \
+ vm_object_lock_assert_shared(object); \
+ assert((RLObject)->ref_count > 0); \
OSAddAtomic(1, &(RLObject)->ref_count); \
- assert((RLObject)->ref_count > 0); \
- /* XXX we would need an atomic version of the following ... */ \
- vm_object_res_reference(RLObject); \
+ assert((RLObject)->ref_count > 0); \
MACRO_END
ppnum_t start,
ppnum_t end);
+extern void vm_page_create_retired(
+ ppnum_t pn);
+
extern vm_page_t kdp_vm_page_lookup(
vm_object_t object,
vm_object_offset_t offset);
vm_object_t object,
vm_object_offset_t offset);
-extern vm_page_t vm_page_grab_fictitious(void);
+extern vm_page_t vm_page_grab_fictitious(boolean_t canwait);
-extern vm_page_t vm_page_grab_guard(void);
+extern vm_page_t vm_page_grab_guard(boolean_t canwait);
extern void vm_page_release_fictitious(
vm_page_t page);
extern void vm_free_delayed_pages(void);
-extern void vm_page_more_fictitious(void);
-
-extern int vm_pool_low(void);
+extern bool vm_pool_low(void);
extern vm_page_t vm_page_grab(void);
extern vm_page_t vm_page_grab_options(int flags);
vm_object_t object,
vm_object_offset_t offset);
-extern vm_page_t vm_page_alloc_guard(
- vm_object_t object,
- vm_object_offset_t offset);
-
extern void vm_page_init(
vm_page_t page,
ppnum_t phys_page,
#else /* CONFIG_JETSAM */
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
#define VM_CHECK_MEMORYSTATUS do {} while(0)
-#else /* CONFIG_EMBEDDED */
+#else /* !XNU_TARGET_OS_OSX */
#define VM_CHECK_MEMORYSTATUS vm_pressure_response()
-#endif /* CONFIG_EMBEDDED */
+#endif /* !XNU_TARGET_OS_OSX */
#endif /* CONFIG_JETSAM */
* protected by the object lock.
*/
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
#define SET_PAGE_DIRTY(m, set_pmap_modified) \
MACRO_BEGIN \
vm_page_t __page__ = (m); \
} \
__page__->vmp_dirty = TRUE; \
MACRO_END
-#else /* CONFIG_EMBEDDED */
+#else /* !XNU_TARGET_OS_OSX */
#define SET_PAGE_DIRTY(m, set_pmap_modified) \
MACRO_BEGIN \
vm_page_t __page__ = (m); \
__page__->vmp_dirty = TRUE; \
MACRO_END
-#endif /* CONFIG_EMBEDDED */
+#endif /* !XNU_TARGET_OS_OSX */
#define PAGE_ASSERT_WAIT(m, interruptible) \
(((m)->vmp_wanted = TRUE), \
vm_page_free_unlocked(p, TRUE); \
MACRO_END
-#define VM_PAGE_GRAB_FICTITIOUS(M) \
- MACRO_BEGIN \
- while ((M = vm_page_grab_fictitious()) == VM_PAGE_NULL) \
- vm_page_more_fictitious(); \
- MACRO_END
-
#define VM_PAGE_WAIT() ((void)vm_page_wait(THREAD_UNINT))
#define vm_page_queue_lock (vm_page_locks.vm_page_queue_lock2)
extern void stop_secluded_suppression(task_t);
#endif /* CONFIG_SECLUDED_MEMORY */
+extern void vm_retire_boot_pages(void);
+extern uint32_t vm_retired_pages_count(void);
#endif /* _VM_VM_PAGE_H_ */
#include <mach/sdt.h>
#include <kern/kern_types.h>
-#include <kern/counters.h>
+#include <kern/counter.h>
#include <kern/host_statistics.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
boolean_t vps_dynamic_priority_enabled = FALSE;
#ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
-#ifdef CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024
-#else
+#else /* !XNU_TARGET_OS_OSX */
#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
#endif
#ifndef VM_PAGEOUT_DEADLOCK_RELIEF
*/
#ifndef VM_PAGE_FREE_TARGET
-#ifdef CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
#define VM_PAGE_FREE_TARGET(free) (15 + (free) / 100)
-#else
+#else /* !XNU_TARGET_OS_OSX */
#define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
#endif /* VM_PAGE_FREE_TARGET */
*/
#ifndef VM_PAGE_FREE_MIN
-#ifdef CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
#define VM_PAGE_FREE_MIN(free) (10 + (free) / 200)
-#else
+#else /* !XNU_TARGET_OS_OSX */
#define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
#endif /* VM_PAGE_FREE_MIN */
-#ifdef CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
#define VM_PAGE_FREE_RESERVED_LIMIT 100
#define VM_PAGE_FREE_MIN_LIMIT 1500
#define VM_PAGE_FREE_TARGET_LIMIT 2000
-#else
+#else /* !XNU_TARGET_OS_OSX */
#define VM_PAGE_FREE_RESERVED_LIMIT 1700
#define VM_PAGE_FREE_MIN_LIMIT 3500
#define VM_PAGE_FREE_TARGET_LIMIT 4000
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
/*
* When vm_page_free_count falls below vm_page_free_reserved,
#define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
#ifndef VM_PAGE_REACTIVATE_LIMIT
-#ifdef CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
#define VM_PAGE_REACTIVATE_LIMIT(avail) (VM_PAGE_INACTIVE_TARGET(avail) / 2)
-#else
+#else /* !XNU_TARGET_OS_OSX */
#define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
#endif /* VM_PAGE_REACTIVATE_LIMIT */
#define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 1000
uint32_t vm_page_upl_tainted = 0;
uint32_t vm_page_iopl_tainted = 0;
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
static boolean_t vm_pageout_waiter = FALSE;
-#endif /* !CONFIG_EMBEDDED */
+#endif /* XNU_TARGET_OS_OSX */
#if DEVELOPMENT || DEBUG
if (m->vmp_dirty) {
vm_page_unwire(m, TRUE); /* reactivates */
- VM_STAT_INCR(reactivations);
+ counter_inc(&vm_statistics_reactivations);
PAGE_WAKEUP_DONE(m);
} else {
vm_page_free(m); /* clears busy, etc. */
vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_added = (unsigned int)(tmp - last.vm_phantom_cache_added_ghost);
last.vm_phantom_cache_added_ghost = tmp;
- tmp64 = get_pages_grabbed_count();
+ tmp64 = counter_load(&vm_page_grab_count);
vm_pageout_stats[vm_pageout_stat_now].pages_grabbed = (unsigned int)(tmp64 - last_vm_page_pages_grabbed);
last_vm_page_pages_grabbed = tmp64;
iq->pgo_throttled = TRUE;
assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000 * NSEC_PER_USEC);
- counter(c_vm_pageout_scan_block++);
-
vm_page_unlock_queues();
assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
vm_pageout_vminfo.vm_pageout_filecache_min_reactivated++;
vm_page_activate(m);
- VM_STAT_INCR(reactivations);
+ counter_inc(&vm_statistics_reactivations);
#if CONFIG_BACKGROUND_QUEUE
#if DEVELOPMENT || DEBUG
if (*is_page_from_bg_q == TRUE) {
#endif /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
} else {
vm_page_activate(m);
- VM_STAT_INCR(reactivations);
+ counter_inc(&vm_statistics_reactivations);
#if CONFIG_BACKGROUND_QUEUE
#if DEVELOPMENT || DEBUG
* The page was/is being used, so put back on active list.
*/
vm_page_activate(m);
- VM_STAT_INCR(reactivations);
+ counter_inc(&vm_statistics_reactivations);
inactive_burst_count = 0;
}
#if CONFIG_BACKGROUND_QUEUE
assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
vm_pageout_running = FALSE;
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
if (vm_pageout_waiter) {
vm_pageout_waiter = FALSE;
thread_wakeup((event_t)&vm_pageout_waiter);
}
-#endif /* !CONFIG_EMBEDDED */
+#endif /* XNU_TARGET_OS_OSX */
lck_mtx_unlock(&vm_page_queue_free_lock);
vm_page_unlock_queues();
- counter(c_vm_pageout_block++);
thread_block((thread_continue_t)vm_pageout_continue);
/*NOTREACHED*/
}
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
kern_return_t
vm_pageout_wait(uint64_t deadline)
{
return kr;
}
-#endif /* !CONFIG_EMBEDDED */
+#endif /* XNU_TARGET_OS_OSX */
static void
vm_object_owner_compressed_update(object,
+1);
}
- VM_STAT_INCR(compressions);
+ counter_inc(&vm_statistics_compressions);
if (m->vmp_tabled) {
vm_page_remove(m, TRUE);
return;
}
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
available_memory = (uint64_t) memorystatus_available_pages;
-#else /* CONFIG_EMBEDDED */
+#else /* !XNU_TARGET_OS_OSX */
available_memory = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
memorystatus_available_pages = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
-#endif /* CONFIG_EMBEDDED */
+#endif /* !XNU_TARGET_OS_OSX */
total_pages = (unsigned int) atop_64(max_mem);
#if CONFIG_SECLUDED_MEMORY
vm_pageout_garbage_collect(int collect)
{
if (collect) {
- if (is_zone_map_nearing_exhaustion()) {
+ if (zone_map_nearing_exhaustion()) {
/*
* Woken up by the zone allocator for zone-map-exhaustion jetsams.
*
* ok; if memory pressure persists, the thread will simply be woken
* up again.
*/
- consider_zone_gc(TRUE);
+ zone_gc(ZONE_GC_JETSAM);
} else {
/* Woken up by vm_pageout_scan or compute_pageout_gc_throttle. */
boolean_t buf_large_zfree = FALSE;
}
if (first_try == TRUE || buf_large_zfree == TRUE) {
/*
- * consider_zone_gc should be last, because the other operations
+ * zone_gc should be last, because the other operations
* might return memory to zones.
*/
- consider_zone_gc(FALSE);
+ zone_gc(ZONE_GC_TRIM);
}
first_try = FALSE;
} while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target);
thread_set_thread_name(vm_pageout_state.vm_pageout_external_iothread, "VM_pageout_external_iothread");
thread_deallocate(vm_pageout_state.vm_pageout_external_iothread);
- result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
+ result = kernel_thread_create((thread_continue_t)vm_pageout_garbage_collect, NULL,
BASEPRI_DEFAULT,
&thread);
if (result != KERN_SUCCESS) {
panic("vm_pageout_garbage_collect: create failed");
}
thread_set_thread_name(thread, "VM_pageout_garbage_collect");
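+ /*
+ * Keep this thread's kernel stack reserved (presumably so the garbage
+ * collector can always run, even when kernel stacks are scarce), then
+ * start it explicitly: kernel_thread_create(), unlike
+ * kernel_thread_start_priority(), leaves the thread unstarted.
+ */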
+ if (thread->reserved_stack == 0) {
+ assert(thread->kernel_stack);
+ thread->reserved_stack = thread->kernel_stack;
+ }
+
+ thread_mtx_lock(thread);
+ thread_start(thread);
+ thread_mtx_unlock(thread);
+
thread_deallocate(thread);
#if VM_PRESSURE_EVENTS
assert(hinfo.max_cpus > 0);
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
vm_pageout_state.vm_compressor_thread_count = 1;
-#else
+#else /* !XNU_TARGET_OS_OSX */
if (hinfo.max_cpus > 4) {
vm_pageout_state.vm_compressor_thread_count = 2;
} else {
vm_pageout_state.vm_compressor_thread_count = 1;
}
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
PE_parse_boot_argn("vmcomp_threads", &vm_pageout_state.vm_compressor_thread_count,
sizeof(vm_pageout_state.vm_compressor_thread_count));
#define MAX_DELAYED_WORK_CTX_ALLOCATED (512)
int vm_page_delayed_work_ctx_needed = 0;
-zone_t dw_ctx_zone = ZONE_NULL;
+SECURITY_READ_ONLY_LATE(zone_t) dw_ctx_zone;
void
vm_page_delayed_work_init_ctx(void)
{
- int nelems = 0, elem_size = 0;
-
- elem_size = sizeof(struct vm_page_delayed_work_ctx);
+ size_t elem_size = sizeof(struct vm_page_delayed_work_ctx);
dw_ctx_zone = zone_create_ext("delayed-work-ctx", elem_size,
ZC_NOGC, ZONE_ID_ANY, ^(zone_t z) {
- zone_set_exhaustible(z, MAX_DELAYED_WORK_CTX_ALLOCATED * elem_size);
+ zone_set_exhaustible(z, MAX_DELAYED_WORK_CTX_ALLOCATED);
});
- nelems = zfill(dw_ctx_zone, MIN_DELAYED_WORK_CTX_ALLOCATED);
- if (nelems < MIN_DELAYED_WORK_CTX_ALLOCATED) {
- printf("vm_page_delayed_work_init_ctx: Failed to preallocate minimum delayed work contexts (%d vs %d).\n", nelems, MIN_DELAYED_WORK_CTX_ALLOCATED);
-#if DEVELOPMENT || DEBUG
- panic("Failed to preallocate minimum delayed work contexts (%d vs %d).\n", nelems, MIN_DELAYED_WORK_CTX_ALLOCATED);
-#endif /* DEVELOPMENT || DEBUG */
- }
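+ /*
+ * Pre-populate the zone with MIN_DELAYED_WORK_CTX_ALLOCATED contexts so
+ * the minimum number of delayed-work contexts is available up front.
+ */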
+ zone_fill_initially(dw_ctx_zone, MIN_DELAYED_WORK_CTX_ALLOCATED);
}
struct vm_page_delayed_work*
"object %p shadow_offset 0x%llx",
upl->map_object, upl->map_object->vo_shadow_offset);
- VM_PAGE_GRAB_FICTITIOUS(alias_page);
+ alias_page = vm_page_grab_fictitious(TRUE);
upl->flags |= UPL_SHADOWED;
}
if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) {
boolean_t isSSD = FALSE;
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
isSSD = TRUE;
-#else
+#else /* !XNU_TARGET_OS_OSX */
vnode_pager_get_isSSD(object->pager, &isSSD);
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
vm_object_unlock(object);
OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
vm_object_unlock(object);
- VM_PAGE_GRAB_FICTITIOUS(alias_page);
+ alias_page = vm_page_grab_fictitious(TRUE);
vm_object_lock(object);
}
if (cntrl_flags & UPL_COPYOUT_FROM) {
dst_page->vmp_clustered = TRUE;
if (!(cntrl_flags & UPL_FILE_IO)) {
- VM_STAT_INCR(pageins);
+ counter_inc(&vm_statistics_pageins);
}
}
}
try_next_page:
if (dwp->dw_mask) {
if (dwp->dw_mask & DW_vm_page_activate) {
- VM_STAT_INCR(reactivations);
+ counter_inc(&vm_statistics_reactivations);
}
VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
goto done;
}
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
if (map->pmap != kernel_pmap &&
(caller_flags & UPL_COPYOUT_FROM) &&
(entry->protection & VM_PROT_EXECUTE) &&
#endif /* DEVELOPMENT || DEBUG */
goto done;
}
-#endif /* CONFIG_EMBEDDED */
+#endif /* !XNU_TARGET_OS_OSX */
local_object = VME_OBJECT(entry);
assert(local_object != VM_OBJECT_NULL);
vm_map_t real_map;
vm_prot_t fault_type;
- if (entry->vme_start < VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(map)) ||
- entry->vme_end > VM_MAP_ROUND_PAGE(offset + *upl_size, VM_MAP_PAGE_MASK(map))) {
- /*
- * Clip the requested range first to minimize the
- * amount of potential copying...
- */
- if (vm_map_lock_read_to_write(map)) {
- goto REDISCOVER_ENTRY;
- }
- vm_map_lock_assert_exclusive(map);
- assert(VME_OBJECT(entry) == local_object);
- vm_map_clip_start(map, entry,
- VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(map)));
- vm_map_clip_end(map, entry,
- VM_MAP_ROUND_PAGE(offset + *upl_size, VM_MAP_PAGE_MASK(map)));
- vm_map_lock_write_to_read(map);
- }
-
local_map = map;
if (caller_flags & UPL_COPYOUT_FROM) {
assert(pg_num == new_offset / PAGE_SIZE);
if (lite_list[pg_num >> 5] & (1U << (pg_num & 31))) {
- VM_PAGE_GRAB_FICTITIOUS(alias_page);
+ alias_page = vm_page_grab_fictitious(TRUE);
vm_object_lock(object);
dwp->dw_mask |= DW_vm_page_activate | DW_PAGE_WAKEUP;
if (upl->flags & UPL_PAGEOUT) {
- VM_STAT_INCR(reactivations);
+ counter_inc(&vm_statistics_reactivations);
DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
}
} else {
if (hibernate_cleaning_in_progress == FALSE && !m->vmp_dirty && (upl->flags & UPL_PAGEOUT)) {
pgpgout_count++;
- VM_STAT_INCR(pageouts);
+ counter_inc(&vm_statistics_pageouts);
DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
dwp->dw_mask |= DW_enqueue_cleaned;
vm_page_unlock_queues();
if (need_unwire == TRUE) {
- VM_STAT_INCR(reactivations);
+ counter_inc(&vm_statistics_reactivations);
}
}
#if UPL_DEBUG
extern upl_t upl_associated_upl(upl_t upl);
extern void upl_set_associated_upl(upl_t upl, upl_t associated_upl);
+#ifndef MACH_KERNEL_PRIVATE
+typedef struct vm_page *vm_page_t;
+#endif
#ifdef XNU_KERNEL_PRIVATE
+#include <vm/vm_kern.h>
extern upl_size_t upl_adjusted_size(
upl_t upl,
upl_t upl_ptr,
vm_tag_t tag);
-#endif /* XNU_KERNEL_PRIVATE */
-
-extern struct vnode * upl_lookup_vnode(upl_t upl);
-
-#ifndef MACH_KERNEL_PRIVATE
-typedef struct vm_page *vm_page_t;
-#endif
-
-extern void vm_page_free_list(
+extern void vm_page_free_list(
vm_page_t mem,
boolean_t prepare_object);
extern kern_return_t vm_page_alloc_list(
int page_count,
- int flags,
- vm_page_t * list);
+ kma_flags_t flags,
+ vm_page_t *list);
+
+#endif /* XNU_KERNEL_PRIVATE */
+
+extern struct vnode * upl_lookup_vnode(upl_t upl);
extern void vm_page_set_offset(vm_page_t page, vm_object_offset_t offset);
extern vm_object_offset_t vm_page_get_offset(vm_page_t page);
extern kern_return_t mach_vm_pressure_level_monitor(boolean_t wait_for_pressure, unsigned int *pressure_level);
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
extern kern_return_t vm_pageout_wait(uint64_t deadline);
-#endif
+#endif /* XNU_TARGET_OS_OSX */
#ifdef MACH_KERNEL_PRIVATE
uint32_t phantom_cache_eval_period_in_msecs = 250;
uint32_t phantom_cache_thrashing_threshold_ssd = 1000;
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
uint32_t phantom_cache_thrashing_threshold = 500;
-#else
+#else /* !XNU_TARGET_OS_OSX */
uint32_t phantom_cache_thrashing_threshold = 50;
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
/*
* Number of consecutive thrashing periods required before
* vm_phantom_cache_check_pressure() returns true.
*/
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
unsigned phantom_cache_contiguous_periods = 4;
-#else
+#else /* !XNU_TARGET_OS_OSX */
unsigned phantom_cache_contiguous_periods = 2;
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
clock_sec_t pc_start_of_eval_period_sec = 0;
clock_nsec_t pc_start_of_eval_period_nsec = 0;
if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
return;
}
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 10) / VM_GHOST_PAGES_PER_ENTRY);
-#else
+#else /* !XNU_TARGET_OS_OSX */
num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 4) / VM_GHOST_PAGES_PER_ENTRY);
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
vm_phantom_cache_num_entries = 1;
while (vm_phantom_cache_num_entries < num_entries) {
extern mach_port_name_t ipc_port_copyout_send(
ipc_port_t sright,
ipc_space_t space);
+extern mach_port_name_t ipc_port_copyout_send_pinned(
+ ipc_port_t sright,
+ ipc_space_t space);
extern task_t port_name_to_task(
mach_port_name_t name);
+extern task_t port_name_to_task_read(
+ mach_port_name_t name);
extern task_t port_name_to_task_name(
mach_port_name_t name);
extern void ipc_port_release_send(
extern vm_map_offset_t get_map_min(vm_map_t);
extern vm_map_offset_t get_map_max(vm_map_t);
extern vm_map_size_t get_vmmap_size(vm_map_t);
+extern int get_task_page_size(task_t);
#if CONFIG_COREDUMP
extern int get_vmmap_entries(vm_map_t);
#endif
vm_object_offset_t crypto_backing_offset,
struct pager_crypt_info *crypt_info,
vm_object_offset_t crypto_start,
- vm_object_offset_t crypto_end);
+ vm_object_offset_t crypto_end,
+ boolean_t cache_pager);
#endif /* CONFIG_CODE_DECRYPTION */
struct vm_shared_region_slide_info;
extern int no_paging_space_action(void);
+/*
+ * counts updated by revalidate_text_page()
+ */
+extern unsigned int vmtc_total; /* total # of text page corruptions detected */
+extern unsigned int vmtc_undiagnosed; /* of that what wasn't diagnosed */
+extern unsigned int vmtc_not_eligible; /* failed to correct, due to page attributes */
+extern unsigned int vmtc_copyin_fail; /* of undiagnosed, copyin failure count */
+extern unsigned int vmtc_not_found; /* of diagnosed, no error found - code signing error? */
+extern unsigned int vmtc_one_bit_flip; /* of diagnosed, single bit errors */
+#define MAX_TRACK_POWER2 9 /* of diagnosed, counts of 1, 2, 4,... bytes corrupted */
+extern unsigned int vmtc_byte_counts[MAX_TRACK_POWER2 + 1];
+
+extern kern_return_t revalidate_text_page(task_t, vm_map_offset_t);
+
#define VM_TOGGLE_CLEAR 0
#define VM_TOGGLE_SET 1
#define VM_TOGGLE_GETVALUE 999
*/
owner = object->vo_owner;
if (owner != NULL && owner != VM_OBJECT_OWNER_DISOWNED) {
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
#if CONFIG_JETSAM
object_task_importance = proc_get_memstat_priority((struct proc *)get_bsdtask_info(owner), TRUE);
#endif /* CONFIG_JETSAM */
-#else /* CONFIG_EMBEDDED */
+#else /* !XNU_TARGET_OS_OSX */
object_task_importance = task_importance_estimate(owner);
-#endif /* CONFIG_EMBEDDED */
+#endif /* !XNU_TARGET_OS_OSX */
}
if (object_task_importance < best_object_task_importance) {
#include <mach/vm_prot.h>
#include <mach/vm_statistics.h>
#include <mach/sdt.h>
-#include <kern/counters.h>
+#include <kern/counter.h>
#include <kern/host_statistics.h>
#include <kern/sched_prim.h>
#include <kern/policy_internal.h>
* Updated and checked behind the vm_page_queues_lock. */
static void vm_page_free_prepare(vm_page_t page);
-static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
+static vm_page_t vm_page_grab_fictitious_common(ppnum_t, boolean_t);
static void vm_tag_init(void);
/* for debugging purposes */
+SECURITY_READ_ONLY_EARLY(uint32_t) vm_packed_from_vm_pages_array_mask =
+ VM_PAGE_PACKED_FROM_ARRAY;
SECURITY_READ_ONLY_EARLY(vm_packing_params_t) vm_page_packing_params =
VM_PACKING_PARAMS(VM_PAGE_PACKED_PTR);
#define BUCKETS_PER_LOCK 16
-vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
-unsigned int vm_page_bucket_count = 0; /* How big is array? */
-unsigned int vm_page_hash_mask; /* Mask for hash function */
-unsigned int vm_page_hash_shift; /* Shift for hash function */
-uint32_t vm_page_bucket_hash; /* Basic bucket hash */
-unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
+SECURITY_READ_ONLY_LATE(vm_page_bucket_t *) vm_page_buckets; /* Array of buckets */
+SECURITY_READ_ONLY_LATE(unsigned int) vm_page_bucket_count = 0; /* How big is array? */
+SECURITY_READ_ONLY_LATE(unsigned int) vm_page_hash_mask; /* Mask for hash function */
+SECURITY_READ_ONLY_LATE(unsigned int) vm_page_hash_shift; /* Shift for hash function */
+SECURITY_READ_ONLY_LATE(uint32_t) vm_page_bucket_hash; /* Basic bucket hash */
+SECURITY_READ_ONLY_LATE(unsigned int) vm_page_bucket_lock_count = 0; /* How big is array of locks? */
#ifndef VM_TAG_ACTIVE_UPDATE
#error VM_TAG_ACTIVE_UPDATE
#error VM_MAX_TAG_ZONES
#endif
-boolean_t vm_tag_active_update = VM_TAG_ACTIVE_UPDATE;
-lck_spin_t *vm_page_bucket_locks;
+/* for debugging */
+SECURITY_READ_ONLY_LATE(bool) vm_tag_active_update = VM_TAG_ACTIVE_UPDATE;
+SECURITY_READ_ONLY_LATE(lck_spin_t *) vm_page_bucket_locks;
vm_allocation_site_t vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC + 1];
vm_allocation_site_t * vm_allocation_sites[VM_MAX_TAG_VALUE];
#if VM_MAX_TAG_ZONES
-vm_allocation_zone_total_t ** vm_allocation_zone_totals;
+static vm_allocation_zone_total_t **vm_allocation_zone_totals;
#endif /* VM_MAX_TAG_ZONES */
vm_tag_t vm_allocation_tag_highest;
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */
-
-
#if MACH_PAGE_HASH_STATS
/* This routine is only for debug. It is intended to be called by
* hand by a developer using a kernel debugger. This routine prints
LCK_GRP_DECLARE(vm_page_lck_grp_purge, "vm_page_purge");
LCK_GRP_DECLARE(vm_page_lck_grp_alloc, "vm_page_alloc");
LCK_GRP_DECLARE(vm_page_lck_grp_bucket, "vm_page_bucket");
-LCK_MTX_EARLY_DECLARE_ATTR(vm_page_alloc_lock, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
LCK_SPIN_DECLARE_ATTR(vm_objects_wired_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
LCK_SPIN_DECLARE_ATTR(vm_allocation_sites_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
return pmap_steal_memory_internal(size, TRUE);
}
+#if defined(__arm64__)
+/*
+ * Retire a page at startup.
+ * These pages will eventually wind up on the retired_pages_object
+ * in vm_retire_boot_pages().
+ */
+static vm_page_queue_head_t vm_page_queue_retired VM_PAGE_PACKED_ALIGNED;
+static void
+vm_page_retire_startup(vm_page_t p)
+{
+ p->vmp_q_state = VM_PAGE_NOT_ON_Q;
+ p->vmp_error = true;
+ p->vmp_unusual = true;
+ vm_page_queue_enter(&vm_page_queue_retired, p, vmp_pageq);
+ printf("To be retired at boot: page at 0x%llx\n", (long long)ptoa(VM_PAGE_GET_PHYS_PAGE(p)));
+}
+#endif /* defined(__arm64__) */
+
#if CONFIG_SECLUDED_MEMORY
/* boot-args to control secluded memory */
unsigned int secluded_mem_mb = 0; /* # of MBs of RAM to seclude */
* the memory needed to map what's being allocated, i.e. the page
* table entries. So the actual number of pages we get will be
* less than this. To do someday: include that in the computation.
+ *
+ * Also for ARM, we don't use the count of free_pages, but rather the
+ * range from last page to first page (ignoring holes due to retired pages).
*/
+#if defined(__arm__) || defined(__arm64__)
+ mem_sz = pmap_free_pages_span() * (uint64_t)PAGE_SIZE;
+#else /* defined(__arm__) || defined(__arm64__) */
mem_sz = pmap_free_pages() * (uint64_t)PAGE_SIZE;
+#endif /* defined(__arm__) || defined(__arm64__) */
mem_sz += round_page(virtual_space_start) - virtual_space_start; /* Account for any slop */
npages = (uint_t)(mem_sz / (PAGE_SIZE + sizeof(*vm_pages))); /* scaled to include the vm_page_ts */
#endif
vm_delayed_count = 0;
+#if defined(__arm64__)
+ vm_page_queue_init(&vm_page_queue_retired);
+#endif /* defined(__arm64__) */
absolutetime_to_nanoseconds(mach_absolute_time(), &start_ns);
vm_pages_count = 0;
vm_first_phys_ppnum = phys_page;
patch_low_glo_vm_page_info((void *)vm_page_array_beginning_addr,
(void *)vm_page_array_ending_addr, vm_first_phys_ppnum);
+#if defined(__arm64__)
+ } else {
+ /*
+ * pmap_next_page() may skip over pages reported bad by iboot.
+ */
+ while (i < phys_page - vm_first_phys_ppnum && i < npages) {
+ ++vm_pages_count;
+ vm_page_init(&vm_pages[i], i + vm_first_phys_ppnum, FALSE);
+ vm_page_retire_startup(&vm_pages[i]);
+ ++i;
+ }
+ if (i >= npages) {
+ break;
+ }
+ assert(i == phys_page - vm_first_phys_ppnum);
+#endif /* defined(__arm64__) */
}
- assert((i + vm_first_phys_ppnum) == phys_page);
-#endif
+#endif /* defined(__arm__) || defined(__arm64__) */
#if defined(__x86_64__)
/* The x86 clump freeing code requires increasing ppn's to work correctly */
if (!vm_himemory_mode) {
do {
- vm_page_release_startup(&vm_pages[--i]);
+ if (!vm_pages[--i].vmp_error) { /* skip retired pages */
+ vm_page_release_startup(&vm_pages[i]);
+ }
} while (i != 0);
}
* Reflect size and usage information for vm_pages[].
*/
- z->countavail = (uint32_t)(vm_page_array_ending_addr - vm_pages);
- z->countfree = z->countavail - vm_pages_count;
+ z->z_elems_avail = (uint32_t)(vm_page_array_ending_addr - vm_pages);
+ z->z_elems_free = z->z_elems_avail - vm_pages_count;
zpercpu_get_cpu(z->z_stats, 0)->zs_mem_allocated =
vm_pages_count * sizeof(struct vm_page);
vm_page_array_zone_data_size = (uintptr_t)((void *)vm_page_array_ending_addr - (void *)vm_pages);
vm_page_zone_pages = atop(round_page((vm_offset_t)vm_page_array_zone_data_size));
- z->page_count += vm_page_zone_pages;
+ z->z_wired_cur += vm_page_zone_pages;
+ z->z_wired_hwm = z->z_wired_cur;
+ z->z_va_cur = z->z_wired_cur;
/* since zone accounts for these, take them out of stolen */
VM_PAGE_MOVE_STOLEN(vm_page_zone_pages);
});
~(VM_PAGE_PACKED_PTR_ALIGNMENT - 1);
vm_page_zone = zone_create_ext("vm pages", vm_page_with_ppnum_size,
- ZC_ALLOW_FOREIGN | ZC_NOGZALLOC | ZC_ALIGNMENT_REQUIRED |
- ZC_NOCALLOUT, ZONE_ID_ANY, ^(zone_t z) {
+ ZC_NOGZALLOC | ZC_ALIGNMENT_REQUIRED, ZONE_ID_ANY, ^(zone_t z) {
#if defined(__LP64__)
- zone_set_submap_idx(z, Z_SUBMAP_IDX_VA_RESTRICTED_MAP);
+ zone_set_submap_idx(z, Z_SUBMAP_IDX_VA_RESTRICTED);
#endif
- zone_set_exhaustible(z, 0);
+ /*
+ * The number "10" is a small number that is larger than the number
+ * of fictitious pages that any single caller will attempt to allocate
+ * without blocking.
+ *
+ * The largest such number at the moment is kernel_memory_allocate()
+ * when 2 guard pages are requested. 10 is simply a somewhat larger number,
+ * taking into account the 50% hysteresis the zone allocator uses.
+ *
+ * Note: this works at all because the zone allocator
+ * doesn't ever allocate fictitious pages.
+ */
+ z->z_elems_rsv = 10;
});
}
STARTUP(ZALLOC, STARTUP_RANK_SECOND, vm_page_module_init);
for (phys_page = start;
phys_page < end;
phys_page++) {
- while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
- == VM_PAGE_NULL) {
- vm_page_more_fictitious();
- }
-
+ m = vm_page_grab_fictitious_common(phys_page, TRUE);
m->vmp_fictitious = FALSE;
pmap_clear_noencrypt(phys_page);
}
}
+#if defined(__arm64__)
+/*
+ * Like vm_page_create(), except we want to immediately retire the page,
+ * not put it on the free list.
+ */
+void
+vm_page_create_retired(
+ ppnum_t phys_page)
+{
+ vm_page_t m;
+
+ m = vm_page_grab_fictitious_common(phys_page, TRUE);
+ m->vmp_fictitious = FALSE;
+ pmap_clear_noencrypt(phys_page);
+ m->vmp_error = true;
+ m->vmp_unusual = true;
+ vm_page_lock_queues();
+ m->vmp_q_state = VM_PAGE_IS_WIRED;
+ m->vmp_wire_count++;
+ vm_page_unlock_queues();
+
+ lck_mtx_lock(&vm_page_queue_free_lock);
+ vm_page_pages++;
+ lck_mtx_unlock(&vm_page_queue_free_lock);
+
+ vm_object_lock(retired_pages_object);
+ vm_page_insert_wired(m, retired_pages_object, ptoa(VM_PAGE_GET_PHYS_PAGE(m)), VM_KERN_MEMORY_RETIRED);
+ vm_object_unlock(retired_pages_object);
+ pmap_retire_page(VM_PAGE_GET_PHYS_PAGE(m));
+}
+#endif /* defined(__arm64__) */
+
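
A minimal sketch of how a boot-time caller might drive this path; the helper name and the ppn_list/ppn_count inputs are hypothetical and only illustrate handing known-bad physical pages to vm_page_create_retired():

#if defined(__arm64__)
static void
retire_bad_boot_pages(const ppnum_t *ppn_list, unsigned int ppn_count)
{
	for (unsigned int i = 0; i < ppn_count; i++) {
		/* take the page out of circulation: it is wired into
		 * retired_pages_object instead of going on the free list */
		vm_page_create_retired(ppn_list[i]);
	}
}
#endif /* defined(__arm64__) */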
/*
* vm_page_hash:
*
* Remove a fictitious page from the free list.
* Returns VM_PAGE_NULL if there are no free pages.
*/
-int c_vm_page_grab_fictitious = 0;
-int c_vm_page_grab_fictitious_failed = 0;
-int c_vm_page_release_fictitious = 0;
-int c_vm_page_more_fictitious = 0;
-vm_page_t
-vm_page_grab_fictitious_common(
- ppnum_t phys_addr)
+static vm_page_t
+vm_page_grab_fictitious_common(ppnum_t phys_addr, boolean_t canwait)
{
- vm_page_t m;
+ vm_page_t m;
- if ((m = (vm_page_t)zalloc_noblock(vm_page_zone))) {
+ m = zalloc_flags(vm_page_zone, canwait ? Z_WAITOK : Z_NOWAIT);
+ if (m) {
vm_page_init(m, phys_addr, FALSE);
m->vmp_fictitious = TRUE;
-
- c_vm_page_grab_fictitious++;
- } else {
- c_vm_page_grab_fictitious_failed++;
}
-
return m;
}
vm_page_t
-vm_page_grab_fictitious(void)
+vm_page_grab_fictitious(boolean_t canwait)
{
- return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
+ return vm_page_grab_fictitious_common(vm_page_fictitious_addr, canwait);
}
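
For context, a hedged sketch of the typical grab/release pairing with the new canwait parameter; vm_page_release_fictitious() is the existing counterpart whose tail (the zfree) appears just below in this hunk:

/* Sketch: allocate a placeholder (fictitious) page, willing to block. */
vm_page_t m = vm_page_grab_fictitious(TRUE);
if (m != VM_PAGE_NULL) {
	/* ... use m as a guard/placeholder page ... */
	vm_page_release_fictitious(m);
}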
int vm_guard_count;
vm_page_t
-vm_page_grab_guard(void)
+vm_page_grab_guard(boolean_t canwait)
{
vm_page_t page;
- page = vm_page_grab_fictitious_common(vm_page_guard_addr);
+ page = vm_page_grab_fictitious_common(vm_page_guard_addr, canwait);
if (page) {
OSAddAtomic(1, &vm_guard_count);
}
OSAddAtomic(-1, &vm_guard_count);
}
- c_vm_page_release_fictitious++;
-
zfree(vm_page_zone, m);
}
-/*
- * vm_page_more_fictitious:
- *
- * Add more fictitious pages to the zone.
- * Allowed to block. This routine is way intimate
- * with the zones code, for several reasons:
- * 1. we need to carve some page structures out of physical
- * memory before zones work, so they _cannot_ come from
- * the zone restricted submap.
- * 2. the zone needs to be collectable in order to prevent
- * growth without bound. These structures are used by
- * the device pager (by the hundreds and thousands), as
- * private pages for pageout, and as blocking pages for
- * pagein. Temporary bursts in demand should not result in
- * permanent allocation of a resource.
- * 3. To smooth allocation humps, we allocate single pages
- * with kernel_memory_allocate(), and cram them into the
- * zone.
- */
-
-void
-vm_page_more_fictitious(void)
-{
- vm_offset_t addr;
- kern_return_t retval;
-
- c_vm_page_more_fictitious++;
-
- /*
- * Allocate a single page from the zone restricted submap. Do not wait
- * if no physical pages are immediately available, and do not zero the
- * space. We need our own blocking lock here to prevent having multiple,
- * simultaneous requests from piling up on the zone restricted submap
- * lock.
- * Exactly one (of our) threads should be potentially waiting on the map
- * lock. If winner is not vm-privileged, then the page allocation will
- * fail, and it will temporarily block here in the vm_page_wait().
- */
- lck_mtx_lock(&vm_page_alloc_lock);
- /*
- * If another thread allocated space, just bail out now.
- */
- if (os_atomic_load(&vm_page_zone->countfree, relaxed) > 5) {
- /*
- * The number "5" is a small number that is larger than the
- * number of fictitious pages that any single caller will
- * attempt to allocate. Otherwise, a thread will attempt to
- * acquire a fictitious page (vm_page_grab_fictitious), fail,
- * release all of the resources and locks already acquired,
- * and then call this routine. This routine finds the pages
- * that the caller released, so fails to allocate new space.
- * The process repeats infinitely. The largest known number
- * of fictitious pages required in this manner is 2. 5 is
- * simply a somewhat larger number.
- */
- lck_mtx_unlock(&vm_page_alloc_lock);
- return;
- }
-
- retval = kernel_memory_allocate(zone_submap(vm_page_zone),
- &addr, PAGE_SIZE, 0, KMA_ZERO | KMA_KOBJECT | KMA_NOPAGEWAIT,
- VM_KERN_MEMORY_ZONE);
-
- if (retval != KERN_SUCCESS) {
- /*
- * No page was available. Drop the
- * lock to give another thread a chance at it, and
- * wait for the pageout daemon to make progress.
- */
- lck_mtx_unlock(&vm_page_alloc_lock);
- vm_page_wait(THREAD_UNINT);
- return;
- }
-
- zcram(vm_page_zone, addr, PAGE_SIZE);
-
- lck_mtx_unlock(&vm_page_alloc_lock);
-}
-
-
/*
* vm_pool_low():
*
* can get memory without blocking. Advisory only, since the
* situation may change under us.
*/
-int
+bool
vm_pool_low(void)
{
/* No locking, at worst we will fib. */
VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
- disable_preemption();
- *PERCPU_GET(vm_page_grab_count) += 1;
+ counter_inc(&vm_page_grab_count);
VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, 0, 1, 0, 0);
- enable_preemption();
return mem;
}
-
/*
* vm_page_grab:
*
vm_page_grab_diags();
vm_offset_t pcpu_base = current_percpu_base();
- *PERCPU_GET_WITH_BASE(pcpu_base, vm_page_grab_count) += 1;
+ counter_inc_preemption_disabled(&vm_page_grab_count);
*PERCPU_GET_WITH_BASE(pcpu_base, free_pages) = mem->vmp_snext;
VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, grab_options, 0, 0, 0);
if (mem) {
VM_CHECK_MEMORYSTATUS;
- disable_preemption();
vm_page_grab_diags();
- *PERCPU_GET(vm_page_grab_count) += 1;
+ counter_inc(&vm_page_grab_count);
VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, grab_options, 0, 0, 0);
- enable_preemption();
return mem;
}
* satisfy this request
*/
vm_page_grab_diags();
- *PERCPU_GET_WITH_BASE(pcpu_base, vm_page_grab_count) += 1;
+ counter_inc_preemption_disabled(&vm_page_grab_count);
VM_DEBUG_EVENT(vm_page_grab, VM_PAGE_GRAB, DBG_FUNC_NONE, grab_options, 0, 0, 0);
mem = head;
assert(mem->vmp_q_state == VM_PAGE_ON_FREE_LOCAL_Q);
* context switch. Could be a perf. issue.
*/
- counter(c_vm_page_wait_block++);
-
if (need_wakeup) {
thread_wakeup((event_t)&vm_page_free_wanted);
}
wait_result = assert_wait(wait_event, interruptible);
lck_mtx_unlock(&vm_page_queue_free_lock);
- counter(c_vm_page_wait_block++);
if (need_wakeup) {
thread_wakeup((event_t)&vm_page_free_wanted);
return mem;
}
-/*
- * vm_page_alloc_guard:
- *
- * Allocate a fictitious page which will be used
- * as a guard page. The page will be inserted into
- * the object and returned to the caller.
- */
-
-vm_page_t
-vm_page_alloc_guard(
- vm_object_t object,
- vm_object_offset_t offset)
-{
- vm_page_t mem;
-
- vm_object_lock_assert_exclusive(object);
- mem = vm_page_grab_guard();
- if (mem == VM_PAGE_NULL) {
- return VM_PAGE_NULL;
- }
-
- vm_page_insert(mem, object, offset);
-
- return mem;
-}
-
-
-counter(unsigned int c_laundry_pages_freed = 0; )
-
/*
* vm_page_free_prepare:
*
* from its pageout queue and adjust the laundry accounting
*/
vm_pageout_steal_laundry(mem, TRUE);
- counter(++c_laundry_pages_freed);
}
vm_page_queues_remove(mem, TRUE);
unsigned int idx_last_contig_page_found = 0;
int free_considered = 0, free_available = 0;
int substitute_needed = 0;
- boolean_t wrapped, zone_gc_called = FALSE;
+ int zone_gc_called = 0;
+ boolean_t wrapped;
kern_return_t kr;
#if DEBUG
clock_sec_t tv_start_sec = 0, tv_end_sec = 0;
#if MACH_ASSERT
vm_page_verify_free_lists();
#endif
- if (m == NULL && zone_gc_called == FALSE) {
+ if (m == NULL && zone_gc_called < 2) {
printf("%s(num=%d,low=%d): found %d pages at 0x%llx...scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages... wired count is %d\n",
__func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
scanned, yielded, dumped_run, stolen_pages, compressed_pages, vm_page_wire_count);
(void)(*consider_buffer_cache_collect)(1);
}
- consider_zone_gc(FALSE);
+ zone_gc(zone_gc_called ? ZONE_GC_DRAIN : ZONE_GC_TRIM);
- zone_gc_called = TRUE;
+ zone_gc_called++;
printf("vm_page_find_contiguous: zone_gc called... wired count is %d\n", vm_page_wire_count);
goto full_scan_again;
kern_return_t
vm_page_alloc_list(
- int page_count,
- int flags,
- vm_page_t *list)
+ int page_count,
+ kma_flags_t flags,
+ vm_page_t *list)
{
- vm_page_t lo_page_list = VM_PAGE_NULL;
+ vm_page_t page_list = VM_PAGE_NULL;
vm_page_t mem;
- int i;
+ kern_return_t kr = KERN_SUCCESS;
+ int page_grab_count = 0;
+ mach_vm_size_t map_size = ptoa_64(page_count);
+#if DEVELOPMENT || DEBUG
+ task_t task = current_task();
+#endif /* DEVELOPMENT || DEBUG */
- if (!(flags & KMA_LOMEM)) {
- panic("vm_page_alloc_list: called w/o KMA_LOMEM");
- }
+ for (int i = 0; i < page_count; i++) {
+ for (;;) {
+ if (flags & KMA_LOMEM) {
+ mem = vm_page_grablo();
+ } else {
+ mem = vm_page_grab();
+ }
- for (i = 0; i < page_count; i++) {
- mem = vm_page_grablo();
+ if (mem != VM_PAGE_NULL) {
+ break;
+ }
- if (mem == VM_PAGE_NULL) {
- if (lo_page_list) {
- vm_page_free_list(lo_page_list, FALSE);
+ if (flags & KMA_NOPAGEWAIT) {
+ kr = KERN_RESOURCE_SHORTAGE;
+ goto out;
+ }
+ if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) {
+ kr = KERN_RESOURCE_SHORTAGE;
+ goto out;
}
- *list = VM_PAGE_NULL;
+ /* VM privileged threads should have waited in vm_page_grab() and not get here. */
+ assert(!(current_thread()->options & TH_OPT_VMPRIV));
- return KERN_RESOURCE_SHORTAGE;
+ uint64_t unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE;
+ if (unavailable > max_mem || map_size > (max_mem - unavailable)) {
+ kr = KERN_RESOURCE_SHORTAGE;
+ goto out;
+ }
+ VM_PAGE_WAIT();
}
- mem->vmp_snext = lo_page_list;
- lo_page_list = mem;
+
+ page_grab_count++;
+ mem->vmp_snext = page_list;
+ page_list = mem;
}
- *list = lo_page_list;
- return KERN_SUCCESS;
+ if (KMA_ZERO & flags) {
+ for (mem = page_list; mem; mem = mem->vmp_snext) {
+ vm_page_zero_fill(mem);
+ }
+ }
+
+out:
+#if DEBUG || DEVELOPMENT
+ if (task != NULL) {
+ ledger_credit(task->ledger, task_ledgers.pages_grabbed_kern, page_grab_count);
+ }
+#endif
+
+ if (kr == KERN_SUCCESS) {
+ *list = page_list;
+ } else {
+ vm_page_free_list(page_list, FALSE);
+ }
+
+ return kr;
}
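
A hedged usage sketch of the reworked interface: allocate a short list of zero-filled low-memory pages without waiting, walk it via vmp_snext, and return it with vm_page_free_list(). The 4-page request is illustrative only:

vm_page_t list = VM_PAGE_NULL;
kern_return_t kr;

kr = vm_page_alloc_list(4, KMA_LOMEM | KMA_ZERO | KMA_NOPAGEWAIT, &list);
if (kr == KERN_SUCCESS) {
	for (vm_page_t p = list; p != VM_PAGE_NULL; p = p->vmp_snext) {
		/* hand each page to the driver / DMA engine, etc. */
	}
	vm_page_free_list(list, FALSE);
}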
void
orig_wire_count = vm_page_wire_count;
(void)(*consider_buffer_cache_collect)(1);
- consider_zone_gc(FALSE);
+ zone_gc(ZONE_GC_DRAIN);
HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
vm_allocation_zone_totals[VM_KERN_MEMORY_KALLOC] = (vm_allocation_zone_total_t *) addr;
}
-void
-vm_tag_will_update_zone(vm_tag_t tag, uint32_t zidx)
+__attribute__((noinline))
+static vm_tag_t
+vm_tag_zone_stats_alloc(vm_tag_t tag, zalloc_flags_t flags)
{
- vm_allocation_zone_total_t * zone;
+ vm_allocation_zone_total_t *stats;
+ vm_size_t size = sizeof(*stats) * VM_MAX_TAG_ZONES;
+ stats = kheap_alloc(KHEAP_DATA_BUFFERS, size,
+ Z_VM_TAG(VM_KERN_MEMORY_DIAG) | Z_ZERO | flags);
+ if (!stats) {
+ return VM_KERN_MEMORY_NONE;
+ }
+ if (!os_atomic_cmpxchg(&vm_allocation_zone_totals[tag], NULL, stats, release)) {
+ kheap_free(KHEAP_DATA_BUFFERS, stats, size);
+ }
+ return tag;
+}
+
+vm_tag_t
+vm_tag_will_update_zone(vm_tag_t tag, uint32_t zidx, uint32_t zflags)
+{
assert(VM_KERN_MEMORY_NONE != tag);
assert(tag < VM_MAX_TAG_VALUE);
if (zidx >= VM_MAX_TAG_ZONES) {
- return;
+ return VM_KERN_MEMORY_NONE;
}
- zone = vm_allocation_zone_totals[tag];
- if (!zone) {
- zone = kalloc_tag(VM_MAX_TAG_ZONES * sizeof(*zone), VM_KERN_MEMORY_DIAG);
- if (!zone) {
- return;
- }
- bzero(zone, VM_MAX_TAG_ZONES * sizeof(*zone));
- if (!OSCompareAndSwapPtr(NULL, zone, &vm_allocation_zone_totals[tag])) {
- kfree(zone, VM_MAX_TAG_ZONES * sizeof(*zone));
- }
+ if (__probable(vm_allocation_zone_totals[tag])) {
+ return tag;
}
+ return vm_tag_zone_stats_alloc(tag, zflags);
}
void
-vm_tag_update_zone_size(vm_tag_t tag, uint32_t zidx, int64_t delta, int64_t dwaste)
+vm_tag_update_zone_size(vm_tag_t tag, uint32_t zidx, long delta)
{
- vm_allocation_zone_total_t * zone;
- uint32_t new;
+ vm_allocation_zone_total_t *stats;
+ vm_size_t value;
assert(VM_KERN_MEMORY_NONE != tag);
assert(tag < VM_MAX_TAG_VALUE);
return;
}
- zone = vm_allocation_zone_totals[tag];
- assert(zone);
- zone += zidx;
+ stats = vm_allocation_zone_totals[tag];
+ assert(stats);
+ stats += zidx;
- /* the zone is locked */
+ value = os_atomic_add(&stats->vazt_total, delta, relaxed);
if (delta < 0) {
- assertf(zone->total >= ((uint64_t)-delta), "zidx %d, tag %d, %p", zidx, tag, zone);
- zone->total += delta;
- } else {
- zone->total += delta;
- if (zone->total > zone->peak) {
- zone->peak = zone->total;
- }
- if (dwaste) {
- new = zone->waste;
- if (zone->wastediv < 65536) {
- zone->wastediv++;
- } else {
- new -= (new >> 16);
- }
- __assert_only bool ov = os_add_overflow(new, dwaste, &new);
- assert(!ov);
- zone->waste = new;
- }
+ assertf((long)value >= 0, "zidx %d, tag %d, %p", zidx, tag, stats);
+ return;
+ } else if (os_atomic_load(&stats->vazt_peak, relaxed) < value) {
+ os_atomic_max(&stats->vazt_peak, value, relaxed);
}
}
&& (zone = vm_allocation_zone_totals[idx])
&& (nextinfo < num_info)) {
for (zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++) {
- if (!zone[zidx].peak) {
+ if (!zone[zidx].vazt_peak) {
continue;
}
info[nextinfo] = info[idx];
info[nextinfo].zone = (uint16_t)zone_index_from_tag_index(zidx, &elem_size);
info[nextinfo].flags &= ~VM_KERN_SITE_WIRED;
info[nextinfo].flags |= VM_KERN_SITE_ZONE;
- info[nextinfo].size = zone[zidx].total;
- info[nextinfo].peak = zone[zidx].peak;
+ info[nextinfo].size = zone[zidx].vazt_total;
+ info[nextinfo].peak = zone[zidx].vazt_peak;
info[nextinfo].mapped = 0;
- if (zone[zidx].wastediv) {
- info[nextinfo].collectable_bytes = ((zone[zidx].waste * zone[zidx].total / elem_size) / zone[zidx].wastediv);
- }
nextinfo++;
}
}
continue;
}
for (uint32_t zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++) {
- if (zone[zidx].peak) {
- count++;
- }
+ count += (zone[zidx].vazt_peak != 0);
}
}
#endif
static void
vm_page_diagnose_zone(mach_memory_info_t *info, zone_t z)
{
- vm_page_diagnose_zone_stats(info, z->z_stats, z->percpu);
+ vm_page_diagnose_zone_stats(info, z->z_stats, z->z_percpu);
snprintf(info->name, sizeof(info->name),
"%s%s[raw]", zone_heap_name(z), z->z_name);
}
return KERN_ABORTED;
}
-#if CONFIG_EMBEDDED
+#if !XNU_TARGET_OS_OSX
wired_size = ptoa_64(vm_page_wire_count);
wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count);
-#else
+#else /* !XNU_TARGET_OS_OSX */
wired_size = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
-#endif
+#endif /* !XNU_TARGET_OS_OSX */
wired_managed_size = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);
wired_size += booter_size;
for (; zv; zv = zv->zv_next) {
vm_page_diagnose_zone_stats(counts + i, zv->zv_stats,
- z->percpu);
+ z->z_percpu);
snprintf(counts[i].name, sizeof(counts[i].name), "%s%s[%s]",
zone_heap_name(z), z->z_name, zv->zv_name);
i++;
}
#endif /* CONFIG_SECLUDED_MEMORY */
+
+/*
+ * Move the list of retired pages on the vm_page_queue_retired to
+ * their final resting place on retired_pages_object.
+ */
+void
+vm_retire_boot_pages(void)
+{
+#if defined(__arm64__)
+ vm_page_t p;
+
+ vm_object_lock(retired_pages_object);
+ while (!vm_page_queue_empty(&vm_page_queue_retired)) {
+ vm_page_queue_remove_first(&vm_page_queue_retired, p, vmp_pageq);
+ assert(p != NULL);
+ vm_page_lock_queues();
+ p->vmp_q_state = VM_PAGE_IS_WIRED;
+ p->vmp_wire_count++;
+ vm_page_unlock_queues();
+ vm_page_insert_wired(p, retired_pages_object, ptoa(VM_PAGE_GET_PHYS_PAGE(p)), VM_KERN_MEMORY_RETIRED);
+ vm_object_unlock(retired_pages_object);
+ pmap_retire_page(VM_PAGE_GET_PHYS_PAGE(p));
+ vm_object_lock(retired_pages_object);
+ }
+ vm_object_unlock(retired_pages_object);
+#endif /* defined(__arm64__) */
+}
+
+/*
+ * Returns the current number of retired pages, used for sysctl.
+ */
+uint32_t
+vm_retired_pages_count(void)
+{
+ return retired_pages_object->resident_page_count;
+}
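
A sketch of the sysctl wiring this accessor is intended for, assuming the standard BSD sysctl macros; the node name and placement are illustrative, not part of this change:

static int
sysctl_vm_retired_pages_count SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	uint32_t count = vm_retired_pages_count();
	return SYSCTL_OUT(req, &count, sizeof(count));
}
SYSCTL_PROC(_vm, OID_AUTO, retired_pages_count,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, sysctl_vm_retired_pages_count, "I",
    "number of pages retired due to memory errors");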
/* delay in seconds before reclaiming an unused shared region */
TUNABLE_WRITEABLE(int, shared_region_destroy_delay, "vm_shared_region_destroy_delay", 120);
-struct vm_shared_region *init_task_shared_region = NULL;
+/*
+ * Cached pointer to the most recently mapped shared region from PID 1, which should
+ * be the most commonly mapped shared region in the system. There are many processes
+ * which do not use this, for a variety of reasons.
+ *
+ * The main consumer of this is stackshot.
+ */
+struct vm_shared_region *primary_system_shared_region = NULL;
-#ifndef CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
/*
* Only one cache gets to slide on Desktop, since we can't
* tear down slide info properly today and the desktop actually
* produces lots of shared caches.
*/
boolean_t shared_region_completed_slide = FALSE;
-#endif
+#endif /* XNU_TARGET_OS_OSX */
/* this lock protects all the shared region data structures */
static LCK_GRP_DECLARE(vm_shared_region_lck_grp, "vm shared region");
vm_prot_t prot); /* forward */
static int __commpage_setup = 0;
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
static int __system_power_source = 1; /* init to external power source */
static void post_sys_powersource_internal(int i, int internal);
-#endif
+#endif /* XNU_TARGET_OS_OSX */
extern u_int32_t random(void);
return shared_region;
}
-/*
- * Get the base address of the shared region.
- * That's the address at which it needs to be mapped in the process's address
- * space.
- * No need to lock since this data is set when the shared region is
- * created and is never modified after that. The caller must hold an extra
- * reference on the shared region to prevent it from being destroyed.
- */
-mach_vm_offset_t
-vm_shared_region_base_address(
- vm_shared_region_t shared_region)
-{
- SHARED_REGION_TRACE_DEBUG(
- ("shared_region: -> base_address(%p)\n",
- (void *)VM_KERNEL_ADDRPERM(shared_region)));
- assert(shared_region->sr_ref_count > 1);
- SHARED_REGION_TRACE_DEBUG(
- ("shared_region: base_address(%p) <- 0x%llx\n",
- (void *)VM_KERNEL_ADDRPERM(shared_region),
- (long long)shared_region->sr_base_address));
- return shared_region->sr_base_address;
-}
-
-/*
- * Get the size of the shared region.
- * That's the size that needs to be mapped in the process's address
- * space.
- * No need to lock since this data is set when the shared region is
- * created and is never modified after that. The caller must hold an extra
- * reference on the shared region to prevent it from being destroyed.
- */
-mach_vm_size_t
-vm_shared_region_size(
- vm_shared_region_t shared_region)
-{
- SHARED_REGION_TRACE_DEBUG(
- ("shared_region: -> size(%p)\n",
- (void *)VM_KERNEL_ADDRPERM(shared_region)));
- assert(shared_region->sr_ref_count > 1);
- SHARED_REGION_TRACE_DEBUG(
- ("shared_region: size(%p) <- 0x%llx\n",
- (void *)VM_KERNEL_ADDRPERM(shared_region),
- (long long)shared_region->sr_size));
- return shared_region->sr_size;
-}
-
-/*
- * Get the memory entry of the shared region.
- * That's the "memory object" that needs to be mapped in the process's address
- * space.
- * No need to lock since this data is set when the shared region is
- * created and is never modified after that. The caller must hold an extra
- * reference on the shared region to prevent it from being destroyed.
- */
-ipc_port_t
-vm_shared_region_mem_entry(
- vm_shared_region_t shared_region)
-{
- SHARED_REGION_TRACE_DEBUG(
- ("shared_region: -> mem_entry(%p)\n",
- (void *)VM_KERNEL_ADDRPERM(shared_region)));
- assert(shared_region->sr_ref_count > 1);
- SHARED_REGION_TRACE_DEBUG(
- ("shared_region: mem_entry(%p) <- %p\n",
- (void *)VM_KERNEL_ADDRPERM(shared_region),
- (void *)VM_KERNEL_ADDRPERM(shared_region->sr_mem_entry)));
- return shared_region->sr_mem_entry;
-}
-
vm_map_t
vm_shared_region_vm_map(
vm_shared_region_t shared_region)
SHARED_REGION_TRACE_DEBUG(
("shared_region: -> vm_map(%p)\n",
(void *)VM_KERNEL_ADDRPERM(shared_region)));
- assert(shared_region->sr_ref_count > 1);
+ assert(shared_region->sr_ref_count > 0);
sr_handle = shared_region->sr_mem_entry;
sr_mem_entry = (vm_named_entry_t) ip_get_kobject(sr_handle);
} else {
/* timer expired: let go of this shared region */
+ /* Make sure there's no cached pointer to the region. */
+ if (primary_system_shared_region == shared_region) {
+ primary_system_shared_region = NULL;
+ }
+
/*
* Remove it from the queue first, so no one can find
* it...
/*
* Gets the address of the first (in time) mapping in the shared region.
+ * If used during initial task setup by dyld, task should be non-NULL.
*/
kern_return_t
vm_shared_region_start_address(
vm_shared_region_t shared_region,
- mach_vm_offset_t *start_address)
+ mach_vm_offset_t *start_address,
+ task_t task)
{
kern_return_t kr;
mach_vm_offset_t sr_base_address;
SHARED_REGION_TRACE_DEBUG(
("shared_region: -> start_address(%p)\n",
(void *)VM_KERNEL_ADDRPERM(shared_region)));
- assert(shared_region->sr_ref_count > 1);
vm_shared_region_lock();
*/
while (shared_region->sr_mapping_in_progress) {
/* wait for our turn... */
- assert(shared_region->sr_ref_count > 1);
vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
THREAD_UNINT);
}
assert(!shared_region->sr_mapping_in_progress);
- assert(shared_region->sr_ref_count > 1);
+ assert(shared_region->sr_ref_count > 0);
sr_base_address = shared_region->sr_base_address;
sr_first_mapping = shared_region->sr_first_mapping;
}
+ uint32_t slide = shared_region->sr_slide;
+
vm_shared_region_unlock();
+ /*
+ * Cache shared region info in the task for telemetry gathering, if we're
+ * passed in the task. No task lock here as we're still in intial task set up.
+ */
+ if (kr == KERN_SUCCESS && task != NULL && task->task_shared_region_slide == -1) {
+ uint_t sc_header_uuid_offset = offsetof(struct _dyld_cache_header, uuid);
+ if (copyin((user_addr_t)(*start_address + sc_header_uuid_offset),
+ (char *)&task->task_shared_region_uuid,
+ sizeof(task->task_shared_region_uuid)) == 0) {
+ task->task_shared_region_slide = slide;
+ }
+ }
+
SHARED_REGION_TRACE_DEBUG(
("shared_region: start_address(%p) <- 0x%llx\n",
(void *)VM_KERNEL_ADDRPERM(shared_region),
vm_shared_region_sleep(&sr->sr_mapping_in_progress, THREAD_UNINT);
}
assert(!sr->sr_mapping_in_progress);
- assert(sr->sr_ref_count > 1);
+ assert(sr->sr_ref_count > 0);
/* Just return if already done. */
if (task->shared_region_auth_remapped) {
/*
* Check that the object exactly covers the region to slide.
*/
- if (VME_OFFSET(tmp_entry) != si->si_start ||
- tmp_entry->vme_end - tmp_entry->vme_start != si->si_end - si->si_start) {
+ if (tmp_entry->vme_end - tmp_entry->vme_start != si->si_end - si->si_start) {
kr = KERN_FAILURE;
goto done;
}
vm_named_entry_t sr_mem_entry;
vm_shared_region_lock();
- assert(shared_region->sr_ref_count > 1);
+ assert(shared_region->sr_ref_count > 0);
while (shared_region->sr_mapping_in_progress) {
/* wait for our turn... */
THREAD_UNINT);
}
assert(!shared_region->sr_mapping_in_progress);
- assert(shared_region->sr_ref_count > 1);
+ assert(shared_region->sr_ref_count > 0);
/* let others know we're working in this shared region */
shared_region->sr_mapping_in_progress = TRUE;
if (reset_shared_region_state) {
vm_shared_region_lock();
- assert(shared_region->sr_ref_count > 1);
+ assert(shared_region->sr_ref_count > 0);
assert(shared_region->sr_mapping_in_progress);
/* we're done working on that shared region */
shared_region->sr_mapping_in_progress = FALSE;
}
/*
- * For now we only expect to see at most 2 regions to relocate/authenticate
- * per file. One that's VM_PROT_SLIDE and one VM_PROT_SLIDE | VM_PROT_NOAUTH.
+ * For now we only expect to see at most 4 regions to relocate/authenticate
+ * per file: one RW region that's VM_PROT_SLIDE, one that's VM_PROT_SLIDE | VM_PROT_NOAUTH,
+ * and the RO counterparts of each.
*/
-#define VMSR_NUM_SLIDES 2
+#define VMSR_NUM_SLIDES 4
/*
* First part of vm_shared_region_map_file(). Split out to
unsigned int current_file_index = 0;
vm_shared_region_lock();
- assert(shared_region->sr_ref_count > 1);
+ assert(shared_region->sr_ref_count > 0);
/*
* Make sure we handle only one mapping at a time in a given
THREAD_UNINT);
}
assert(!shared_region->sr_mapping_in_progress);
- assert(shared_region->sr_ref_count > 1);
+ assert(shared_region->sr_ref_count > 0);
/* let others know we're working in this shared region */
shared_region->sr_mapping_in_progress = TRUE;
mach_vm_offset_t sfm_max_address = 0;
vm_map_t sr_map = NULL;
vm_map_offset_t lowest_unnestable_addr = 0;
- mach_vm_offset_t file_first_mappings[VMSR_NUM_SLIDES] = {(mach_vm_offset_t) -1, (mach_vm_offset_t) -1};
+ mach_vm_offset_t file_first_mappings[VMSR_NUM_SLIDES];
+ for (i = 0; i < VMSR_NUM_SLIDES; ++i) {
+ file_first_mappings[i] = (mach_vm_offset_t) -1;
+ }
kr = vm_shared_region_map_file_setup(shared_region, sr_file_mappings_count, sr_file_mappings,
&mappings_to_slide_cnt, &mappings_to_slide[0], slid_mappings, slid_file_controls,
}
vm_shared_region_lock();
- assert(shared_region->sr_ref_count > 1);
+ assert(shared_region->sr_ref_count > 0);
assert(shared_region->sr_mapping_in_progress);
/* set "sr_first_mapping"; dyld uses it to validate the shared cache */
}
#endif /* __has_feature(ptrauth_calls) */
+ /* Cache shared region info needed for telemetry in the task */
+ task_t task;
+ if (kr == KERN_SUCCESS && (task = current_task())->task_shared_region_slide == -1) {
+ mach_vm_offset_t start_address;
+ (void)vm_shared_region_start_address(shared_region, &start_address, task);
+ }
+
SHARED_REGION_TRACE_DEBUG(
("shared_region: map(%p) <- 0x%x \n",
(void *)VM_KERNEL_ADDRPERM(shared_region), kr));
int error;
size_t image_array_length;
struct _dyld_cache_image_text_info *sr_image_layout;
+ boolean_t locally_built = FALSE;
/*
if (error == 0) {
memcpy(&shared_region->sr_uuid, &sr_cache_header.uuid, sizeof(shared_region->sr_uuid));
shared_region->sr_uuid_copied = TRUE;
+ locally_built = sr_cache_header.locallyBuiltCache;
} else {
#if DEVELOPMENT || DEBUG
panic("shared_region: copyin shared_cache_header(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
}
/*
- * If the shared cache is associated with the init task (and is therefore the system shared cache),
- * check whether it is a custom built shared cache and copy in the shared cache layout accordingly.
+ * We save a pointer to the shared cache mapped by the "init task", i.e. launchd. This is used by
+ * the stackshot code to reduce output size in the common case that everything maps the same shared cache.
+ * One gotcha is that "userspace reboots" can occur which can cause a new shared region to be the primary
+ * region. In that case, launchd re-exec's itself, so we may go through this path multiple times. We
+ * let the most recent one win.
+ *
+ * Check whether the shared cache is a custom built one and copy in the shared cache layout accordingly.
*/
- boolean_t is_init_task = (task_pid(current_task()) == 1);
+ bool is_init_task = (task_pid(current_task()) == 1);
if (shared_region->sr_uuid_copied && is_init_task) {
/* Copy in the shared cache layout if we're running with a locally built shared cache */
- if (sr_cache_header.locallyBuiltCache) {
+ if (locally_built) {
KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_START);
image_array_length = (size_t)(sr_cache_header.imagesTextCount * sizeof(struct _dyld_cache_image_text_info));
sr_image_layout = kheap_alloc(KHEAP_DATA_BUFFERS, image_array_length, Z_WAITOK);
error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping +
sr_cache_header.imagesTextOffset), (char *)sr_image_layout, image_array_length);
if (error == 0) {
+ if (sr_cache_header.imagesTextCount >= UINT32_MAX) {
+ panic("shared_region: sr_cache_header.imagesTextCount >= UINT32_MAX");
+ }
shared_region->sr_images = kalloc((vm_size_t)(sr_cache_header.imagesTextCount * sizeof(struct dyld_uuid_info_64)));
for (size_t index = 0; index < sr_cache_header.imagesTextCount; index++) {
memcpy((char *)&shared_region->sr_images[index].imageUUID, (char *)&sr_image_layout[index].uuid,
shared_region->sr_images[index].imageLoadAddress = sr_image_layout[index].loadAddress;
}
- assert(sr_cache_header.imagesTextCount < UINT32_MAX);
shared_region->sr_images_count = (uint32_t) sr_cache_header.imagesTextCount;
} else {
#if DEVELOPMENT || DEBUG
kheap_free(KHEAP_DATA_BUFFERS, sr_image_layout, image_array_length);
sr_image_layout = NULL;
}
- init_task_shared_region = shared_region;
+ primary_system_shared_region = shared_region;
}
/*
/* Comm page support */
/******************************************************************************/
-ipc_port_t commpage32_handle = IPC_PORT_NULL;
-ipc_port_t commpage64_handle = IPC_PORT_NULL;
-vm_named_entry_t commpage32_entry = NULL;
-vm_named_entry_t commpage64_entry = NULL;
-vm_map_t commpage32_map = VM_MAP_NULL;
-vm_map_t commpage64_map = VM_MAP_NULL;
+SECURITY_READ_ONLY_LATE(ipc_port_t) commpage32_handle = IPC_PORT_NULL;
+SECURITY_READ_ONLY_LATE(ipc_port_t) commpage64_handle = IPC_PORT_NULL;
+SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage32_entry = NULL;
+SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage64_entry = NULL;
+SECURITY_READ_ONLY_LATE(vm_map_t) commpage32_map = VM_MAP_NULL;
+SECURITY_READ_ONLY_LATE(vm_map_t) commpage64_map = VM_MAP_NULL;
-ipc_port_t commpage_text32_handle = IPC_PORT_NULL;
-ipc_port_t commpage_text64_handle = IPC_PORT_NULL;
-vm_named_entry_t commpage_text32_entry = NULL;
-vm_named_entry_t commpage_text64_entry = NULL;
-vm_map_t commpage_text32_map = VM_MAP_NULL;
-vm_map_t commpage_text64_map = VM_MAP_NULL;
+SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text32_handle = IPC_PORT_NULL;
+SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text64_handle = IPC_PORT_NULL;
+SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text32_entry = NULL;
+SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text64_entry = NULL;
+SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text32_map = VM_MAP_NULL;
+SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text64_map = VM_MAP_NULL;
-user32_addr_t commpage_text32_location = 0;
-user64_addr_t commpage_text64_location = 0;
+SECURITY_READ_ONLY_LATE(user32_addr_t) commpage_text32_location = 0;
+SECURITY_READ_ONLY_LATE(user64_addr_t) commpage_text64_location = 0;
#if defined(__i386__) || defined(__x86_64__)
/*
/* populate them according to this specific platform */
commpage_populate();
__commpage_setup = 1;
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
if (__system_power_source == 0) {
post_sys_powersource_internal(0, 1);
}
-#endif
+#endif /* XNU_TARGET_OS_OSX */
SHARED_REGION_TRACE_DEBUG(
("commpage: init() <-\n"));
sr->sr_slide_in_progress = FALSE;
thread_wakeup(&sr->sr_slide_in_progress);
-#ifndef CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
if (error == KERN_SUCCESS) {
shared_region_completed_slide = TRUE;
}
-#endif
+#endif /* XNU_TARGET_OS_OSX */
vm_shared_region_unlock();
vm_shared_region_deallocate(sr);
* 1 if it is internal power source ie battery
*/
void
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
post_sys_powersource(int i)
-#else
+#else /* XNU_TARGET_OS_OSX */
post_sys_powersource(__unused int i)
-#endif
+#endif /* XNU_TARGET_OS_OSX */
{
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
post_sys_powersource_internal(i, 0);
-#endif
+#endif /* XNU_TARGET_OS_OSX */
}
-#if !CONFIG_EMBEDDED
+#if XNU_TARGET_OS_OSX
static void
post_sys_powersource_internal(int i, int internal)
{
__system_power_source = i;
}
}
-#endif
+#endif /* XNU_TARGET_OS_OSX */
void *
vm_shared_region_root_dir(
extern int shared_region_trace_level;
-extern struct vm_shared_region *init_task_shared_region;
+extern struct vm_shared_region *primary_system_shared_region;
#define SHARED_REGION_TRACE_NONE_LVL 0 /* no trace */
#define SHARED_REGION_TRACE_ERROR_LVL 1 /* trace abnormal events */
struct task *task);
extern void vm_shared_region_deallocate(
struct vm_shared_region *shared_region);
-extern mach_vm_offset_t vm_shared_region_base_address(
- struct vm_shared_region *shared_region);
-extern mach_vm_size_t vm_shared_region_size(
- struct vm_shared_region *shared_region);
-extern ipc_port_t vm_shared_region_mem_entry(
- struct vm_shared_region *shared_region);
extern vm_map_t vm_shared_region_vm_map(
struct vm_shared_region *shared_region);
extern void vm_shared_region_set(
boolean_t reslide);
extern kern_return_t vm_shared_region_start_address(
struct vm_shared_region *shared_region,
- mach_vm_offset_t *start_address);
+ mach_vm_offset_t *start_address,
+ task_t task);
extern void vm_shared_region_undo_mappings(
vm_map_t sr_map,
mach_vm_offset_t sr_base_address,
* the "shared_region" EMM.
*/
typedef struct shared_region_pager {
- struct memory_object srp_header; /* mandatory generic header */
+ struct memory_object srp_header; /* mandatory generic header */
/* pager-specific data */
queue_chain_t srp_queue; /* next & prev pagers */
- uint32_t srp_ref_count; /* active uses */
+#if MEMORY_OBJECT_HAS_REFCOUNT
+#define srp_ref_count srp_header.mo_ref
+#else
+ os_ref_atomic_t srp_ref_count; /* active uses */
+#endif
bool srp_is_mapped; /* has active mappings */
bool srp_is_ready; /* is this pager ready? */
vm_object_t srp_backing_object; /* VM object for shared cache */
pager = shared_region_pager_lookup(mem_obj);
assert(pager->srp_is_ready);
- assert(pager->srp_ref_count > 1); /* pager is alive */
+ assert(os_ref_get_count_raw(&pager->srp_ref_count) > 1); /* pager is alive */
assert(pager->srp_is_mapped); /* pager is mapped */
PAGER_DEBUG(PAGER_PAGEIN, ("shared_region_pager_data_request: %p, %llx, %x, %x, pager %p\n", mem_obj, offset, length, protection_required, pager));
retval = kr;
goto done;
}
- dst_object = mo_control->moc_object;
+ dst_object = memory_object_control_to_vm_object(mo_control);
assert(dst_object != VM_OBJECT_NULL);
/*
pager = shared_region_pager_lookup(mem_obj);
lck_mtx_lock(&shared_region_pager_lock);
- assert(pager->srp_ref_count > 0);
- pager->srp_ref_count++;
+ os_ref_retain_locked_raw(&pager->srp_ref_count, NULL);
lck_mtx_unlock(&shared_region_pager_lock);
}
{
assert(pager->srp_is_ready);
assert(!pager->srp_is_mapped);
- assert(pager->srp_ref_count == 1);
+ assert(os_ref_get_count_raw(&pager->srp_ref_count) == 1);
if (pager->srp_backing_object != VM_OBJECT_NULL) {
vm_object_deallocate(pager->srp_backing_object);
{
boolean_t needs_trimming;
int count_unmapped;
+ os_ref_count_t ref_count;
if (!locked) {
lck_mtx_lock(&shared_region_pager_lock);
needs_trimming = (count_unmapped > shared_region_pager_cache_limit);
/* drop a reference on this pager */
- assert(pager->srp_ref_count > 0);
- pager->srp_ref_count--;
+ ref_count = os_ref_release_locked_raw(&pager->srp_ref_count, NULL);
- if (pager->srp_ref_count == 1) {
+ if (ref_count == 1) {
/*
* Only the "named" reference is left, which means that
* no one is really holding on to this pager anymore.
/* the pager is all ours: no need for the lock now */
lck_mtx_unlock(&shared_region_pager_lock);
shared_region_pager_terminate_internal(pager);
- } else if (pager->srp_ref_count == 0) {
+ } else if (ref_count == 0) {
/*
* Dropped the existence reference; the memory object has
* been terminated. Do some final cleanup and release the
lck_mtx_lock(&shared_region_pager_lock);
assert(pager->srp_is_ready);
- assert(pager->srp_ref_count > 0); /* pager is alive */
+ assert(os_ref_get_count_raw(&pager->srp_ref_count) > 0); /* pager is alive */
if (!pager->srp_is_mapped) {
pager->srp_is_mapped = TRUE;
- pager->srp_ref_count++;
+ os_ref_retain_locked_raw(&pager->srp_ref_count, NULL);
shared_region_pager_count_mapped++;
}
lck_mtx_unlock(&shared_region_pager_lock);
assert(mem_obj->mo_pager_ops == &shared_region_pager_ops);
pager = (shared_region_pager_t)(uintptr_t) mem_obj;
- assert(pager->srp_ref_count > 0);
+ assert(os_ref_get_count_raw(&pager->srp_ref_count) > 0);
return pager;
}
pager->srp_header.mo_control = MEMORY_OBJECT_CONTROL_NULL;
pager->srp_is_ready = FALSE;/* not ready until it has a "name" */
- pager->srp_ref_count = 1; /* existence reference (for the cache) */
- pager->srp_ref_count++; /* for the caller */
+ /* existence reference (for the cache) + 1 for the caller */
+ os_ref_init_count_raw(&pager->srp_ref_count, NULL, 2);
pager->srp_is_mapped = FALSE;
pager->srp_backing_object = backing_object;
pager->srp_backing_offset = backing_offset;
if (memcmp(si->si_slide_info_entry, slide_info->si_slide_info_entry, si->si_slide_info_size) != 0) {
continue;
}
- ++pager->srp_ref_count; /* the caller expects a reference on this */
+ /* the caller expects a reference on this */
+ os_ref_retain_locked_raw(&pager->srp_ref_count, NULL);
lck_mtx_unlock(&shared_region_pager_lock);
return (memory_object_t)pager;
}
/* get prev elt before we dequeue */
prev_pager = (shared_region_pager_t)queue_prev(&pager->srp_queue);
- if (pager->srp_ref_count == 2 &&
+ if (os_ref_get_count_raw(&pager->srp_ref_count) == 2 &&
pager->srp_is_ready &&
!pager->srp_is_mapped) {
/* this pager can be trimmed */
srp_queue);
pager->srp_queue.next = NULL;
pager->srp_queue.prev = NULL;
- assert(pager->srp_ref_count == 2);
+ assert(os_ref_get_count_raw(&pager->srp_ref_count) == 2);
/*
* We can't call deallocate_internal() because the pager
* has already been dequeued, but we still need to remove
* a reference.
*/
- pager->srp_ref_count--;
+ (void)os_ref_release_locked_raw(&pager->srp_ref_count, NULL);
shared_region_pager_terminate_internal(pager);
}
}
*/
typedef struct swapfile_pager {
/* mandatory generic header */
- struct memory_object swp_pgr_hdr;
+ struct memory_object swp_pgr_hdr;
/* pager-specific data */
queue_chain_t pager_queue; /* next & prev pagers */
- unsigned int ref_count; /* reference count */
- boolean_t is_ready; /* is this pager ready ? */
- boolean_t is_mapped; /* is this pager mapped ? */
+#if MEMORY_OBJECT_HAS_REFCOUNT
+#define swp_pgr_hdr_ref swp_pgr_hdr.mo_ref
+#else
+ os_ref_atomic_t swp_pgr_hdr_ref; /* reference count */
+#endif
+ bool is_ready; /* is this pager ready ? */
+ bool is_mapped; /* is this pager mapped ? */
struct vnode *swapfile_vnode;/* the swapfile's vnode */
} *swapfile_pager_t;
#define SWAPFILE_PAGER_NULL ((swapfile_pager_t) NULL)
pager = swapfile_pager_lookup(mem_obj);
assert(pager->is_ready);
- assert(pager->ref_count > 1); /* pager is alive and mapped */
+ assert(os_ref_get_count_raw(&pager->swp_pgr_hdr_ref) > 1); /* pager is alive and mapped */
PAGER_DEBUG(PAGER_PAGEIN, ("swapfile_pager_data_request: %p, %llx, %x, %x, pager %p\n", mem_obj, offset, length, protection_required, pager));
retval = kr;
goto done;
}
- dst_object = mo_control->moc_object;
+ dst_object = memory_object_control_to_vm_object(mo_control);
assert(dst_object != VM_OBJECT_NULL);
pager = swapfile_pager_lookup(mem_obj);
lck_mtx_lock(&swapfile_pager_lock);
- assert(pager->ref_count > 0);
- pager->ref_count++;
+ os_ref_retain_locked_raw(&pager->swp_pgr_hdr_ref, NULL);
lck_mtx_unlock(&swapfile_pager_lock);
}
swapfile_pager_t pager,
boolean_t locked)
{
+ os_ref_count_t ref_count;
+
if (!locked) {
lck_mtx_lock(&swapfile_pager_lock);
}
/* drop a reference on this pager */
- pager->ref_count--;
+ ref_count = os_ref_release_locked_raw(&pager->swp_pgr_hdr_ref, NULL);
- if (pager->ref_count == 1) {
+ if (ref_count == 1) {
/*
* Only the "named" reference is left, which means that
* no one is really holding on to this pager anymore.
/* the pager is all ours: no need for the lock now */
lck_mtx_unlock(&swapfile_pager_lock);
swapfile_pager_terminate_internal(pager);
- } else if (pager->ref_count == 0) {
+ } else if (ref_count == 0) {
/*
* Dropped the existence reference; the memory object has
* been terminated. Do some final cleanup and release the
lck_mtx_lock(&swapfile_pager_lock);
assert(pager->is_ready);
- assert(pager->ref_count > 0); /* pager is alive */
+ assert(os_ref_get_count_raw(&pager->swp_pgr_hdr_ref) > 0); /* pager is alive */
if (pager->is_mapped == FALSE) {
/*
* First mapping of this pager: take an extra reference
* are removed.
*/
pager->is_mapped = TRUE;
- pager->ref_count++;
+ os_ref_retain_locked_raw(&pager->swp_pgr_hdr_ref, NULL);
}
lck_mtx_unlock(&swapfile_pager_lock);
assert(mem_obj->mo_pager_ops == &swapfile_pager_ops);
__IGNORE_WCASTALIGN(pager = (swapfile_pager_t) mem_obj);
- assert(pager->ref_count > 0);
+ assert(os_ref_get_count_raw(&pager->swp_pgr_hdr_ref) > 0);
return pager;
}
pager->swp_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;
pager->is_ready = FALSE;/* not ready until it has a "name" */
- pager->ref_count = 1; /* setup reference */
+ os_ref_init_raw(&pager->swp_pgr_hdr_ref, NULL); /* setup reference */
pager->is_mapped = FALSE;
pager->swapfile_vnode = vp;
if (!queue_end(&swapfile_pager_queue,
(queue_entry_t) pager2)) {
/* while we hold the lock, transfer our setup ref to winner */
- pager2->ref_count++;
+ os_ref_retain_locked_raw(&pager2->swp_pgr_hdr_ref, NULL);
/* we lost the race, down with the loser... */
lck_mtx_unlock(&swapfile_pager_lock);
pager->swapfile_vnode = NULL;
pager = SWAPFILE_PAGER_NULL;
} else {
/* make sure pager doesn't disappear */
- pager->ref_count++;
+ os_ref_retain_raw(&pager->swp_pgr_hdr_ref, NULL);
}
lck_mtx_unlock(&swapfile_pager_lock);
mach_memory_entry_port_release(mem_entry);
/* create 4k copy map */
+ curprot = VM_PROT_NONE;
+ maxprot = VM_PROT_NONE;
kr = vm_map_copy_extract(map4k, addr4k, 0x3000,
- VM_PROT_READ, FALSE,
- ©4k, &curprot, &maxprot,
+ FALSE, ©4k, &curprot, &maxprot,
VM_INHERIT_DEFAULT, VM_MAP_KERNEL_FLAGS_NONE);
assert(kr == KERN_SUCCESS);
assert(copy4k->size == 0x3000);
/* create 16k copy map */
+ curprot = VM_PROT_NONE;
+ maxprot = VM_PROT_NONE;
kr = vm_map_copy_extract(map16k, addr16k, 0x4000,
- VM_PROT_READ, FALSE,
- ©16k, &curprot, &maxprot,
+ FALSE, ©16k, &curprot, &maxprot,
VM_INHERIT_DEFAULT, VM_MAP_KERNEL_FLAGS_NONE);
assert(kr == KERN_SUCCESS);
assert(copy16k->size == 0x4000);
return kr;
}
+/*
+ * mach_vm_remap_new -
+ * Behaves like mach_vm_remap, except that VM_FLAGS_RETURN_DATA_ADDR is always set
+ * and {cur,max}_protection are in/out.
+ */
+kern_return_t
+mach_vm_remap_new_external(
+ vm_map_t target_map,
+ mach_vm_offset_t *address,
+ mach_vm_size_t size,
+ mach_vm_offset_t mask,
+ int flags,
+ mach_port_t src_tport,
+ mach_vm_offset_t memory_address,
+ boolean_t copy,
+ vm_prot_t *cur_protection, /* IN/OUT */
+ vm_prot_t *max_protection, /* IN/OUT */
+ vm_inherit_t inheritance)
+{
+ vm_tag_t tag;
+ vm_map_offset_t map_addr;
+ kern_return_t kr;
+ vm_map_t src_map;
+
+ flags |= VM_FLAGS_RETURN_DATA_ADDR;
+ VM_GET_FLAGS_ALIAS(flags, tag);
+
+ /* filter out any kernel-only flags */
+ if (flags & ~VM_FLAGS_USER_REMAP) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ if (target_map == VM_MAP_NULL) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ if ((*cur_protection & ~VM_PROT_ALL) ||
+ (*max_protection & ~VM_PROT_ALL) ||
+ (*cur_protection & *max_protection) != *cur_protection) {
+ return KERN_INVALID_ARGUMENT;
+ }
+ if ((*max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ==
+ (VM_PROT_WRITE | VM_PROT_EXECUTE)) {
+ /*
+ * XXX FBDP TODO
+ * enforce target's "wx" policies
+ */
+ return KERN_PROTECTION_FAILURE;
+ }
+
+ if (copy || *max_protection == VM_PROT_READ || *max_protection == VM_PROT_NONE) {
+ src_map = convert_port_to_map_read(src_tport);
+ } else {
+ src_map = convert_port_to_map(src_tport);
+ }
+
+ if (src_map == VM_MAP_NULL) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ map_addr = (vm_map_offset_t)*address;
+
+ kr = vm_map_remap(target_map,
+ &map_addr,
+ size,
+ mask,
+ flags,
+ VM_MAP_KERNEL_FLAGS_NONE,
+ tag,
+ src_map,
+ memory_address,
+ copy,
+ cur_protection, /* IN/OUT */
+ max_protection, /* IN/OUT */
+ inheritance);
+
+ *address = map_addr;
+ vm_map_deallocate(src_map);
+
+ if (kr == KERN_SUCCESS) {
+ ipc_port_release_send(src_tport); /* consume on success */
+ }
+ return kr;
+}
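
From user space this surfaces as the mach_vm_remap_new() MIG call; a hedged sketch of the in/out protection convention, where size, src_task_port and src_addr are assumed to be set up by the caller:

mach_vm_address_t dst = 0;
vm_prot_t cur = VM_PROT_READ;   /* request read-only ... */
vm_prot_t max = VM_PROT_READ;   /* ... and cap it at read-only */
kern_return_t kr;

kr = mach_vm_remap_new(mach_task_self(), &dst, size, 0 /* mask */,
    VM_FLAGS_ANYWHERE, src_task_port, src_addr, TRUE /* copy */,
    &cur, &max, VM_INHERIT_NONE);
/* On success, cur/max now report what the kernel actually granted. */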
+
/*
* mach_vm_remap -
* Remap a range of memory from one task into another,
vm_map_t src_map,
mach_vm_offset_t memory_address,
boolean_t copy,
- vm_prot_t *cur_protection,
- vm_prot_t *max_protection,
+ vm_prot_t *cur_protection, /* OUT */
+ vm_prot_t *max_protection, /* OUT */
vm_inherit_t inheritance)
{
vm_tag_t tag;
vm_map_t src_map,
mach_vm_offset_t memory_address,
boolean_t copy,
- vm_prot_t *cur_protection,
- vm_prot_t *max_protection,
+ vm_prot_t *cur_protection, /* OUT */
+ vm_prot_t *max_protection, /* OUT */
vm_inherit_t inheritance)
{
vm_map_offset_t map_addr;
map_addr = (vm_map_offset_t)*address;
+ *cur_protection = VM_PROT_NONE;
+ *max_protection = VM_PROT_NONE;
+
kr = vm_map_remap(target_map,
&map_addr,
size,
src_map,
memory_address,
copy,
- cur_protection,
- max_protection,
+ cur_protection, /* IN/OUT */
+ max_protection, /* IN/OUT */
inheritance);
*address = map_addr;
return kr;
}
+/*
+ * vm_remap_new -
+ * Behaves like vm_remap, except that VM_FLAGS_RETURN_DATA_ADDR is always set
+ * and {cur,max}_protection are in/out.
+ */
+kern_return_t
+vm_remap_new_external(
+ vm_map_t target_map,
+ vm_offset_t *address,
+ vm_size_t size,
+ vm_offset_t mask,
+ int flags,
+ mach_port_t src_tport,
+ vm_offset_t memory_address,
+ boolean_t copy,
+ vm_prot_t *cur_protection, /* IN/OUT */
+ vm_prot_t *max_protection, /* IN/OUT */
+ vm_inherit_t inheritance)
+{
+ vm_tag_t tag;
+ vm_map_offset_t map_addr;
+ kern_return_t kr;
+ vm_map_t src_map;
+
+ flags |= VM_FLAGS_RETURN_DATA_ADDR;
+ VM_GET_FLAGS_ALIAS(flags, tag);
+
+ /* filter out any kernel-only flags */
+ if (flags & ~VM_FLAGS_USER_REMAP) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ if (target_map == VM_MAP_NULL) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ if ((*cur_protection & ~VM_PROT_ALL) ||
+ (*max_protection & ~VM_PROT_ALL) ||
+ (*cur_protection & *max_protection) != *cur_protection) {
+ return KERN_INVALID_ARGUMENT;
+ }
+ if ((*max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ==
+ (VM_PROT_WRITE | VM_PROT_EXECUTE)) {
+ /*
+ * XXX FBDP TODO
+ * enforce target's "wx" policies
+ */
+ return KERN_PROTECTION_FAILURE;
+ }
+
+ if (copy || *max_protection == VM_PROT_READ || *max_protection == VM_PROT_NONE) {
+ src_map = convert_port_to_map_read(src_tport);
+ } else {
+ src_map = convert_port_to_map(src_tport);
+ }
+
+ if (src_map == VM_MAP_NULL) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ map_addr = (vm_map_offset_t)*address;
+
+ kr = vm_map_remap(target_map,
+ &map_addr,
+ size,
+ mask,
+ flags,
+ VM_MAP_KERNEL_FLAGS_NONE,
+ tag,
+ src_map,
+ memory_address,
+ copy,
+ cur_protection, /* IN/OUT */
+ max_protection, /* IN/OUT */
+ inheritance);
+
+ *address = CAST_DOWN(vm_offset_t, map_addr);
+ vm_map_deallocate(src_map);
+
+ if (kr == KERN_SUCCESS) {
+ ipc_port_release_send(src_tport); /* consume on success */
+ }
+ return kr;
+}
+
/*
* vm_remap -
* Remap a range of memory from one task into another,
vm_map_t src_map,
vm_offset_t memory_address,
boolean_t copy,
- vm_prot_t *cur_protection,
- vm_prot_t *max_protection,
+ vm_prot_t *cur_protection, /* OUT */
+ vm_prot_t *max_protection, /* OUT */
vm_inherit_t inheritance)
{
vm_tag_t tag;
vm_map_t src_map,
vm_offset_t memory_address,
boolean_t copy,
- vm_prot_t *cur_protection,
- vm_prot_t *max_protection,
+ vm_prot_t *cur_protection, /* OUT */
+ vm_prot_t *max_protection, /* OUT */
vm_inherit_t inheritance)
{
vm_map_offset_t map_addr;
map_addr = (vm_map_offset_t)*address;
+ *cur_protection = VM_PROT_NONE;
+ *max_protection = VM_PROT_NONE;
+
kr = vm_map_remap(target_map,
&map_addr,
size,
src_map,
memory_address,
copy,
- cur_protection,
- max_protection,
+ cur_protection, /* IN/OUT */
+ max_protection, /* IN/OUT */
inheritance);
*address = CAST_DOWN(vm_offset_t, map_addr);
return kr;
return KERN_INVALID_HOST;
}
- assert(host_priv == &realhost);
-
if (map == VM_MAP_NULL) {
return KERN_INVALID_TASK;
}
return KERN_INVALID_HOST;
}
- assert(host_priv == &realhost);
-
if (map == VM_MAP_NULL) {
return KERN_INVALID_TASK;
}
effective_page_size = (1 << effective_page_shift);
effective_page_mask = effective_page_size - 1;
- disp_buf_req_size = (*dispositions_count * sizeof(int));
+ if (os_mul_overflow(*dispositions_count, sizeof(int), &disp_buf_req_size)) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
start = vm_map_trunc_page(address, effective_page_mask);
end = vm_map_round_page(address + size, effective_page_mask);
mach_make_memory_entry_64(
vm_map_t target_map,
memory_object_size_t *size,
- memory_object_offset_t offset,
+ memory_object_offset_t offset,
vm_prot_t permission,
ipc_port_t *object_handle,
ipc_port_t parent_handle)
vm_prot_t cur_prot, max_prot;
vm_map_kernel_flags_t vmk_flags;
vm_map_entry_t parent_copy_entry;
- vm_prot_t required_protection;
if (target_map == VM_MAP_NULL) {
DEBUG4K_MEMENTRY("map %p offset 0x%llx size 0x%llx prot 0x%x -> entry %p kr 0x%x\n", target_map, offset, *size, permission, user_entry, KERN_INVALID_TASK);
vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
parent_copy_entry = VM_MAP_ENTRY_NULL;
if (!(permission & MAP_MEM_VM_SHARE)) {
+ vm_map_t tmp_map, real_map;
+ vm_map_version_t version;
+ vm_object_t tmp_object;
+ vm_object_offset_t obj_off;
+ vm_prot_t prot;
+ boolean_t wired;
+ bool contended;
+
+ /* resolve any pending submap copy-on-write... */
+ if (protections & VM_PROT_WRITE) {
+ tmp_map = target_map;
+ vm_map_lock_read(tmp_map);
+ kr = vm_map_lookup_locked(&tmp_map,
+ map_start,
+ protections | mask_protections,
+ OBJECT_LOCK_EXCLUSIVE,
+ &version,
+ &tmp_object,
+ &obj_off,
+ &prot,
+ &wired,
+ NULL, /* fault_info */
+ &real_map,
+ &contended);
+ if (kr != KERN_SUCCESS) {
+ vm_map_unlock_read(tmp_map);
+ } else {
+ vm_object_unlock(tmp_object);
+ vm_map_unlock_read(tmp_map);
+ if (real_map != tmp_map) {
+ vm_map_unlock_read(real_map);
+ }
+ }
+ }
+ /* ... and carry on */
+
/* stop extracting if VM object changes */
vmk_flags.vmkf_copy_single_object = TRUE;
if ((permission & MAP_MEM_NAMED_REUSE) &&
* caller is asking for whichever proctections are
* available: no required protections.
*/
- required_protection = VM_PROT_NONE;
+ cur_prot = VM_PROT_NONE;
+ max_prot = VM_PROT_NONE;
} else {
/*
* Caller wants a memory entry with "protections".
* Make sure we extract only memory that matches that.
*/
- required_protection = protections;
+ cur_prot = protections;
+ max_prot = protections;
}
- cur_prot = VM_PROT_ALL;
if (target_map->pmap == kernel_pmap) {
/*
* Get "reserved" map entries to avoid deadlocking
kr = vm_map_copy_extract(target_map,
map_start,
map_size,
- required_protection,
FALSE, /* copy */
©,
&cur_prot,
return kr;
}
assert(copy != VM_MAP_COPY_NULL);
- assert((cur_prot & required_protection) == required_protection);
if (mask_protections) {
/*
* We want exactly "original_protections"
* out of "cur_prot".
*/
+ assert((cur_prot & protections) == protections);
+ assert((max_prot & protections) == protections);
+ /* XXX FBDP TODO: no longer needed? */
if ((cur_prot & protections) != protections) {
if (VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
// panic("DEBUG4K %s:%d kr 0x%x\n", __FUNCTION__, __LINE__, KERN_PROTECTION_FAILURE);
if (parent_entry->is_sub_map) {
vm_map_t map = parent_entry->backing.map;
+ vm_map_reference(map);
user_entry->backing.map = map;
- lck_mtx_lock(&map->s_lock);
- os_ref_retain_locked(&map->map_refcnt);
- lck_mtx_unlock(&map->s_lock);
} else {
object = vm_named_entry_to_vm_object(parent_entry);
assert(object != VM_OBJECT_NULL);
return KERN_INVALID_ARGUMENT;
}
- mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
+ mem_entry = (vm_named_entry_t) ipc_kobject_get(entry_port);
named_entry_lock(mem_entry);
return KERN_INVALID_ARGUMENT;
}
- mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
+ mem_entry = (vm_named_entry_t) ipc_kobject_get(entry_port);
named_entry_lock(mem_entry);
if (mem_entry->is_sub_map) {
vm_map_t src_map,
mach_vm_offset_t memory_address,
boolean_t copy,
- vm_prot_t *cur_protection,
- vm_prot_t *max_protection,
+ vm_prot_t *cur_protection, /* OUT */
+ vm_prot_t *max_protection, /* OUT */
vm_inherit_t inheritance)
{
return mach_vm_remap_external(target_map, address, size, mask, flags, src_map, memory_address,
* Size of elements in the permanent zone is not saved as a part of the
* zone's info
*/
- if (__improbable(src_zone && !src_zone->permanent &&
+ if (__improbable(src_zone && !src_zone->z_permanent &&
kernel_buf_size < nbytes)) {
panic("copyio: kernel buffer %p has size %lu < nbytes %lu", kernel_addr, kernel_buf_size, nbytes);
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * @OSF_COPYRIGHT@
+ */
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+#include <kern/assert.h>
+#include <kern/cpu_data.h>
+#include <kern/counter.h>
+#include <kern/zalloc.h>
+#include <machine/atomic.h>
+#include <machine/machine_routines.h>
+#include <machine/cpu_number.h>
+
+OS_OVERLOADABLE
+void
+counter_add(scalable_counter_t *counter, uint64_t amount)
+{
+ disable_preemption();
+ (*zpercpu_get(*counter)) += amount;
+ enable_preemption();
+}
+
+OS_OVERLOADABLE
+void
+counter_inc(scalable_counter_t *counter)
+{
+ disable_preemption();
+ (*zpercpu_get(*counter))++;
+ enable_preemption();
+}
+
+OS_OVERLOADABLE
+void
+counter_dec(scalable_counter_t *counter)
+{
+ disable_preemption();
+ (*zpercpu_get(*counter))--;
+ enable_preemption();
+}
+
+OS_OVERLOADABLE
+void
+counter_add_preemption_disabled(scalable_counter_t *counter, uint64_t amount)
+{
+ (*zpercpu_get(*counter)) += amount;
+}
+
+OS_OVERLOADABLE
+void
+counter_inc_preemption_disabled(scalable_counter_t *counter)
+{
+ (*zpercpu_get(*counter))++;
+}
+
+OS_OVERLOADABLE
+void
+counter_dec_preemption_disabled(scalable_counter_t *counter)
+{
+ (*zpercpu_get(*counter))--;
+}
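
A usage sketch for the new per-CPU counters, assuming my_events is a scalable_counter_t already initialized by the counter subsystem (the name is illustrative):

extern scalable_counter_t my_events;

void
note_event(void)
{
	/* safe from any context; handles preemption itself */
	counter_inc(&my_events);
}

void
note_events_from_hot_path(uint64_t n)
{
	disable_preemption();
	/* ... other per-CPU work ... */
	counter_add_preemption_disabled(&my_events, n);
	enable_preemption();
}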
return false;
}
+SIMPLE_LOCK_DECLARE(pmap_compilation_service_cdhash_lock, 0);
+uint8_t pmap_compilation_service_cdhash[CS_CDHASH_LEN] = { 0 };
+
+void
+pmap_set_compilation_service_cdhash(const uint8_t cdhash[CS_CDHASH_LEN])
+{
+ simple_lock(&pmap_compilation_service_cdhash_lock, LCK_GRP_NULL);
+ memcpy(pmap_compilation_service_cdhash, cdhash, CS_CDHASH_LEN);
+ simple_unlock(&pmap_compilation_service_cdhash_lock);
+
+#if DEVELOPMENT || DEBUG
+ printf("Added Compilation Service CDHash through the PMAP: 0x%02X 0x%02X 0x%02X 0x%02X\n", cdhash[0], cdhash[1], cdhash[2], cdhash[3]);
+#endif
+}
+
+bool
+pmap_match_compilation_service_cdhash(const uint8_t cdhash[CS_CDHASH_LEN])
+{
+ bool match = false;
+
+ simple_lock(&pmap_compilation_service_cdhash_lock, LCK_GRP_NULL);
+ if (bcmp(pmap_compilation_service_cdhash, cdhash, CS_CDHASH_LEN) == 0) {
+ match = true;
+ }
+ simple_unlock(&pmap_compilation_service_cdhash_lock);
+
+#if DEVELOPMENT || DEBUG
+ if (match) {
+ printf("Matched Compilation Service CDHash through the PMAP\n");
+ }
+#endif
+
+ return match;
+}
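
A brief sketch of the intended set/match pairing; the caller and the source of the cdhash bytes are hypothetical:

uint8_t cdhash[CS_CDHASH_LEN];

/* ... fill cdhash from the compilation service's code signature ... */
pmap_set_compilation_service_cdhash(cdhash);

/* later, when validating some candidate signature: */
if (pmap_match_compilation_service_cdhash(candidate_cdhash)) {
	/* treat it as the OS compilation service */
}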
+
bool
pmap_in_ppl(void)
{
{
// Unsupported on this architecture.
}
+
+#if DEVELOPMENT || DEBUG
+/*
+ * Used for unit testing recovery from text corruptions.
+ */
+kern_return_t
+pmap_test_text_corruption(pmap_paddr_t pa)
+{
+ int pai;
+ uint8_t *va;
+
+ pai = ppn_to_pai(atop(pa));
+ if (!IS_MANAGED_PAGE(pai)) {
+ return KERN_FAILURE;
+ }
+
+ va = (uint8_t *)PHYSMAP_PTOV(pa);
+ va[0] = 0x0f; /* opcode for UD2 */
+ va[1] = 0x0b;
+
+ return KERN_SUCCESS;
+}
+#endif /* DEVELOPMENT || DEBUG */
nanoseconds_to_absolutetime(command_buffer->delay_us * NSEC_PER_USEC, &deadline);
deadline += ml_get_timebase();
while (ml_get_timebase() < deadline) {
- ;
+ os_compiler_barrier();
}
}
}
PE_state.video.v_width = boot_args_ptr->Video.v_width;
PE_state.video.v_height = boot_args_ptr->Video.v_height;
PE_state.video.v_depth = (boot_args_ptr->Video.v_depth >> kBootVideoDepthDepthShift) & kBootVideoDepthMask;
- PE_state.video.v_rotate = (boot_args_ptr->Video.v_depth >> kBootVideoDepthRotateShift) & kBootVideoDepthMask;
+ PE_state.video.v_rotate = (
+ ((boot_args_ptr->Video.v_depth >> kBootVideoDepthRotateShift) & kBootVideoDepthMask) + // rotation
+ ((boot_args_ptr->Video.v_depth >> kBootVideoDepthBootRotateShift) & kBootVideoDepthMask) // add extra boot rotation
+ ) % 4;
PE_state.video.v_scale = ((boot_args_ptr->Video.v_depth >> kBootVideoDepthScaleShift) & kBootVideoDepthMask) + 1;
PE_state.video.v_display = boot_args_ptr->Video.v_display;
strlcpy(PE_state.video.v_pixelFormat, "BBBBBBBBGGGGGGGGRRRRRRRR", sizeof(PE_state.video.v_pixelFormat));
/****************************************************************************/
#ifdef PI3_UART
-vm_offset_t pi3_gpio_base_vaddr = 0;
-vm_offset_t pi3_aux_base_vaddr = 0;
+static vm_offset_t pi3_gpio_base_vaddr = 0;
+static vm_offset_t pi3_aux_base_vaddr = 0;
static int
pi3_uart_tr0(void)
{
};
#endif /* PI3_UART */
+
+/*****************************************************************************/
+
+
/*****************************************************************************/
static void
}
#endif /* PI3_UART */
+
#ifdef DOCKCHANNEL_UART
uint32_t no_dockchannel_uart = 0;
if (SecureDTFindEntry("name", "dockchannel-uart", &entryP) == kSuccess) {
#define kBootVideoDepthDepthShift (0)
#define kBootVideoDepthRotateShift (8)
#define kBootVideoDepthScaleShift (16)
+#define kBootVideoDepthBootRotateShift (24)
#define kBootFlagsDarkBoot (1 << 0)
#ifdef APPLE_ARM64_ARCH_FAMILY
-#define ARM64_REG_HID0 S3_0_c15_c0_0
#define ARM64_REG_HID0_LoopBuffDisb (1<<20)
#define ARM64_REG_HID0_AMXCacheFusionDisb (1ULL<<21)
#define ARM64_REG_HID0_ICPrefLimitOneBrn (1<<25)
#define ARM64_REG_HID0_ICPrefDepth_bmsk (7ULL <<ARM64_REG_HID0_ICPrefDepth_bshift)
#define ARM64_REG_HID0_ICPrefDepth_VALUE (1ULL <<ARM64_REG_HID0_ICPrefDepth_bshift)
-#define ARM64_REG_EHID0 S3_0_c15_c0_1
#define ARM64_REG_EHID0_nfpRetFwdDisb (1ULL<<45)
-#define ARM64_REG_HID1 S3_0_c15_c1_0
#define ARM64_REG_HID1_disCmpBrFusion (1<<14)
#define ARM64_REG_HID1_forceNexL3ClkOn (1<<15)
#define ARM64_REG_HID1_rccForceAllIexL3ClksOn (1<<23)
#define ARM64_REG_HID1_enaBrKillLimit (1ULL << 60)
#define ARM64_REG_HID1_SpareBit6 (1ULL << 60)
-#define ARM64_REG_EHID1 S3_0_c15_c1_1
#define ARM64_REG_EHID1_disMSRSpecDAIF (1ULL << 30)
-#define ARM64_REG_HID2 S3_0_c15_c2_0
#define ARM64_REG_HID2_disMMUmtlbPrefetch (1<<13)
#define ARM64_REG_HID2_ForcePurgeMtb (1<<17)
-#define ARM64_REG_EHID2 S3_0_c15_c2_1
#define ARM64_REG_EHID2_ForcePurgeMtb (1<<17)
-#define ARM64_REG_HID3 S3_0_c15_c3_0
#define ARM64_REG_HID3_DisColorOpt (1<<2)
#define ARM64_REG_HID3_DisDcZvaCmdOnly (1<<25)
#define ARM64_REG_HID3_DisArbFixBifCrd (1ULL<<44)
#define ARM64_REG_HID3_DisXmonSnpEvictTriggerL2StarvationMode (1<<54)
#define ARM64_REG_HID3_DevPcieThrottleEna (1ULL<<63)
-#define ARM64_REG_EHID3 S3_0_c15_c3_1
#define ARM64_REG_EHID3_DisColorOpt (1<<2)
#define ARM64_REG_EHID3_DisDcZvaCmdOnly (1<<25)
-#define ARM64_REG_HID4 S3_0_c15_c4_0
-#define ARM64_REG_EHID4 S3_0_c15_c4_1
-
#define ARM64_REG_HID4_DisDcMVAOps (1<<11)
#define ARM64_REG_HID4_DisSpecLnchRead (1<<33)
#define ARM64_REG_HID4_ForceNsOrdLdReqNoOlderLd (1<<39)
#define ARM64_REG_HID4_disSpecLSRedirect (1<<9)
#define ARM64_REG_HID4_DisSTNTWidget (1<<1)
-#define ARM64_REG_HID5 S3_0_c15_c5_0
#define ARM64_REG_HID5_DisHwpLd (1<<44)
#define ARM64_REG_HID5_DisHwpSt (1<<45)
#define ARM64_REG_HID5_DisFill2cMerge (1ULL << 61)
#define ARM64_REG_HID5_CrdPrbSnpRsvd_mask (0xFULL << ARM64_REG_HID5_CrdPrbSnpRsvd_shift)
#define ARM64_REG_HID5_CrdPrbSnpRsvd_VALUE(x) (x << ARM64_REG_HID5_CrdPrbSnpRsvd_shift)
-#define ARM64_REG_EHID5 S3_0_c15_c5_1
#define ARM64_REG_EHID5_DisFillByp (1 << 35)
-#define ARM64_REG_HID6 S3_0_c15_c6_0
#define ARM64_REG_HID6_UpCrdTknInitC2_shift (5)
#define ARM64_REG_HID6_UpCrdTknInitC2_mask (0x1FULL << ARM64_REG_HID6_UpCrdTknInitC2_shift)
#define ARM64_REG_HID6_DisClkDivGating (1ULL << 55)
-#define ARM64_REG_HID7 S3_0_c15_c7_0
#define ARM64_REG_HID7_forceNonSpecTargetedTimerSel_shift (24)
#define ARM64_REG_HID7_forceNonSpecTargetedTimerSel_mask (3ULL << ARM64_REG_HID7_forceNonSpecTargetedTimerSel_shift)
#define ARM64_REG_HID7_forceNonSpecTargetedTimerSel_VALUE (3ULL << ARM64_REG_HID7_forceNonSpecTargetedTimerSel_shift)
#define ARM64_REG_HID7_forceNonSpecIfStepping (1ULL << 20)
+#define ARM64_REG_HID7_forceNonSpecIfSpecFlushPtrNEBlkRtrPtr (1ULL << 19)
#define ARM64_REG_HID7_forceNonSpecIfSpecFlushPtrInvalidAndMPValid (1ULL << 16)
#define ARM64_REG_HID7_disNexFastFmul (1 << 10)
#define ARM64_REG_HID7_disCrossPick2 (1ULL << 7)
-#define ARM64_REG_HID8 S3_0_c15_c8_0
#define ARM64_REG_HID8_DataSetID0_VALUE (0xF << 4)
#define ARM64_REG_HID8_DataSetID1_VALUE (0xF << 8)
#define ARM64_REG_HID8_WkeForceStrictOrder (0x1ULL << 35)
#define ARM64_REG_HID8_DataSetID2_VALUE (0xF << 56)
#define ARM64_REG_HID8_DataSetID3_VALUE (0xF << 60)
-#define ARM64_REG_HID9 S3_0_c15_c9_0
#define ARM64_REG_HID9_TSOAllowDcZvaWC (1ULL << 26)
#define ARM64_REG_HID9_TSOSerializeVLDmicroops (1ULL << 29)
#define ARM64_REG_HID9_EnableFixBug51667805 (1ULL << 48)
#define ARM64_REG_HID9_EnableFixBug58566122 (3ULL << 53)
#define ARM64_REG_HID9_HidEnFix55719865 (1ULL << 55)
-#define ARM64_REG_EHID9 S3_0_c15_c9_1
#define ARM64_REG_EHID9_DevThrottle2Ena (1ULL << 5)
-#define ARM64_REG_HID10 S3_0_c15_c10_0
#define ARM64_REG_HID10_DisHwpGups (1ULL << 0)
-#define ARM64_REG_EHID10 S3_0_c15_c10_1
#define ARM64_REG_EHID10_rccDisPwrSavePrfClkOff (1ULL << 19)
#define ARM64_REG_EHID10_ForceWStDrainUc (1ULL << 32)
#define ARM64_REG_EHID10_DisZVATemporalTSO (1ULL << 49)
-#if defined(APPLETYPHOON) || defined(APPLETWISTER)
-#define ARM64_REG_HID11 S3_0_c15_c13_0
-#else /* defined(APPLECYCLONE) || defined(APPLETYPHOON) || defined(APPLETWISTER) */
-#define ARM64_REG_HID11 S3_0_c15_c11_0
-#endif /* defined(APPLECYCLONE) || defined(APPLETYPHOON) || defined(APPLETWISTER) */
#define ARM64_REG_HID11_DisX64NTLnchOpt (1ULL << 1)
#define ARM64_REG_HID11_DisFillC1BubOpt (1ULL << 7)
#define ARM64_REG_HID11_HidEnFixUc55719865 (1ULL << 15)
#define ARM64_REG_HID11_DisFastDrainOpt (1ULL << 23)
#define ARM64_REG_HID11_DisLDNTWidget (1ULL << 59)
-#define ARM64_REG_EHID11 S3_0_c15_c11_1
#define ARM64_REG_EHID11_SmbDrainThresh_mask (3ULL << 40)
-#define ARM64_REG_HID13 S3_0_c15_c14_0
#define ARM64_REG_HID13_PreCyc_shift (14)
#define ARM64_REG_HID13_PreCyc_mask (0xFULL << ARM64_REG_HID13_PreCyc_shift)
#define ARM64_REG_HID13_PreCyc_VALUE (0x4ULL << ARM64_REG_HID13_PreCyc_shift)
-#define ARM64_REG_HID14 S3_0_c15_c15_0
#define ARM64_REG_HID14_NexSleepTimeOutCyc_shift (0)
#define ARM64_REG_HID14_NexSleepTimeOutCyc_VALUE 0x7D0ULL
-#define ARM64_REG_HID16 S3_0_c15_c15_2
#define ARM64_REG_HID16_leqThrottleAggr (1ULL << 18)
#define ARM64_REG_HID16_SpareBit0 (1ULL << 56)
#define ARM64_REG_HID16_EnRs4Sec (1ULL << 57)
#define ARM64_REG_HID16_EnMPCyc7 (1ULL << 62)
#define ARM64_REG_HID16_SpareBit7 (1ULL << 63)
-#define ARM64_REG_HID17 S3_0_c15_c15_5
#define ARM64_REG_HID17_CrdEdbSnpRsvd_shift (0)
#define ARM64_REG_HID17_CrdEdbSnpRsvd_mask (0x7ULL << ARM64_REG_HID17_CrdEdbSnpRsvd_shift)
#define ARM64_REG_HID17_CrdEdbSnpRsvd_VALUE (0x2ULL << ARM64_REG_HID17_CrdEdbSnpRsvd_shift)
-#define ARM64_REG_HID18 S3_0_c15_c11_2
#define ARM64_REG_HID18_HVCSpecDisable (1ULL << 14)
#define ARM64_REG_HID18_SpareBit17 (1ULL << 49)
-#define ARM64_REG_HID21 S3_0_c15_c1_3
#define ARM64_REG_HID21_EnLdrexFillRply (1ULL << 19)
#define ARM64_REG_HID21_LdqRtrWaitForOldStRelCmpl (1ULL << 33)
#define ARM64_REG_HID21_DisCdpRplyPurgedTrans (1ULL << 34)
#if defined(APPLETYPHOON) || defined(APPLETWISTER)
-#define ARM64_REG_CYC_CFG S3_5_c15_c4_0
#define ARM64_REG_CYC_CFG_skipInit (1ULL<<30)
#define ARM64_REG_CYC_CFG_deepSleep (1ULL<<24)
#else /* defined(APPLECYCLONE) || defined(APPLETYPHOON) || defined(APPLETWISTER) */
-#define ARM64_REG_ACC_OVRD S3_5_c15_c6_0
-#if defined(APPLEMONSOON)
-#define ARM64_REG_ACC_EBLK_OVRD S3_5_c15_c6_1 // EBLK_OVRD on Zephyr
-#endif /* defined(APPLEMONSOON) */
-
#define ARM64_REG_ACC_OVRD_enDeepSleep (1ULL << 34)
#define ARM64_REG_ACC_OVRD_disPioOnWfiCpu (1ULL << 32)
#define ARM64_REG_ACC_OVRD_dsblClkDtr (1ULL << 29)
#define ARM64_REG_ACC_OVRD_disL2Flush4AccSlp_deepsleep (2ULL << 15)
#define ARM64_REG_ACC_OVRD_ok2PwrDnSRM_mask (3ULL << 13)
#define ARM64_REG_ACC_OVRD_ok2PwrDnSRM_deepsleep (3ULL << 13)
-
#endif /* defined(APPLECYCLONE) || defined(APPLETYPHOON) || defined(APPLETWISTER) */
-#define ARM64_REG_CYC_OVRD S3_5_c15_c5_0
#define ARM64_REG_CYC_OVRD_irq_mask (3<<22)
#define ARM64_REG_CYC_OVRD_irq_disable (2<<22)
#define ARM64_REG_CYC_OVRD_fiq_mask (3<<20)
#define ARM64_REG_CYC_OVRD_dsblSnoopPTime (1ULL << 31) /// Don't fetch the timebase from the P-block
#endif /* APPLEMONSOON */
-#define ARM64_REG_LSU_ERR_STS S3_3_c15_c0_0
#define ARM64_REG_LSU_ERR_STS_L1DTlbMultiHitEN (1ULL<<54)
-
-#define ARM64_REG_E_LSU_ERR_STS S3_3_c15_c2_0
-
-#define ARM64_REG_LSU_ERR_CTL S3_3_c15_c1_0
#define ARM64_REG_LSU_ERR_CTL_L1DTlbMultiHitEN (1ULL<<3)
-#define ARM64_REG_FED_ERR_STS S3_4_C15_C0_0
-
-#define ARM64_REG_E_FED_ERR_STS S3_4_C15_C0_2
-
-#define ARM64_REG_MMU_ERR_STS S3_6_c15_c0_0
-
-#define ARM64_REG_E_MMU_ERR_STS s3_6_c15_c2_0
-
-#define ARM64_REG_L2C_ERR_STS S3_3_c15_c8_0
-
-#define ARM64_REG_L2C_ERR_ADR S3_3_c15_c9_0
-
-#define ARM64_REG_L2C_ERR_INF S3_3_c15_c10_0
-
-#define ARM64_REG_MIGSTS_EL1 S3_4_c15_c0_4
-
-#define ARM64_REG_DPC_ERR_STS S3_5_c15_c0_5
-
-#if defined(HAS_KTRR)
-
-#ifdef ASSEMBLER
-#define ARM64_REG_KTRR_LOWER_EL1 S3_4_c15_c2_3
-#define ARM64_REG_KTRR_UPPER_EL1 S3_4_c15_c2_4
-#define ARM64_REG_KTRR_LOCK_EL1 S3_4_c15_c2_2
-#else /* ASSEMBLER */
-#define ARM64_REG_KTRR_LOWER_EL1 "S3_4_c15_c2_3"
-#define ARM64_REG_KTRR_UPPER_EL1 "S3_4_c15_c2_4"
-#define ARM64_REG_KTRR_LOCK_EL1 "S3_4_c15_c2_2"
-#endif /* ASSEMBLER */
-
-#endif /* defined (HAS_KTRR) */
-
-#if defined(HAS_CTRR)
-
-#ifdef ASSEMBLER
-#define ARM64_REG_CTRR_A_LWR_EL1 S3_4_c15_c2_3
-#define ARM64_REG_CTRR_A_UPR_EL1 S3_4_c15_c2_4
-#define ARM64_REG_CTRR_CTL_EL1 S3_4_c15_c2_5
-#define ARM64_REG_CTRR_LOCK_EL1 S3_4_c15_c2_2
-
-#define ACC_CTRR_A_LWR_EL2 S3_4_c15_c11_0
-#define ACC_CTRR_A_UPR_EL2 S3_4_c15_c11_1
-#define ACC_CTRR_CTL_EL2 S3_4_c15_c11_4
-#define ACC_CTRR_LOCK_EL2 S3_4_c15_c11_5
-#else /* ASSEMBLER */
-#define ARM64_REG_CTRR_A_LWR_EL1 "S3_4_c15_c2_3"
-#define ARM64_REG_CTRR_A_UPR_EL1 "S3_4_c15_c2_4"
-#define ARM64_REG_CTRR_CTL_EL1 "S3_4_c15_c2_5"
-#define ARM64_REG_CTRR_LOCK_EL1 "S3_4_c15_c2_2"
-
-#define ACC_CTRR_A_LWR_EL2 "S3_4_c15_c11_0"
-#define ACC_CTRR_A_UPR_EL2 "S3_4_c15_c11_1"
-#define ACC_CTRR_CTL_EL2 "S3_4_c15_c11_4"
-#define ACC_CTRR_LOCK_EL2 "S3_4_c15_c11_5"
-#endif /* ASSEMBLER */
-
-#define CTRR_CTL_EL1_A_MMUOFF_WRPROTECT (1 << 0)
-#define CTRR_CTL_EL1_A_MMUON_WRPROTECT (1 << 1)
-#define CTRR_CTL_EL1_B_MMUOFF_WRPROTECT (1 << 2)
-#define CTRR_CTL_EL1_B_MMUON_WRPROTECT (1 << 3)
-#define CTRR_CTL_EL1_A_PXN (1 << 4)
-#define CTRR_CTL_EL1_B_PXN (1 << 5)
-#define CTRR_CTL_EL1_A_UXN (1 << 6)
-#define CTRR_CTL_EL1_B_UXN (1 << 7)
-
-#endif /* defined (HAS_CTRR) */
#if defined(HAS_IPI)
-
#define ARM64_REG_IPI_RR_TYPE_IMMEDIATE (0 << 28)
#define ARM64_REG_IPI_RR_TYPE_RETRACT (1 << 28)
#define ARM64_REG_IPI_RR_TYPE_DEFERRED (2 << 28)
#define ARM64_REG_IPI_RR_TYPE_NOWAKE (3 << 28)
-
-#if defined(HAS_CLUSTER)
-#define ARM64_REG_IPI_RR_LOCAL __MSR_STR(S3_5_c15_c0_0)
-#define ARM64_REG_IPI_RR_GLOBAL __MSR_STR(S3_5_c15_c0_1)
-#else /* defined(HAS_CLUSTER) */
-#define ARM64_REG_IPI_RR __MSR_STR(S3_5_c15_c0_1)
-#endif /* defined(HAS_CLUSTER) */
-
-#define ARM64_REG_IPI_SR __MSR_STR(S3_5_c15_c1_1)
-#define ARM64_REG_IPI_CR __MSR_STR(S3_5_c15_c3_1)
-
#endif /* defined(HAS_IPI) */
#endif /* APPLE_ARM64_ARCH_FAMILY */
#if defined(HAS_NEX_PG)
-#define ARM64_REG_HID13 S3_0_c15_c14_0
#define ARM64_REG_HID13_RstCyc_mask (0xfULL << 60)
#define ARM64_REG_HID13_RstCyc_val (0xcULL << 60)
-#define ARM64_REG_HID14 S3_0_c15_c15_0
#define ARM64_REG_HID14_NexPwgEn (1ULL << 32)
#endif /* defined(HAS_NEX_PG) */
-#define ARM64_REG_EHID20 S3_0_c15_c1_2
#define ARM64_REG_EHID20_forceNonSpecTargetedTimerSel_shift (21)
#define ARM64_REG_EHID20_forceNonSpecTargetedTimerSel_mask (3ULL << ARM64_REG_EHID20_forceNonSpecTargetedTimerSel_shift)
#define ARM64_REG_EHID20_forceNonSpecTargetedTimerSel_VALUE (3ULL << ARM64_REG_EHID20_forceNonSpecTargetedTimerSel_shift)
#define ARM64_REG_EHID20_forceNonSpecIfSpecFlushPtrNEBlkRtrPtr (1ULL << 16)
#define ARM64_REG_EHID20_trapSMC (1ULL << 8)
#define ARM64_REG_EHID20_forceNonSpecIfOldestRedirVldAndOlder (1ULL << 15)
#if defined(HAS_BP_RET)
-#define ARM64_REG_ACC_CFG S3_5_c15_c4_0
#define ARM64_REG_ACC_CFG_bdpSlpEn (1ULL << 2)
#define ARM64_REG_ACC_CFG_btpSlpEn (1ULL << 3)
#define ARM64_REG_ACC_CFG_bpSlp_mask 3
#define ARM64_REG_ACC_CFG_bpSlp_shift 2
#endif /* defined(HAS_BP_RET) */
-
-#if defined(HAS_APPLE_PAC)
-
-
-#if ASSEMBLER
-#define ARM64_REG_APIAKEYLO_EL1 S3_0_c2_c1_0
-#define ARM64_REG_APIAKEYHI_EL1 S3_0_c2_c1_1
-#define ARM64_REG_APIBKEYLO_EL1 S3_0_c2_c1_2
-#define ARM64_REG_APIBKEYHI_EL1 S3_0_c2_c1_3
-
-#define ARM64_REG_APDAKEYLO_EL1 S3_0_c2_c2_0
-#define ARM64_REG_APDAKEYHI_EL1 S3_0_c2_c2_1
-#define ARM64_REG_APDBKEYLO_EL1 S3_0_c2_c2_2
-#define ARM64_REG_APDBKEYHI_EL1 S3_0_c2_c2_3
-
-#define ARM64_REG_APGAKEYLO_EL1 S3_0_c2_c3_0
-#define ARM64_REG_APGAKEYHI_EL1 S3_0_c2_c3_1
-#else /* ASSEMBLER */
-#define ARM64_REG_APIAKEYLO_EL1 "S3_0_c2_c1_0"
-#define ARM64_REG_APIAKEYHI_EL1 "S3_0_c2_c1_1"
-#define ARM64_REG_APIBKEYLO_EL1 "S3_0_c2_c1_2"
-#define ARM64_REG_APIBKEYHI_EL1 "S3_0_c2_c1_3"
-
-#define ARM64_REG_APDAKEYLO_EL1 "S3_0_c2_c2_0"
-#define ARM64_REG_APDAKEYHI_EL1 "S3_0_c2_c2_1"
-#define ARM64_REG_APDBKEYLO_EL1 "S3_0_c2_c2_2"
-#define ARM64_REG_APDBKEYHI_EL1 "S3_0_c2_c2_3"
-
-#define ARM64_REG_APGAKEYLO_EL1 "S3_0_c2_c3_0"
-#define ARM64_REG_APGAKEYHI_EL1 "S3_0_c2_c3_1"
-#endif /* ASSEMBLER */
-#endif /* HAS_APPLE_PAC */
-
#if defined(HAS_VMSA_LOCK)
-
-#define ARM64_REG_VMSA_LOCK_EL1 S3_4_c15_c1_2
-
#define VMSA_LOCK_VBAR_EL1 (1ULL << 0)
#define VMSA_LOCK_SCTLR_EL1 (1ULL << 1)
#define VMSA_LOCK_TCR_EL1 (1ULL << 2)
#define VMSA_LOCK_TTBR0_EL1 (1ULL << 3)
#define VMSA_LOCK_TTBR1_EL1 (1ULL << 4)
#define VMSA_LOCK_SCTLR_M_BIT (1ULL << 63)
-
#endif /* HAS_VMSA_LOCK */
-
-
#define MPIDR_PNE_SHIFT 16 // pcore not ecore
#define MPIDR_PNE (1 << MPIDR_PNE_SHIFT)
-
#define CPU_PIO_CPU_STS_OFFSET (0x100ULL)
#define CPU_PIO_CPU_STS_cpuRunSt_mask (0xff)
#define MAX_CPU_CLUSTERS 2
#define XNU_MONITOR 1 /* Secure pmap runtime */
-#define XNU_MONITOR_T8020_DART 1 /* T8020 DART plugin for secure pmap runtime */
-#define T8020_DART_ALLOW_BYPASS (1 << 1) /* DART allows translation bypass in certain cases */
-#define XNU_MONITOR_NVME_PPL 1 /* NVMe PPL plugin for secure pmap runtime */
-#define XNU_MONITOR_ANS2_SART 1 /* ANS2 SART plugin for secure pmap runtime */
-#define PMAP_CS 1
-#define PMAP_CS_ENABLE 1
#endif /* ARM64_BOARD_CONFIG_T8027 */
#ifdef ARM64_BOARD_CONFIG_T8028
#ifdef ARM64_BOARD_CONFIG_T8103
#include <pexpert/arm64/H13.h>
#include <pexpert/arm64/spr_locks.h>
+#undef HAS_SIQ
#define MAX_L2_CLINE 7
#define MAX_CPUS 8
#define CORE_NCTRS 8 /* Placeholder; KPC is not enabled for this target */
#endif /* ARM64_BOARD_CONFIG_BCM2837 */
+
#ifndef HAS_UNCORE_CTRS
#undef UNCORE_VERSION
#undef UNCORE_PER_CLUSTER
#define kBootVideoDepthDepthShift (0)
#define kBootVideoDepthRotateShift (8)
#define kBootVideoDepthScaleShift (16)
+#define kBootVideoDepthBootRotateShift (24)
#define kBootFlagsDarkBoot (1ULL << 0)
/* Version 2, Revision 1 */
uint64_t KC_hdrs_vaddr;
- uint64_t arvRootHashStart; /* Physical address of root hash file */
+ uint64_t arvRootHashStart; /* Physical address of system volume root hash file */
uint64_t arvRootHashSize;
- uint64_t arvManifestStart; /* Physical address of manifest file */
+ uint64_t arvManifestStart; /* Physical address of system volume manifest file */
uint64_t arvManifestSize;
+ uint64_t bsARVRootHashStart;/* Physical address of base system root hash file */
+ uint64_t bsARVRootHashSize;
+
+ uint64_t bsARVManifestStart;/* Physical address of base system manifest file */
+ uint64_t bsARVManifestSize;
+
/* Reserved */
- uint32_t __reserved4[700];
+ uint32_t __reserved4[692]; /* shrunk by 8 uint32_t to absorb the four new 64-bit base-system ARV fields and keep sizeof(boot_args) == 4096 */
} boot_args;
extern char assert_boot_args_size_is_4096[sizeof(boot_args) == 4096 ? 1 : -1];
___ubsan_handle_negate_overflow_abort
___ubsan_handle_nonnull_arg
___ubsan_handle_nonnull_arg_abort
-___ubsan_handle_nonnull_return
-___ubsan_handle_nonnull_return_abort
+___ubsan_handle_nonnull_return_v1
+___ubsan_handle_nonnull_return_v1_abort
___ubsan_handle_nullability_arg
___ubsan_handle_nullability_arg_abort
-___ubsan_handle_nullability_return
-___ubsan_handle_nullability_return_abort
+___ubsan_handle_nullability_return_v1
+___ubsan_handle_nullability_return_v1_abort
___ubsan_handle_out_of_bounds
___ubsan_handle_out_of_bounds_abort
___ubsan_handle_pointer_overflow
# Exclude KASAN dependencies
# XXX: could this be relaxed since fakestack is reentrant?
src:./osfmk/kern/zalloc.c
-src:./osfmk/kern/zcache.c
snprintf(fakestack_names[i], 16, "fakestack.%d", i);
fakestack_zones[i] = zone_create_ext(fakestack_names[i], sz,
- ZC_NOCALLOUT | ZC_NOGC | ZC_KASAN_NOREDZONE | ZC_KASAN_NOQUARANTINE,
+ ZC_NOCALLOUT | ZC_NOGC | ZC_NOCACHING |
+ ZC_KASAN_NOREDZONE | ZC_KASAN_NOQUARANTINE,
ZONE_ID_ANY, ^(zone_t z) {
- zone_set_exhaustible(z, maxsz);
+ zone_set_exhaustible(z, maxsz / sz);
});
- zfill(fakestack_zones[i], (int)maxsz / sz);
+ zone_fill_initially(fakestack_zones[i], maxsz / sz);
}
/* globally enable */
static _Atomic unsigned active_devs;
static LCK_GRP_DECLARE(ksancov_lck_grp, "ksancov_lck_grp");
-static lck_rw_t *ksancov_devs_lck;
+static LCK_RW_DECLARE(ksancov_devs_lck, &ksancov_lck_grp);
/* array of devices indexed by devnode minor */
static ksancov_dev_t ksancov_devs[KSANCOV_MAX_DEV];
return EBUSY;
}
- lck_rw_lock_exclusive(ksancov_devs_lck);
+ lck_rw_lock_exclusive(&ksancov_devs_lck);
if (ksancov_devs[minor_num]) {
- lck_rw_unlock_exclusive(ksancov_devs_lck);
+ lck_rw_unlock_exclusive(&ksancov_devs_lck);
return EBUSY;
}
ksancov_dev_t d = create_dev(dev);
if (!d) {
- lck_rw_unlock_exclusive(ksancov_devs_lck);
+ lck_rw_unlock_exclusive(&ksancov_devs_lck);
return ENOMEM;
}
ksancov_devs[minor_num] = d;
- lck_rw_unlock_exclusive(ksancov_devs_lck);
+ lck_rw_unlock_exclusive(&ksancov_devs_lck);
return 0;
}
thread_wait(d->thread, TRUE);
}
+ assert(active_devs >= 1);
+ os_atomic_sub(&active_devs, 1, relaxed);
+
/* drop our thread reference */
thread_deallocate(d->thread);
d->thread = THREAD_NULL;
#pragma unused(flags,devtype,p)
const int minor_num = minor(dev);
- lck_rw_lock_exclusive(ksancov_devs_lck);
+ lck_rw_lock_exclusive(&ksancov_devs_lck);
ksancov_dev_t d = ksancov_devs[minor_num];
ksancov_devs[minor_num] = NULL; /* dev no longer discoverable */
- lck_rw_unlock_exclusive(ksancov_devs_lck);
+ lck_rw_unlock_exclusive(&ksancov_devs_lck);
/*
* No need to lock d here as there is and will be no one having its
}
if (d->mode == KS_MODE_TRACE && d->trace) {
- os_atomic_sub(&active_devs, 1, relaxed);
os_atomic_store(&d->trace->enabled, 0, relaxed); /* stop tracing */
} else if (d->mode == KS_MODE_COUNTERS && d->counters) {
- os_atomic_sub(&active_devs, 1, relaxed);
os_atomic_store(&d->counters->enabled, 0, relaxed); /* stop tracing */
}
struct ksancov_buf_desc *mcmd;
void *data = (void *)_data;
- lck_rw_lock_shared(ksancov_devs_lck);
+ lck_rw_lock_shared(&ksancov_devs_lck);
ksancov_dev_t d = ksancov_devs[minor(dev)];
if (!d) {
- lck_rw_unlock_shared(ksancov_devs_lck);
+ lck_rw_unlock_shared(&ksancov_devs_lck);
return EINVAL; /* dev not open */
}
break;
}
- lck_rw_unlock_shared(ksancov_devs_lck);
+ lck_rw_unlock_shared(&ksancov_devs_lck);
return ret;
}
ksancov_edgemap->nedges = (uint32_t)nedges;
ksancov_edgemap->offset = KSANCOV_PC_OFFSET;
- ksancov_devs_lck = lck_rw_alloc_init(&ksancov_lck_grp, LCK_ATTR_NULL);
-
return 0;
}
/*
- * Copyright (c) 2016 Apple Inc. All rights reserved.
+ * Copyright (c) 2016-2020 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
static inline char *
__nosan_strncpy(char *dst, const char *src, size_t sz)
{
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+#endif
return strncpy(dst, src, sz);
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
}
static inline size_t
__nosan_strlcat(char *dst, const char *src, size_t sz)
static inline char *
__nosan_strncat(char *dst, const char *src, size_t sz)
{
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+#endif
return strncat(dst, src, sz);
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
}
static inline size_t
__nosan_strnlen(const char *src, size_t sz)
#include <stdatomic.h>
#include <kern/debug.h>
+#include <kern/assert.h>
#include <libkern/libkern.h>
#include "ubsan.h"
static const uint32_t line_acquired = 0x80000000UL;
static const char *get_type_check_kind(uint8_t kind);
-static size_t
-format_loc(struct san_src_loc *loc, char *dst, size_t sz)
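+/*
+ * Append a formatted fragment to the caller-supplied report buffer,
+ * advancing ub_logged by the number of bytes actually written.
+ */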
+static void
+ubsan_buf_log(struct ubsan_buf *ub, const char *fmt, ...)
{
- return scnprintf(dst, sz, ", file:\"%s\", line:%d, column:%d },\n",
- loc->filename,
- loc->line & ~line_acquired,
- loc->col
- );
+ va_list ap;
+
+ va_start(ap, fmt);
+ int n = vscnprintf(ub->ub_buf + ub->ub_logged, ub->ub_buf_size - ub->ub_logged, fmt, ap);
+ va_end(ap);
+
+ ub->ub_logged += n;
+ assert(ub->ub_logged <= ub->ub_buf_size);
+}
+
+static void
+ubsan_buf_log_loc(struct ubsan_buf *ub, const char *desc, struct san_src_loc *loc)
+{
+ ubsan_buf_log(ub, "%s:{ file:\"%s\", line:%d, column:%d }",
+ desc,
+ loc->filename,
+ loc->line & ~line_acquired,
+ loc->col);
}
/*
NULL
};
-static size_t
-format_overflow(struct ubsan_violation *v, char *buf, size_t sz)
+static void
+format_overflow(struct ubsan_violation *v, struct ubsan_buf *ub)
{
struct san_type_desc *ty = v->overflow->ty;
- return scnprintf(buf, sz,
- "problem:\"%s overflow\", op:\"%s\", ty:\"%s\", width:%d, lhs:0x%llx, rhs:0x%llx, ",
- ty->issigned ? "signed" : "unsigned",
- overflow_str[v->ubsan_type],
- ty->name,
- 1 << ty->width,
- v->lhs,
- v->rhs
- );
+ ubsan_buf_log(ub,
+ "problem:\"%s overflow\", op:\"%s\", ty:\"%s\", width:%d, lhs:0x%llx, rhs:0x%llx",
+ ty->issigned ? "signed" : "unsigned",
+ overflow_str[v->ubsan_type],
+ ty->name,
+ 1 << ty->width,
+ v->lhs,
+ v->rhs
+ );
}
-static size_t
-format_shift(struct ubsan_violation *v, char *buf, size_t sz)
+static void
+format_shift(struct ubsan_violation *v, struct ubsan_buf *ub)
{
- size_t n = 0;
struct san_type_desc *l = v->shift->lhs_t;
struct san_type_desc *r = v->shift->rhs_t;
- n += scnprintf(buf + n, sz - n, "problem:\"bad shift\", ");
- n += scnprintf(buf + n, sz - n, "lhs:0x%llx, lty:\"%s\", lsigned:%d, lwidth:%d, ", v->lhs, l->name, l->issigned, 1 << l->width);
- n += scnprintf(buf + n, sz - n, "rhs:0x%llx, rty:\"%s\", rsigned:%d, rwidth:%d, ", v->rhs, r->name, r->issigned, 1 << r->width);
-
- return n;
+ ubsan_buf_log(ub, "problem:\"bad shift\", ");
+ ubsan_buf_log(ub, "lhs:0x%llx, lty:\"%s\", lsigned:%d, lwidth:%d, ", v->lhs, l->name, l->issigned, 1 << l->width);
+ ubsan_buf_log(ub, "rhs:0x%llx, rty:\"%s\", rsigned:%d, rwidth:%d", v->rhs, r->name, r->issigned, 1 << r->width);
}
static const char * const
: "some";
}
-static size_t
-format_type_mismatch(struct ubsan_violation *v, char *buf, size_t sz)
+static void
+format_type_mismatch(struct ubsan_violation *v, struct ubsan_buf *ub)
{
- size_t n = 0;
size_t alignment = 1 << v->align->align;
void *ptr = (void*)v->lhs;
- const char * kind = get_type_check_kind(v->align->kind);
+ const char *kind = get_type_check_kind(v->align->kind);
+
if (NULL == ptr) {
//null pointer use
- n += scnprintf(buf + n, sz - n, "problem:\"%s NULL pointer\", ty:\"%s\", ", kind, v->align->ty->name);
+ ubsan_buf_log(ub, "problem:\"%s NULL pointer\", ty:\"%s\"", kind, v->align->ty->name);
} else if (alignment && ((uintptr_t)ptr & (alignment - 1))) {
//misaligned pointer use
- n += scnprintf(buf + n, sz - n, "problem:\"%s mis-aligned\", address:%p, ty:\"%s\", ", kind, (void*)v->lhs, v->align->ty->name);
- n += scnprintf(buf + n, sz - n, "required_alignment:%d, ", 1 << v->align->align);
+ ubsan_buf_log(ub, "problem:\"%s mis-aligned\", address:%p, ty:\"%s\", ",
+ kind, (void*)v->lhs, v->align->ty->name);
+ ubsan_buf_log(ub, "required_alignment:%d", 1 << v->align->align);
} else {
//insufficient object size
- n += scnprintf(buf + n, sz - n, "problem:\"%s insufficient object size\", ty:\"%s\", address:%p, ",
+ ubsan_buf_log(ub, "problem:\"%s insufficient object size\", ty:\"%s\", address:%p",
kind, v->align->ty->name, ptr);
}
-
- return n;
}
-static size_t
-format_oob(struct ubsan_violation *v, char *buf, size_t sz)
+static void
+format_oob(struct ubsan_violation *v, struct ubsan_buf *ub)
{
- size_t n = 0;
struct san_type_desc *aty = v->oob->array_ty;
struct san_type_desc *ity = v->oob->index_ty;
uintptr_t idx = v->lhs;
- n += scnprintf(buf + n, sz - n, "problem:\"OOB array access\", ");
- n += scnprintf(buf + n, sz - n, "idx:%ld, ", idx);
- n += scnprintf(buf + n, sz - n, "aty:\"%s\", asigned:%d, awidth:%d, ", aty->name, aty->issigned, 1 << aty->width);
- n += scnprintf(buf + n, sz - n, "ity:\"%s\", isigned:%d, iwidth:%d, ", ity->name, ity->issigned, 1 << ity->width);
+ ubsan_buf_log(ub, "problem:\"OOB array access\", ");
+ ubsan_buf_log(ub, "idx:%ld, ", idx);
+ ubsan_buf_log(ub, "aty:\"%s\", asigned:%d, awidth:%d, ", aty->name, aty->issigned, 1 << aty->width);
+ ubsan_buf_log(ub, "ity:\"%s\", isigned:%d, iwidth:%d", ity->name, ity->issigned, 1 << ity->width);
+}
- return n;
+static void
+format_nullability_arg(struct ubsan_violation *v, struct ubsan_buf *ub)
+{
+ struct ubsan_nullability_arg_desc *data = v->nonnull_arg;
+
+ const int arg_index = data->arg_index;
+ const char *attr_type = v->lhs ? "nonnull attribute" : "_Nonnull annotation";
+
+ ubsan_buf_log(ub, "problem:\"null in argument %d declared with %s\", ", arg_index, attr_type);
+ ubsan_buf_log_loc(ub, "declared", &data->attr_loc);
}
-static size_t
-format_load_invalid_value(struct ubsan_violation *v, char *buf, size_t sz)
+static void
+format_nonnull_return(struct ubsan_violation *v, struct ubsan_buf *ub)
{
- return scnprintf(buf, sz, "problem:\"invalid value load\", type:\"%s\", value:0x%llx",
- v->invalid->type->name, v->lhs);
+ struct san_src_loc *declaration = (struct san_src_loc *)v->rhs;
+ const char *return_type = v->lhs ? "returns_nonnull attribute" : "_Nonnull return type annotation";
+
+ ubsan_buf_log(ub, "problem:\"null returned from function declared with %s\", ", return_type);
+ ubsan_buf_log_loc(ub, "declared", declaration);
}
-size_t
-ubsan_format(struct ubsan_violation *v, char *buf, size_t sz)
+static void
+format_load_invalid_value(struct ubsan_violation *v, struct ubsan_buf *ub)
{
- size_t n = scnprintf(buf, sz, "{ ");
+ ubsan_buf_log(ub, "problem:\"invalid value load\", type:\"%s\", value:0x%llx",
+ v->invalid->type->name, v->lhs);
+}
+
+static void
+format_missing_return(struct ubsan_violation *v __unused, struct ubsan_buf *ub)
+{
+ ubsan_buf_log(ub, "problem:\"no value returned from value-returning function\"");
+}
+
+static void
+format_float_cast_overflow(struct ubsan_violation *v, struct ubsan_buf *ub)
+{
+ struct ubsan_float_desc *data = v->flt;
+ /*
+ * Cannot print out offending value (e.g. using %A, %f and so on) as kernel logging
+ * does not support float types (yet).
+ */
+ ubsan_buf_log(ub, "problem:\"%s type value outside the range of %s\"",
+ data->type_from->name, data->type_to->name);
+}
+
+static const char *
+get_implicit_conv_type(unsigned char kind)
+{
+ static const char * const conv_types[] = {
+ "integer truncation",
+ "unsigned integer truncation",
+ "signed integer truncation",
+ "integer sign change",
+ "signed integer truncation or sign change"
+ };
+ static const size_t conv_types_cnt = sizeof(conv_types) / sizeof(conv_types[0]);
+
+ return kind < conv_types_cnt ? conv_types[kind] : "unknown implicit integer conversion";
+}
+
+static void
+format_implicit_conversion(struct ubsan_violation *v, struct ubsan_buf *ub)
+{
+ struct ubsan_implicit_conv_desc *data = v->implicit;
+ struct san_type_desc *from = data->type_from;
+ struct san_type_desc *to = data->type_to;
+
+ ubsan_buf_log(ub, "problem:\"%s\", ", get_implicit_conv_type(data->kind));
+ ubsan_buf_log(ub, "src value:%#llx type:\"%s\", signed:%d, width:%d, ",
+ v->lhs, from->name, from->issigned, 1 << from->width);
+ ubsan_buf_log(ub, "dst value:%#llx type:\"%s\", signed:%d, width:%d",
+ v->rhs, to->name, to->issigned, 1 << to->width);
+}
+
+static void
+format_function_type_mismatch(struct ubsan_violation *v, struct ubsan_buf *ub)
+{
+ struct ubsan_func_type_mismatch_desc *data = v->func_mismatch;
+ ubsan_buf_log(ub, "problem:\"indirect function call through %p of a wrong type %s\"",
+ (void *)v->lhs, data->type->name);
+}
+
+static void
+format_vla_bound_not_positive(struct ubsan_violation *v, struct ubsan_buf *ub)
+{
+ struct ubsan_vla_bound_desc *data = v->vla_bound;
+ ubsan_buf_log(ub, "problem:\"VLA %s bound %#llx not positive\"", data->type->name, v->lhs);
+}
+
+static void
+format_invalid_builtin(struct ubsan_violation *v, struct ubsan_buf *ub)
+{
+ ubsan_buf_log(ub, "problem:\"passing invalid zero argument to %s\"",
+ v->invalid_builtin->kind == 0 ? "ctz()" : "clz()");
+}
+
+void
+ubsan_format(struct ubsan_violation *v, struct ubsan_buf *ub)
+{
+ ubsan_buf_log(ub, "{ ");
switch (v->ubsan_type) {
case UBSAN_OVERFLOW_add ... UBSAN_OVERFLOW_negate:
- n += format_overflow(v, buf + n, sz - n);
+ format_overflow(v, ub);
break;
case UBSAN_UNREACHABLE:
- n += scnprintf(buf + n, sz - n, "problem:\"unreachable\", ");
+ ubsan_buf_log(ub, "problem:\"unreachable\", ");
break;
case UBSAN_SHIFT:
- n += format_shift(v, buf + n, sz - n);
+ format_shift(v, ub);
break;
case UBSAN_TYPE_MISMATCH:
- n += format_type_mismatch(v, buf + n, sz - n);
+ format_type_mismatch(v, ub);
break;
case UBSAN_POINTER_OVERFLOW:
- n += scnprintf(buf + n, sz - n, "problem:\"pointer overflow\", before:0x%llx, after:0x%llx, ", v->lhs, v->rhs);
+ ubsan_buf_log(ub, "problem:\"pointer overflow\", before:0x%llx, after:0x%llx", v->lhs, v->rhs);
break;
case UBSAN_OOB:
- n += format_oob(v, buf + n, sz - n);
+ format_oob(v, ub);
break;
- case UBSAN_LOAD_INVALID_VALUE:
- n += format_load_invalid_value(v, buf + n, sz - n);
+ case UBSAN_NULLABILITY_ARG:
+ format_nullability_arg(v, ub);
+ break;
+ case UBSAN_NULLABILITY_RETURN:
+ format_nonnull_return(v, ub);
+ break;
+ case UBSAN_MISSING_RETURN:
+ format_missing_return(v, ub);
+ break;
+ case UBSAN_FLOAT_CAST_OVERFLOW:
+ format_float_cast_overflow(v, ub);
+ break;
+ case UBSAN_IMPLICIT_CONVERSION:
+ format_implicit_conversion(v, ub);
break;
- case UBSAN_GENERIC:
- n += scnprintf(buf + n, sz - n, "problem:\"generic\", function:\"%s\", ", v->func);
+ case UBSAN_FUNCTION_TYPE_MISMATCH:
+ format_function_type_mismatch(v, ub);
+ break;
+ case UBSAN_VLA_BOUND_NOT_POSITIVE:
+ format_vla_bound_not_positive(v, ub);
+ break;
+ case UBSAN_INVALID_BUILTIN:
+ format_invalid_builtin(v, ub);
+ break;
+ case UBSAN_LOAD_INVALID_VALUE:
+ format_load_invalid_value(v, ub);
break;
default:
panic("unknown violation");
}
- n += format_loc(v->loc, buf + n, sz - n);
-
- return n;
+ ubsan_buf_log_loc(ub, ", found", v->loc);
+ ubsan_buf_log(ub, " },\n");
}
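+/*
+ * Example of the record emitted for a signed add overflow (shape only;
+ * the values and source location are illustrative):
+ *
+ *   { problem:"signed overflow", op:"add", ty:"int", width:32,
+ *     lhs:0x7fffffff, rhs:0x1, found:{ file:"foo.c", line:42, column:7 } },
+ */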
enum UBFatality { Fatal, FleshWound };
ubsan_log_append(v);
if (ubsan_print || (fatality == Fatal)) {
- const size_t sz = 256;
- static char buf[sz];
- buf[0] = '\0';
- ubsan_format(v, buf, sz);
+ static char buf[256] = { 0 };
+ struct ubsan_buf ubsan_buf = {
+ .ub_logged = 0,
+ .ub_buf_size = sizeof(buf),
+ .ub_buf = buf
+ };
+ ubsan_format(v, &ubsan_buf);
printf("UBSan: %s", buf);
}
}
ubsan_handle(&v, Fatal);
}
+void
+__ubsan_handle_nullability_arg(struct ubsan_nullability_arg_desc *desc)
+{
+ struct ubsan_violation v = { UBSAN_NULLABILITY_ARG, 0, 0, .nonnull_arg = desc, &desc->loc };
+ ubsan_handle(&v, FleshWound);
+}
+
+void
+__ubsan_handle_nullability_arg_abort(struct ubsan_nullability_arg_desc *desc)
+{
+ struct ubsan_violation v = { UBSAN_NULLABILITY_ARG, 0, 0, .nonnull_arg = desc, &desc->loc };
+ ubsan_handle(&v, Fatal);
+}
+
+void
+__ubsan_handle_nonnull_arg(struct ubsan_nullability_arg_desc *desc)
+{
+ struct ubsan_violation v = { UBSAN_NULLABILITY_ARG, 1, 0, .nonnull_arg = desc, &desc->loc };
+ ubsan_handle(&v, FleshWound);
+}
+
+void
+__ubsan_handle_nonnull_arg_abort(struct ubsan_nullability_arg_desc *desc)
+{
+ struct ubsan_violation v = { UBSAN_NULLABILITY_ARG, 1, 0, .nonnull_arg = desc, &desc->loc };
+ ubsan_handle(&v, Fatal);
+}
+
+void
+__ubsan_handle_nullability_return_v1(struct ubsan_nullability_ret_desc *desc, uint64_t declaration)
+{
+ struct ubsan_violation v = { UBSAN_NULLABILITY_RETURN, 0, (uint64_t)&desc->loc, .nonnull_ret = desc, (struct san_src_loc *)declaration };
+ ubsan_handle(&v, FleshWound);
+}
+
+void
+__ubsan_handle_nullability_return_v1_abort(struct ubsan_nullability_ret_desc *desc, uint64_t declaration)
+{
+ struct ubsan_violation v = { UBSAN_NULLABILITY_RETURN, 0, (uint64_t)&desc->loc, .nonnull_ret = desc, (struct san_src_loc *)declaration };
+ ubsan_handle(&v, Fatal);
+}
+
+void
+__ubsan_handle_nonnull_return_v1(struct ubsan_nullability_ret_desc *desc, uint64_t declaration)
+{
+ struct ubsan_violation v = { UBSAN_NULLABILITY_RETURN, 1, (uint64_t)&desc->loc, .nonnull_ret = desc, (struct san_src_loc *)declaration };
+ ubsan_handle(&v, FleshWound);
+}
+
+void
+__ubsan_handle_nonnull_return_v1_abort(struct ubsan_nullability_ret_desc *desc, uint64_t declaration)
+{
+ struct ubsan_violation v = { UBSAN_NULLABILITY_RETURN, 1, (uint64_t)&desc->loc, .nonnull_ret = desc, (struct san_src_loc *)declaration };
+ ubsan_handle(&v, Fatal);
+}
+
+void
+__ubsan_handle_missing_return(struct ubsan_missing_ret_desc *desc)
+{
+ struct ubsan_violation v = { UBSAN_MISSING_RETURN, 0, 0, .missing_ret = desc, &desc->loc };
+ ubsan_handle(&v, Fatal);
+}
+
+void
+__ubsan_handle_missing_return_abort(struct ubsan_missing_ret_desc *desc)
+{
+ struct ubsan_violation v = { UBSAN_MISSING_RETURN, 0, 0, .missing_ret = desc, &desc->loc };
+ ubsan_handle(&v, Fatal);
+}
+
+void
+__ubsan_handle_float_cast_overflow(struct ubsan_float_desc *desc, uint64_t value)
+{
+ struct ubsan_violation v = { UBSAN_FLOAT_CAST_OVERFLOW, value, 0, .flt = desc, &desc->loc };
+ ubsan_handle(&v, Fatal);
+}
+
+void
+__ubsan_handle_float_cast_overflow_abort(struct ubsan_float_desc *desc, uint64_t value)
+{
+ struct ubsan_violation v = { UBSAN_FLOAT_CAST_OVERFLOW, value, 0, .flt = desc, &desc->loc };
+ ubsan_handle(&v, Fatal);
+}
+
+void
+__ubsan_handle_implicit_conversion(struct ubsan_implicit_conv_desc *desc, uint64_t from, uint64_t to)
+{
+ struct ubsan_violation v = { UBSAN_IMPLICIT_CONVERSION, from, to, .implicit = desc, &desc->loc };
+ ubsan_handle(&v, Fatal);
+}
+
+void
+__ubsan_handle_implicit_conversion_abort(struct ubsan_implicit_conv_desc *desc, uint64_t from, uint64_t to)
+{
+ struct ubsan_violation v = { UBSAN_IMPLICIT_CONVERSION, from, to, .implicit = desc, &desc->loc };
+ ubsan_handle(&v, Fatal);
+}
+
+void
+__ubsan_handle_function_type_mismatch(struct ubsan_func_type_mismatch_desc *desc, uint64_t func)
+{
+ struct ubsan_violation v = { UBSAN_FUNCTION_TYPE_MISMATCH, func, 0, .func_mismatch = desc, &desc->loc };
+ ubsan_handle(&v, Fatal);
+}
+
+void
+__ubsan_handle_function_type_mismatch_abort(struct ubsan_func_type_mismatch_desc *desc, uint64_t func)
+{
+ struct ubsan_violation v = { UBSAN_FUNCTION_TYPE_MISMATCH, func, 0, .func_mismatch = desc, &desc->loc };
+ ubsan_handle(&v, Fatal);
+}
+
+void
+__ubsan_handle_vla_bound_not_positive(struct ubsan_vla_bound_desc *desc, uint64_t length)
+{
+ struct ubsan_violation v = { UBSAN_VLA_BOUND_NOT_POSITIVE, length, 0, .vla_bound = desc, &desc->loc };
+ ubsan_handle(&v, Fatal);
+}
+
+void
+__ubsan_handle_vla_bound_not_positive_abort(struct ubsan_vla_bound_desc *desc, uint64_t length)
+{
+ struct ubsan_violation v = { UBSAN_VLA_BOUND_NOT_POSITIVE, length, 0, .vla_bound = desc, &desc->loc };
+ ubsan_handle(&v, Fatal);
+}
+
+void
+__ubsan_handle_invalid_builtin(struct ubsan_invalid_builtin *desc)
+{
+ struct ubsan_violation v = { UBSAN_INVALID_BUILTIN, 0, 0, .invalid_builtin = desc, &desc->loc };
+ ubsan_handle(&v, Fatal);
+}
+
+void
+__ubsan_handle_invalid_builtin_abort(struct ubsan_invalid_builtin *desc)
+{
+ struct ubsan_violation v = { UBSAN_INVALID_BUILTIN, 0, 0, .invalid_builtin = desc, &desc->loc };
+ ubsan_handle(&v, Fatal);
+}
+
void
__ubsan_handle_load_invalid_value(struct ubsan_load_invalid_desc *desc, uint64_t invalid_value)
{
struct ubsan_violation v = { UBSAN_LOAD_INVALID_VALUE, invalid_value, 0, .invalid = desc, &desc->loc };
ubsan_handle(&v, Fatal);
}
-
-#define DEFINE_GENERIC(check) \
- void __ubsan_handle_##check (struct san_src_loc* loc) \
- { \
- struct ubsan_violation v = { UBSAN_GENERIC, 0, 0, .func = __func__, loc }; \
- ubsan_handle(&v, FleshWound); \
- } \
- void __ubsan_handle_##check##_abort(struct san_src_loc* loc) \
- { \
- struct ubsan_violation v = { UBSAN_GENERIC, 0, 0, .func = __func__, loc }; \
- ubsan_handle(&v, Fatal); \
- }
-
-DEFINE_GENERIC(invalid_builtin)
-DEFINE_GENERIC(nonnull_arg)
-DEFINE_GENERIC(vla_bound_not_positive)
-DEFINE_GENERIC(float_cast_overflow)
-DEFINE_GENERIC(function_type_mismatch)
-DEFINE_GENERIC(missing_return)
-DEFINE_GENERIC(nonnull_return)
-DEFINE_GENERIC(nullability_arg)
-DEFINE_GENERIC(nullability_return)
-DEFINE_GENERIC(implicit_conversion)
struct san_type_desc *type;
};
+struct ubsan_nullability_arg_desc {
+ struct san_src_loc loc;
+ struct san_src_loc attr_loc;
+ int arg_index;
+};
+
+struct ubsan_nullability_ret_desc {
+ struct san_src_loc loc;
+};
+
+struct ubsan_missing_ret_desc {
+ struct san_src_loc loc;
+};
+
+struct ubsan_float_desc {
+ struct san_src_loc loc;
+ struct san_type_desc *type_from;
+ struct san_type_desc *type_to;
+};
+
+struct ubsan_implicit_conv_desc {
+ struct san_src_loc loc;
+ struct san_type_desc *type_from;
+ struct san_type_desc *type_to;
+ unsigned char kind;
+};
+
+struct ubsan_func_type_mismatch_desc {
+ struct san_src_loc loc;
+ struct san_type_desc *type;
+};
+
+struct ubsan_vla_bound_desc {
+ struct san_src_loc loc;
+ struct san_type_desc *type;
+};
+
+struct ubsan_invalid_builtin {
+ struct san_src_loc loc;
+ unsigned char kind;
+};
+
enum {
UBSAN_OVERFLOW_add = 1,
UBSAN_OVERFLOW_sub,
UBSAN_ALIGN,
UBSAN_POINTER_OVERFLOW,
UBSAN_OOB,
- UBSAN_GENERIC,
UBSAN_TYPE_MISMATCH,
UBSAN_LOAD_INVALID_VALUE,
- UBSAN_VIOLATION_MAX,
+ UBSAN_NULLABILITY_ARG,
+ UBSAN_NULLABILITY_RETURN,
+ UBSAN_MISSING_RETURN,
+ UBSAN_FLOAT_CAST_OVERFLOW,
+ UBSAN_IMPLICIT_CONVERSION,
+ UBSAN_FUNCTION_TYPE_MISMATCH,
+ UBSAN_VLA_BOUND_NOT_POSITIVE,
+ UBSAN_INVALID_BUILTIN,
+ UBSAN_VIOLATION_MAX
};
struct ubsan_violation {
struct ubsan_ptroverflow_desc *ptroverflow;
struct ubsan_oob_desc *oob;
struct ubsan_load_invalid_desc *invalid;
+ struct ubsan_nullability_arg_desc *nonnull_arg;
+ struct ubsan_nullability_ret_desc *nonnull_ret;
+ struct ubsan_missing_ret_desc *missing_ret;
+ struct ubsan_float_desc *flt;
+ struct ubsan_implicit_conv_desc *implicit;
+ struct ubsan_func_type_mismatch_desc *func_mismatch;
+ struct ubsan_vla_bound_desc *vla_bound;
+ struct ubsan_invalid_builtin *invalid_builtin;
const char *func;
};
struct san_src_loc *loc;
};
+struct ubsan_buf {
+ size_t ub_logged;   /* bytes formatted into ub_buf so far */
+ size_t ub_buf_size; /* total capacity of ub_buf */
+ char *ub_buf;
+};
+
void ubsan_log_append(struct ubsan_violation *);
-size_t ubsan_format(struct ubsan_violation *, char *buf, size_t sz);
+void ubsan_format(struct ubsan_violation *, struct ubsan_buf *);
/*
* UBSan ABI
void __ubsan_handle_builtin_unreachable(struct ubsan_unreachable_desc *);
void __ubsan_handle_divrem_overflow(struct ubsan_overflow_desc *, uint64_t lhs, uint64_t rhs);
void __ubsan_handle_divrem_overflow_abort(struct ubsan_overflow_desc *, uint64_t lhs, uint64_t rhs);
+void __ubsan_handle_float_cast_overflow(struct ubsan_float_desc *, uint64_t);
+void __ubsan_handle_float_cast_overflow_abort(struct ubsan_float_desc *, uint64_t);
+void __ubsan_handle_function_type_mismatch(struct ubsan_func_type_mismatch_desc*, uint64_t);
+void __ubsan_handle_function_type_mismatch_abort(struct ubsan_func_type_mismatch_desc *, uint64_t);
+void __ubsan_handle_implicit_conversion(struct ubsan_implicit_conv_desc *, uint64_t, uint64_t);
+void __ubsan_handle_implicit_conversion_abort(struct ubsan_implicit_conv_desc *, uint64_t, uint64_t);
+void __ubsan_handle_invalid_builtin(struct ubsan_invalid_builtin *);
+void __ubsan_handle_invalid_builtin_abort(struct ubsan_invalid_builtin *);
+void __ubsan_handle_load_invalid_value(struct ubsan_load_invalid_desc *, uint64_t);
+void __ubsan_handle_load_invalid_value_abort(struct ubsan_load_invalid_desc *, uint64_t);
+void __ubsan_handle_missing_return(struct ubsan_missing_ret_desc *);
+void __ubsan_handle_missing_return_abort(struct ubsan_missing_ret_desc *);
void __ubsan_handle_mul_overflow(struct ubsan_overflow_desc *, uint64_t lhs, uint64_t rhs);
void __ubsan_handle_mul_overflow_abort(struct ubsan_overflow_desc *, uint64_t lhs, uint64_t rhs);
void __ubsan_handle_negate_overflow(struct ubsan_overflow_desc *, uint64_t lhs, uint64_t rhs);
void __ubsan_handle_negate_overflow_abort(struct ubsan_overflow_desc *, uint64_t lhs, uint64_t rhs);
+void __ubsan_handle_nonnull_arg(struct ubsan_nullability_arg_desc *);
+void __ubsan_handle_nonnull_arg_abort(struct ubsan_nullability_arg_desc *);
+void __ubsan_handle_nonnull_return_v1(struct ubsan_nullability_ret_desc *, uint64_t);
+void __ubsan_handle_nonnull_return_v1_abort(struct ubsan_nullability_ret_desc *, uint64_t);
+void __ubsan_handle_nullability_arg(struct ubsan_nullability_arg_desc *);
+void __ubsan_handle_nullability_arg_abort(struct ubsan_nullability_arg_desc *);
+void __ubsan_handle_nullability_return_v1(struct ubsan_nullability_ret_desc *, uint64_t);
+void __ubsan_handle_nullability_return_v1_abort(struct ubsan_nullability_ret_desc *, uint64_t);
void __ubsan_handle_out_of_bounds(struct ubsan_oob_desc *, uint64_t idx);
void __ubsan_handle_out_of_bounds_abort(struct ubsan_oob_desc *, uint64_t idx);
void __ubsan_handle_pointer_overflow(struct ubsan_ptroverflow_desc *, uint64_t lhs, uint64_t rhs);
void __ubsan_handle_sub_overflow_abort(struct ubsan_overflow_desc *, uint64_t lhs, uint64_t rhs);
void __ubsan_handle_type_mismatch_v1(struct ubsan_align_desc *, uint64_t val);
void __ubsan_handle_type_mismatch_v1_abort(struct ubsan_align_desc *, uint64_t val);
-void __ubsan_handle_load_invalid_value(struct ubsan_load_invalid_desc *, uint64_t);
-void __ubsan_handle_load_invalid_value_abort(struct ubsan_load_invalid_desc *, uint64_t);
-
-/* currently unimplemented */
-void __ubsan_handle_float_cast_overflow(struct san_src_loc *);
-void __ubsan_handle_float_cast_overflow_abort(struct san_src_loc *);
-void __ubsan_handle_function_type_mismatch(struct san_src_loc *);
-void __ubsan_handle_function_type_mismatch_abort(struct san_src_loc *);
-void __ubsan_handle_implicit_conversion(struct san_src_loc *);
-void __ubsan_handle_implicit_conversion_abort(struct san_src_loc *);
-void __ubsan_handle_invalid_builtin(struct san_src_loc *);
-void __ubsan_handle_invalid_builtin_abort(struct san_src_loc *);
-void __ubsan_handle_missing_return(struct san_src_loc *);
-void __ubsan_handle_missing_return_abort(struct san_src_loc *);
-void __ubsan_handle_nonnull_arg(struct san_src_loc *);
-void __ubsan_handle_nonnull_arg_abort(struct san_src_loc *);
-void __ubsan_handle_nonnull_return(struct san_src_loc *);
-void __ubsan_handle_nonnull_return_abort(struct san_src_loc *);
-void __ubsan_handle_nullability_arg(struct san_src_loc *);
-void __ubsan_handle_nullability_arg_abort(struct san_src_loc *);
-void __ubsan_handle_nullability_return(struct san_src_loc *);
-void __ubsan_handle_nullability_return_abort(struct san_src_loc *);
-void __ubsan_handle_vla_bound_not_positive(struct san_src_loc *);
-void __ubsan_handle_vla_bound_not_positive_abort(struct san_src_loc *);
+void __ubsan_handle_vla_bound_not_positive(struct ubsan_vla_bound_desc *, uint64_t);
+void __ubsan_handle_vla_bound_not_positive_abort(struct ubsan_vla_bound_desc *, uint64_t);
#endif /* _UBSAN_H_ */
os_atomic_thread_fence(seq_cst);
tail = os_atomic_load(&ubsan_log_tail, relaxed);
- char *buf;
- size_t n = 0;
- int err;
-
if (tail == head) {
return 0; /* log is empty */
}
- buf = kheap_alloc(KHEAP_TEMP, sz, Z_WAITOK | Z_ZERO);
+ char *buf = kheap_alloc(KHEAP_TEMP, sz, Z_WAITOK | Z_ZERO);
if (!buf) {
return 0;
}
+ struct ubsan_buf ubsan_buf = {
+ .ub_logged = 0,
+ .ub_buf_size = sz,
+ .ub_buf = buf
+ };
+
for (size_t i = tail; i != head; i = next_entry(i)) {
- n += ubsan_format(&ubsan_log[i], buf + n, sz - n);
+ ubsan_format(&ubsan_log[i], &ubsan_buf);
}
- err = SYSCTL_OUT(req, buf, n);
+ int err = SYSCTL_OUT(req, buf, ubsan_buf.ub_logged);
kheap_free(KHEAP_TEMP, buf, sz);
return err;
#if CONFIG_MACF
SYSCTL_NODE(, OID_AUTO, security, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
"Security Controls");
-SYSCTL_NODE(_security, OID_AUTO, mac, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
+SYSCTL_EXTENSIBLE_NODE(_security, OID_AUTO, mac, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
"TrustedBSD MAC policy controls");
/*
* For a few special operations involving a change to the list of
* active policies, the mtx itself must be held.
*/
-static lck_mtx_t *mac_policy_mtx;
+static LCK_GRP_DECLARE(mac_lck_grp, "MAC lock");
+static LCK_MTX_DECLARE(mac_policy_mtx, &mac_lck_grp);
/*
* Policy list array allocation chunk size. Each entry holds a pointer.
static __inline void
mac_policy_grab_exclusive(void)
{
- lck_mtx_lock(mac_policy_mtx);
+ lck_mtx_lock(&mac_policy_mtx);
while (mac_policy_busy != 0) {
- lck_mtx_sleep(mac_policy_mtx, LCK_SLEEP_UNLOCK,
+ lck_mtx_sleep(&mac_policy_mtx, LCK_SLEEP_UNLOCK,
(event_t)&mac_policy_busy, THREAD_UNINT);
- lck_mtx_lock(mac_policy_mtx);
+ lck_mtx_lock(&mac_policy_mtx);
}
}
{
KASSERT(mac_policy_busy == 0,
("mac_policy_release_exclusive(): not exclusive"));
- lck_mtx_unlock(mac_policy_mtx);
+ lck_mtx_unlock(&mac_policy_mtx);
thread_wakeup((event_t) &mac_policy_busy);
}
void
mac_policy_list_busy(void)
{
- lck_mtx_lock(mac_policy_mtx);
+ lck_mtx_lock(&mac_policy_mtx);
mac_policy_busy++;
- lck_mtx_unlock(mac_policy_mtx);
+ lck_mtx_unlock(&mac_policy_mtx);
}
int
return 0;
}
- lck_mtx_lock(mac_policy_mtx);
+ lck_mtx_lock(&mac_policy_mtx);
if (mac_policy_list.numloaded > mac_policy_list.staticmax) {
mac_policy_busy++;
ret = 1;
} else {
ret = 0;
}
- lck_mtx_unlock(mac_policy_mtx);
+ lck_mtx_unlock(&mac_policy_mtx);
return ret;
}
void
mac_policy_list_unbusy(void)
{
- lck_mtx_lock(mac_policy_mtx);
+ lck_mtx_lock(&mac_policy_mtx);
mac_policy_busy--;
KASSERT(mac_policy_busy >= 0, ("MAC_POLICY_LIST_LOCK"));
if (mac_policy_busy == 0) {
thread_wakeup(&mac_policy_busy);
}
- lck_mtx_unlock(mac_policy_mtx);
+ lck_mtx_unlock(&mac_policy_mtx);
}
/*
void
mac_policy_init(void)
{
- lck_grp_attr_t *mac_lck_grp_attr;
- lck_attr_t *mac_lck_attr;
- lck_grp_t *mac_lck_grp;
-
mac_policy_list.numloaded = 0;
mac_policy_list.max = MAC_POLICY_LIST_CHUNKSIZE;
mac_policy_list.maxindex = 0;
LIST_INIT(&mac_label_element_list);
LIST_INIT(&mac_static_label_element_list);
-
- mac_lck_grp_attr = lck_grp_attr_alloc_init();
- mac_lck_grp = lck_grp_alloc_init("MAC lock", mac_lck_grp_attr);
- mac_lck_attr = lck_attr_alloc_init();
- lck_attr_setdefault(mac_lck_attr);
- mac_policy_mtx = lck_mtx_alloc_init(mac_lck_grp, mac_lck_attr);
- lck_attr_free(mac_lck_attr);
- lck_grp_attr_free(mac_lck_grp_attr);
- lck_grp_free(mac_lck_grp);
}
/* Function pointer set up for loading security extensions.
struct vnode_attr;
struct vop_setlabel_args;
+#include <stdbool.h>
#include <sys/kauth.h>
#include <sys/kernel_types.h>
/*@ === */
int mac_audit_check_postselect(kauth_cred_t cred, unsigned short syscode,
- void *args, int error, int retval, int mac_forced);
+ void *args, int error, int retval, int mac_forced) __result_use_check;
int mac_audit_check_preselect(kauth_cred_t cred, unsigned short syscode,
- void *args);
+ void *args) __result_use_check;
int mac_cred_check_label_update(kauth_cred_t cred,
- struct label *newlabel);
+ struct label *newlabel) __result_use_check;
int mac_cred_check_label_update_execve(vfs_context_t ctx,
struct vnode *vp, off_t offset, struct vnode *scriptvp,
struct label *scriptvnodelabel, struct label *execlabel,
- proc_t proc, void *macextensions);
-int mac_cred_check_visible(kauth_cred_t u1, kauth_cred_t u2);
+ proc_t proc, void *macextensions) __result_use_check;
+int mac_cred_check_visible(kauth_cred_t u1, kauth_cred_t u2) __result_use_check;
struct label *mac_cred_label_alloc(void);
void mac_cred_label_associate(kauth_cred_t cred_parent,
kauth_cred_t cred_child);
void mac_cred_label_associate_kernel(kauth_cred_t cred);
void mac_cred_label_associate_user(kauth_cred_t cred);
void mac_cred_label_destroy(kauth_cred_t cred);
-int mac_cred_label_externalize_audit(proc_t p, struct mac *mac);
+int mac_cred_label_externalize_audit(proc_t p, struct mac *mac) __result_use_check;
void mac_cred_label_free(struct label *label);
void mac_cred_label_init(kauth_cred_t cred);
-int mac_cred_label_compare(struct label *a, struct label *b);
+bool mac_cred_label_is_equal(const struct label *a, const struct label *b) __result_use_check;
+uint32_t mac_cred_label_hash_update(const struct label *a, uint32_t hash);
void mac_cred_label_update(kauth_cred_t cred, struct label *newlabel);
void mac_cred_label_update_execve(vfs_context_t ctx, kauth_cred_t newcred,
struct vnode *vp, off_t offset, struct vnode *scriptvp,
void mac_devfs_label_init(struct devnode *de);
void mac_devfs_label_update(struct mount *mp, struct devnode *de,
struct vnode *vp);
-int mac_execve_enter(user_addr_t mac_p, struct image_params *imgp);
-int mac_file_check_change_offset(kauth_cred_t cred, struct fileglob *fg);
-int mac_file_check_create(kauth_cred_t cred);
-int mac_file_check_dup(kauth_cred_t cred, struct fileglob *fg, int newfd);
+int mac_execve_enter(user_addr_t mac_p, struct image_params *imgp) __result_use_check;
+int mac_file_check_change_offset(kauth_cred_t cred, struct fileglob *fg) __result_use_check;
+int mac_file_check_create(kauth_cred_t cred) __result_use_check;
+int mac_file_check_dup(kauth_cred_t cred, struct fileglob *fg, int newfd) __result_use_check;
int mac_file_check_fcntl(kauth_cred_t cred, struct fileglob *fg, int cmd,
- user_long_t arg);
+ user_long_t arg) __result_use_check;
int mac_file_check_get(kauth_cred_t cred, struct fileglob *fg,
- char *elements, size_t len);
-int mac_file_check_get_offset(kauth_cred_t cred, struct fileglob *fg);
-int mac_file_check_inherit(kauth_cred_t cred, struct fileglob *fg);
+ char *elements, size_t len) __result_use_check;
+int mac_file_check_get_offset(kauth_cred_t cred, struct fileglob *fg) __result_use_check;
+int mac_file_check_inherit(kauth_cred_t cred, struct fileglob *fg) __result_use_check;
int mac_file_check_ioctl(kauth_cred_t cred, struct fileglob *fg,
- unsigned long cmd);
+ unsigned long cmd) __result_use_check;
int mac_file_check_lock(kauth_cred_t cred, struct fileglob *fg, int op,
- struct flock *fl);
+ struct flock *fl) __result_use_check;
int mac_file_check_library_validation(struct proc *proc,
struct fileglob *fg, off_t slice_offset,
- user_long_t error_message, size_t error_message_size);
+ user_long_t error_message, size_t error_message_size) __result_use_check;
int mac_file_check_mmap(kauth_cred_t cred, struct fileglob *fg,
- int prot, int flags, uint64_t file_pos, int *maxprot);
+ int prot, int flags, uint64_t file_pos, int *maxprot) __result_use_check;
void mac_file_check_mmap_downgrade(kauth_cred_t cred, struct fileglob *fg,
int *prot);
-int mac_file_check_receive(kauth_cred_t cred, struct fileglob *fg);
+int mac_file_check_receive(kauth_cred_t cred, struct fileglob *fg) __result_use_check;
int mac_file_check_set(kauth_cred_t cred, struct fileglob *fg,
- char *bufp, size_t buflen);
+ char *bufp, size_t buflen) __result_use_check;
void mac_file_notify_close(struct ucred *cred, struct fileglob *fg);
void mac_file_label_associate(kauth_cred_t cred, struct fileglob *fg);
void mac_file_label_destroy(struct fileglob *fg);
void mac_file_label_init(struct fileglob *fg);
-int mac_iokit_check_open(kauth_cred_t cred, io_object_t user_client, unsigned int user_client_type);
-int mac_iokit_check_set_properties(kauth_cred_t cred, io_object_t registry_entry, io_object_t properties);
-int mac_iokit_check_filter_properties(kauth_cred_t cred, io_object_t registry_entry);
-int mac_iokit_check_get_property(kauth_cred_t cred, io_object_t registry_entry, const char *name);
+int mac_iokit_check_open_service(kauth_cred_t cred, io_object_t service, unsigned int user_client_type) __result_use_check;
+int mac_iokit_check_open(kauth_cred_t cred, io_object_t user_client, unsigned int user_client_type) __result_use_check;
+int mac_iokit_check_set_properties(kauth_cred_t cred, io_object_t registry_entry, io_object_t properties) __result_use_check;
+int mac_iokit_check_filter_properties(kauth_cred_t cred, io_object_t registry_entry) __result_use_check;
+int mac_iokit_check_get_property(kauth_cred_t cred, io_object_t registry_entry, const char *name) __result_use_check;
#ifdef KERNEL_PRIVATE
-int mac_iokit_check_hid_control(kauth_cred_t cred);
+int mac_iokit_check_hid_control(kauth_cred_t cred) __result_use_check;
#endif
int mac_mount_check_fsctl(vfs_context_t ctx, struct mount *mp,
- unsigned long cmd);
+ unsigned long cmd) __result_use_check;
int mac_mount_check_getattr(vfs_context_t ctx, struct mount *mp,
- struct vfs_attr *vfa);
-int mac_mount_check_label_update(vfs_context_t ctx, struct mount *mp);
+ struct vfs_attr *vfa) __result_use_check;
+int mac_mount_check_label_update(vfs_context_t ctx, struct mount *mp) __result_use_check;
int mac_mount_check_mount(vfs_context_t ctx, struct vnode *vp,
- struct componentname *cnp, const char *vfc_name);
-int mac_mount_check_mount_late(vfs_context_t ctx, struct mount *mp);
+ struct componentname *cnp, const char *vfc_name) __result_use_check;
+int mac_mount_check_mount_late(vfs_context_t ctx, struct mount *mp) __result_use_check;
int mac_mount_check_snapshot_create(vfs_context_t ctx, struct mount *mp,
- const char *name);
+ const char *name) __result_use_check;
int mac_mount_check_snapshot_delete(vfs_context_t ctx, struct mount *mp,
- const char *name);
+ const char *name) __result_use_check;
#ifdef KERNEL_PRIVATE
int mac_mount_check_snapshot_mount(vfs_context_t ctx, struct vnode *rvp,
struct vnode *vp, struct componentname *cnp, const char *name,
- const char *vfc_name);
+ const char *vfc_name) __result_use_check;
#endif
int mac_mount_check_snapshot_revert(vfs_context_t ctx, struct mount *mp,
- const char *name);
-int mac_mount_check_remount(vfs_context_t ctx, struct mount *mp);
+ const char *name) __result_use_check;
+int mac_mount_check_remount(vfs_context_t ctx, struct mount *mp) __result_use_check;
int mac_mount_check_setattr(vfs_context_t ctx, struct mount *mp,
- struct vfs_attr *vfa);
-int mac_mount_check_stat(vfs_context_t ctx, struct mount *mp);
-int mac_mount_check_umount(vfs_context_t ctx, struct mount *mp);
+ struct vfs_attr *vfa) __result_use_check;
+int mac_mount_check_stat(vfs_context_t ctx, struct mount *mp) __result_use_check;
+int mac_mount_check_umount(vfs_context_t ctx, struct mount *mp) __result_use_check;
void mac_mount_label_associate(vfs_context_t ctx, struct mount *mp);
void mac_mount_label_destroy(struct mount *mp);
int mac_mount_label_externalize(struct label *label, char *elements,
- char *outbuf, size_t outbuflen);
-int mac_mount_label_get(struct mount *mp, user_addr_t mac_p);
+ char *outbuf, size_t outbuflen) __result_use_check;
+int mac_mount_label_get(struct mount *mp, user_addr_t mac_p) __result_use_check;
void mac_mount_label_init(struct mount *);
-int mac_mount_label_internalize(struct label *, char *string);
+int mac_mount_label_internalize(struct label *, char *string) __result_use_check;
int mac_pipe_check_ioctl(kauth_cred_t cred, struct pipe *cpipe,
- unsigned long cmd);
+ unsigned long cmd) __result_use_check;
int mac_pipe_check_kqfilter(kauth_cred_t cred, struct knote *kn,
- struct pipe *cpipe);
-int mac_pipe_check_read(kauth_cred_t cred, struct pipe *cpipe);
+ struct pipe *cpipe) __result_use_check;
+int mac_pipe_check_read(kauth_cred_t cred, struct pipe *cpipe) __result_use_check;
int mac_pipe_check_select(kauth_cred_t cred, struct pipe *cpipe,
- int which);
-int mac_pipe_check_stat(kauth_cred_t cred, struct pipe *cpipe);
-int mac_pipe_check_write(kauth_cred_t cred, struct pipe *cpipe);
+ int which) __result_use_check;
+int mac_pipe_check_stat(kauth_cred_t cred, struct pipe *cpipe) __result_use_check;
+int mac_pipe_check_write(kauth_cred_t cred, struct pipe *cpipe) __result_use_check;
struct label *mac_pipe_label_alloc(void);
void mac_pipe_label_associate(kauth_cred_t cred, struct pipe *cpipe);
void mac_pipe_label_destroy(struct pipe *cpipe);
void mac_pipe_label_free(struct label *label);
void mac_pipe_label_init(struct pipe *cpipe);
void mac_policy_initbsd(void);
-int mac_posixsem_check_create(kauth_cred_t cred, const char *name);
-int mac_posixsem_check_open(kauth_cred_t cred, struct pseminfo *psem);
-int mac_posixsem_check_post(kauth_cred_t cred, struct pseminfo *psem);
+int mac_posixsem_check_create(kauth_cred_t cred, const char *name) __result_use_check;
+int mac_posixsem_check_open(kauth_cred_t cred, struct pseminfo *psem) __result_use_check;
+int mac_posixsem_check_post(kauth_cred_t cred, struct pseminfo *psem) __result_use_check;
int mac_posixsem_check_unlink(kauth_cred_t cred, struct pseminfo *psem,
- const char *name);
-int mac_posixsem_check_wait(kauth_cred_t cred, struct pseminfo *psem);
+ const char *name) __result_use_check;
+int mac_posixsem_check_wait(kauth_cred_t cred, struct pseminfo *psem) __result_use_check;
void mac_posixsem_vnode_label_associate(kauth_cred_t cred,
struct pseminfo *psem, struct label *plabel,
vnode_t vp, struct label *vlabel);
struct pseminfo *psem, const char *name);
void mac_posixsem_label_destroy(struct pseminfo *psem);
void mac_posixsem_label_init(struct pseminfo *psem);
-int mac_posixshm_check_create(kauth_cred_t cred, const char *name);
+int mac_posixshm_check_create(kauth_cred_t cred, const char *name) __result_use_check;
int mac_posixshm_check_mmap(kauth_cred_t cred, struct pshminfo *pshm,
- int prot, int flags);
+ int prot, int flags) __result_use_check;
int mac_posixshm_check_open(kauth_cred_t cred, struct pshminfo *pshm,
- int fflags);
-int mac_posixshm_check_stat(kauth_cred_t cred, struct pshminfo *pshm);
+ int fflags) __result_use_check;
+int mac_posixshm_check_stat(kauth_cred_t cred, struct pshminfo *pshm) __result_use_check;
int mac_posixshm_check_truncate(kauth_cred_t cred, struct pshminfo *pshm,
- off_t s);
+ off_t s) __result_use_check;
int mac_posixshm_check_unlink(kauth_cred_t cred, struct pshminfo *pshm,
- const char *name);
+ const char *name) __result_use_check;
void mac_posixshm_vnode_label_associate(kauth_cred_t cred,
struct pshminfo *pshm, struct label *plabel,
vnode_t vp, struct label *vlabel);
struct pshminfo *pshm, const char *name);
void mac_posixshm_label_destroy(struct pshminfo *pshm);
void mac_posixshm_label_init(struct pshminfo *pshm);
-int mac_priv_check(kauth_cred_t cred, int priv);
-int mac_priv_grant(kauth_cred_t cred, int priv);
-int mac_proc_check_debug(proc_ident_t tracing_ident, kauth_cred_t tracing_cred, proc_ident_t traced_ident);
-int mac_proc_check_dump_core(proc_t proc);
-int mac_proc_check_proc_info(proc_t curp, proc_t target, int callnum, int flavor);
-int mac_proc_check_get_cs_info(proc_t curp, proc_t target, unsigned int op);
-int mac_proc_check_set_cs_info(proc_t curp, proc_t target, unsigned int op);
-int mac_proc_check_fork(proc_t proc);
-int mac_proc_check_suspend_resume(proc_t proc, int sr);
-int mac_proc_check_get_task_name(kauth_cred_t cred, proc_ident_t pident);
-int mac_proc_check_get_task(kauth_cred_t cred, proc_ident_t pident);
-int mac_proc_check_expose_task(kauth_cred_t cred, proc_ident_t pident);
-int mac_proc_check_inherit_ipc_ports(struct proc *p, struct vnode *cur_vp, off_t cur_offset, struct vnode *img_vp, off_t img_offset, struct vnode *scriptvp);
-int mac_proc_check_getaudit(proc_t proc);
-int mac_proc_check_getauid(proc_t proc);
+int mac_priv_check(kauth_cred_t cred, int priv) __result_use_check;
+int mac_priv_grant(kauth_cred_t cred, int priv) __result_use_check;
+int mac_proc_check_debug(proc_ident_t tracing_ident, kauth_cred_t tracing_cred, proc_ident_t traced_ident) __result_use_check;
+int mac_proc_check_dump_core(proc_t proc) __result_use_check;
+int mac_proc_check_proc_info(proc_t curp, proc_t target, int callnum, int flavor) __result_use_check;
+int mac_proc_check_get_cs_info(proc_t curp, proc_t target, unsigned int op) __result_use_check;
+int mac_proc_check_set_cs_info(proc_t curp, proc_t target, unsigned int op) __result_use_check;
+int mac_proc_check_fork(proc_t proc) __result_use_check;
+int mac_proc_check_suspend_resume(proc_t proc, int sr) __result_use_check;
+int mac_proc_check_get_task(kauth_cred_t cred, proc_ident_t pident, mach_task_flavor_t flavor) __result_use_check;
+int mac_proc_check_expose_task(kauth_cred_t cred, proc_ident_t pident, mach_task_flavor_t flavor) __result_use_check;
+int mac_proc_check_get_movable_control_port(void) __result_use_check;
+int mac_proc_check_inherit_ipc_ports(struct proc *p, struct vnode *cur_vp, off_t cur_offset, struct vnode *img_vp, off_t img_offset, struct vnode *scriptvp) __result_use_check;
+int mac_proc_check_getaudit(proc_t proc) __result_use_check;
+int mac_proc_check_getauid(proc_t proc) __result_use_check;
int mac_proc_check_getlcid(proc_t proc1, proc_t proc2,
- pid_t pid);
-int mac_proc_check_ledger(proc_t curp, proc_t target, int op);
+ pid_t pid) __result_use_check;
+int mac_proc_check_dyld_process_info_notify_register(void) __result_use_check;
+int mac_proc_check_ledger(proc_t curp, proc_t target, int op) __result_use_check;
int mac_proc_check_map_anon(proc_t proc, user_addr_t u_addr,
- user_size_t u_size, int prot, int flags, int *maxprot);
+ user_size_t u_size, int prot, int flags, int *maxprot) __result_use_check;
int mac_proc_check_mprotect(proc_t proc,
- user_addr_t addr, user_size_t size, int prot);
-int mac_proc_check_run_cs_invalid(proc_t proc);
+ user_addr_t addr, user_size_t size, int prot) __result_use_check;
+int mac_proc_check_run_cs_invalid(proc_t proc) __result_use_check;
void mac_proc_notify_cs_invalidated(proc_t proc);
-int mac_proc_check_sched(proc_t proc, proc_t proc2);
-int mac_proc_check_setaudit(proc_t proc, struct auditinfo_addr *ai);
-int mac_proc_check_setauid(proc_t proc, uid_t auid);
+int mac_proc_check_sched(proc_t proc, proc_t proc2) __result_use_check;
+int mac_proc_check_setaudit(proc_t proc, struct auditinfo_addr *ai) __result_use_check;
+int mac_proc_check_setauid(proc_t proc, uid_t auid) __result_use_check;
int mac_proc_check_setlcid(proc_t proc1, proc_t proc2,
- pid_t pid1, pid_t pid2);
+ pid_t pid1, pid_t pid2) __result_use_check;
int mac_proc_check_signal(proc_t proc1, proc_t proc2,
- int signum);
-int mac_proc_check_syscall_unix(proc_t proc, int scnum);
-int mac_proc_check_wait(proc_t proc1, proc_t proc2);
+ int signum) __result_use_check;
+int mac_proc_check_syscall_unix(proc_t proc, int scnum) __result_use_check;
+int mac_proc_check_wait(proc_t proc1, proc_t proc2) __result_use_check;
void mac_proc_notify_exit(proc_t proc);
-int mac_socket_check_accept(kauth_cred_t cred, struct socket *so);
-int mac_socket_check_accepted(kauth_cred_t cred, struct socket *so);
+int mac_socket_check_accept(kauth_cred_t cred, struct socket *so) __result_use_check;
+int mac_socket_check_accepted(kauth_cred_t cred, struct socket *so) __result_use_check;
int mac_socket_check_bind(kauth_cred_t cred, struct socket *so,
- struct sockaddr *addr);
+ struct sockaddr *addr) __result_use_check;
int mac_socket_check_connect(kauth_cred_t cred, struct socket *so,
- struct sockaddr *addr);
+ struct sockaddr *addr) __result_use_check;
int mac_socket_check_create(kauth_cred_t cred, int domain,
- int type, int protocol);
+ int type, int protocol) __result_use_check;
int mac_socket_check_ioctl(kauth_cred_t cred, struct socket *so,
- unsigned long cmd);
-int mac_socket_check_listen(kauth_cred_t cred, struct socket *so);
-int mac_socket_check_receive(kauth_cred_t cred, struct socket *so);
+ unsigned long cmd) __result_use_check;
+int mac_socket_check_listen(kauth_cred_t cred, struct socket *so) __result_use_check;
+int mac_socket_check_receive(kauth_cred_t cred, struct socket *so) __result_use_check;
int mac_socket_check_received(kauth_cred_t cred, struct socket *so,
- struct sockaddr *saddr);
+ struct sockaddr *saddr) __result_use_check;
int mac_socket_check_send(kauth_cred_t cred, struct socket *so,
- struct sockaddr *addr);
+ struct sockaddr *addr) __result_use_check;
int mac_socket_check_getsockopt(kauth_cred_t cred, struct socket *so,
- struct sockopt *sopt);
+ struct sockopt *sopt) __result_use_check;
int mac_socket_check_setsockopt(kauth_cred_t cred, struct socket *so,
- struct sockopt *sopt);
-int mac_socket_check_stat(kauth_cred_t cred, struct socket *so);
+ struct sockopt *sopt) __result_use_check;
+int mac_socket_check_stat(kauth_cred_t cred, struct socket *so) __result_use_check;
void mac_socket_label_associate(kauth_cred_t cred, struct socket *so);
void mac_socket_label_associate_accept(struct socket *oldsocket,
struct socket *newsocket);
void mac_socket_label_copy(struct label *from, struct label *to);
void mac_socket_label_destroy(struct socket *);
int mac_socket_label_get(kauth_cred_t cred, struct socket *so,
- struct mac *extmac);
-int mac_socket_label_init(struct socket *, int waitok);
+ struct mac *extmac) __result_use_check;
+int mac_socket_label_init(struct socket *, int waitok) __result_use_check;
void mac_socketpeer_label_associate_socket(struct socket *peersocket,
struct socket *socket_to_modify);
int mac_socketpeer_label_get(kauth_cred_t cred, struct socket *so,
- struct mac *extmac);
-int mac_system_check_acct(kauth_cred_t cred, struct vnode *vp);
-int mac_system_check_audit(kauth_cred_t cred, void *record, int length);
-int mac_system_check_auditctl(kauth_cred_t cred, struct vnode *vp);
-int mac_system_check_auditon(kauth_cred_t cred, int cmd);
-int mac_system_check_host_priv(kauth_cred_t cred);
-int mac_system_check_info(kauth_cred_t, const char *info_type);
-int mac_system_check_nfsd(kauth_cred_t cred);
-int mac_system_check_reboot(kauth_cred_t cred, int howto);
-int mac_system_check_settime(kauth_cred_t cred);
-int mac_system_check_swapoff(kauth_cred_t cred, struct vnode *vp);
-int mac_system_check_swapon(kauth_cred_t cred, struct vnode *vp);
+ struct mac *extmac) __result_use_check;
+int mac_system_check_acct(kauth_cred_t cred, struct vnode *vp) __result_use_check;
+int mac_system_check_audit(kauth_cred_t cred, void *record, int length) __result_use_check;
+int mac_system_check_auditctl(kauth_cred_t cred, struct vnode *vp) __result_use_check;
+int mac_system_check_auditon(kauth_cred_t cred, int cmd) __result_use_check;
+int mac_system_check_host_priv(kauth_cred_t cred) __result_use_check;
+int mac_system_check_info(kauth_cred_t, const char *info_type) __result_use_check;
+int mac_system_check_nfsd(kauth_cred_t cred) __result_use_check;
+int mac_system_check_reboot(kauth_cred_t cred, int howto) __result_use_check;
+int mac_system_check_settime(kauth_cred_t cred) __result_use_check;
+int mac_system_check_swapoff(kauth_cred_t cred, struct vnode *vp) __result_use_check;
+int mac_system_check_swapon(kauth_cred_t cred, struct vnode *vp) __result_use_check;
int mac_system_check_sysctlbyname(kauth_cred_t cred, const char *namestring, int *name,
size_t namelen, user_addr_t oldctl, size_t oldlen,
- user_addr_t newctl, size_t newlen);
-int mac_system_check_kas_info(kauth_cred_t cred, int selector);
+ user_addr_t newctl, size_t newlen) __result_use_check;
+int mac_system_check_kas_info(kauth_cred_t cred, int selector) __result_use_check;
void mac_sysvmsg_label_associate(kauth_cred_t cred,
struct msqid_kernel *msqptr, struct msg *msgptr);
void mac_sysvmsg_label_init(struct msg *msgptr);
void mac_sysvmsg_label_recycle(struct msg *msgptr);
int mac_sysvmsq_check_enqueue(kauth_cred_t cred, struct msg *msgptr,
- struct msqid_kernel *msqptr);
-int mac_sysvmsq_check_msgrcv(kauth_cred_t cred, struct msg *msgptr);
-int mac_sysvmsq_check_msgrmid(kauth_cred_t cred, struct msg *msgptr);
+ struct msqid_kernel *msqptr) __result_use_check;
+int mac_sysvmsq_check_msgrcv(kauth_cred_t cred, struct msg *msgptr) __result_use_check;
+int mac_sysvmsq_check_msgrmid(kauth_cred_t cred, struct msg *msgptr) __result_use_check;
int mac_sysvmsq_check_msqctl(kauth_cred_t cred,
- struct msqid_kernel *msqptr, int cmd);
+ struct msqid_kernel *msqptr, int cmd) __result_use_check;
int mac_sysvmsq_check_msqget(kauth_cred_t cred,
- struct msqid_kernel *msqptr);
+ struct msqid_kernel *msqptr) __result_use_check;
int mac_sysvmsq_check_msqrcv(kauth_cred_t cred,
- struct msqid_kernel *msqptr);
+ struct msqid_kernel *msqptr) __result_use_check;
int mac_sysvmsq_check_msqsnd(kauth_cred_t cred,
- struct msqid_kernel *msqptr);
+ struct msqid_kernel *msqptr) __result_use_check;
void mac_sysvmsq_label_associate(kauth_cred_t cred,
struct msqid_kernel *msqptr);
void mac_sysvmsq_label_init(struct msqid_kernel *msqptr);
void mac_sysvmsq_label_recycle(struct msqid_kernel *msqptr);
int mac_sysvsem_check_semctl(kauth_cred_t cred,
- struct semid_kernel *semakptr, int cmd);
+ struct semid_kernel *semakptr, int cmd) __result_use_check;
int mac_sysvsem_check_semget(kauth_cred_t cred,
- struct semid_kernel *semakptr);
+ struct semid_kernel *semakptr) __result_use_check;
int mac_sysvsem_check_semop(kauth_cred_t cred,
- struct semid_kernel *semakptr, size_t accesstype);
+ struct semid_kernel *semakptr, size_t accesstype) __result_use_check;
void mac_sysvsem_label_associate(kauth_cred_t cred,
struct semid_kernel *semakptr);
void mac_sysvsem_label_destroy(struct semid_kernel *semakptr);
void mac_sysvsem_label_init(struct semid_kernel *semakptr);
void mac_sysvsem_label_recycle(struct semid_kernel *semakptr);
int mac_sysvshm_check_shmat(kauth_cred_t cred,
- struct shmid_kernel *shmsegptr, int shmflg);
+ struct shmid_kernel *shmsegptr, int shmflg) __result_use_check;
int mac_sysvshm_check_shmctl(kauth_cred_t cred,
- struct shmid_kernel *shmsegptr, int cmd);
+ struct shmid_kernel *shmsegptr, int cmd) __result_use_check;
int mac_sysvshm_check_shmdt(kauth_cred_t cred,
- struct shmid_kernel *shmsegptr);
+ struct shmid_kernel *shmsegptr) __result_use_check;
int mac_sysvshm_check_shmget(kauth_cred_t cred,
- struct shmid_kernel *shmsegptr, int shmflg);
+ struct shmid_kernel *shmsegptr, int shmflg) __result_use_check;
void mac_sysvshm_label_associate(kauth_cred_t cred,
struct shmid_kernel *shmsegptr);
void mac_sysvshm_label_destroy(struct shmid_kernel *shmsegptr);
void mac_sysvshm_label_init(struct shmid_kernel* shmsegptr);
void mac_sysvshm_label_recycle(struct shmid_kernel *shmsegptr);
int mac_vnode_check_access(vfs_context_t ctx, struct vnode *vp,
- int acc_mode);
-int mac_vnode_check_chdir(vfs_context_t ctx, struct vnode *dvp);
+ int acc_mode) __result_use_check;
+int mac_vnode_check_chdir(vfs_context_t ctx, struct vnode *dvp) __result_use_check;
int mac_vnode_check_chroot(vfs_context_t ctx, struct vnode *dvp,
- struct componentname *cnp);
+ struct componentname *cnp) __result_use_check;
int mac_vnode_check_clone(vfs_context_t ctx, struct vnode *dvp,
- struct vnode *vp, struct componentname *cnp);
+ struct vnode *vp, struct componentname *cnp) __result_use_check;
int mac_vnode_check_create(vfs_context_t ctx, struct vnode *dvp,
- struct componentname *cnp, struct vnode_attr *vap);
+ struct componentname *cnp, struct vnode_attr *vap) __result_use_check;
int mac_vnode_check_deleteextattr(vfs_context_t ctx, struct vnode *vp,
- const char *name);
+ const char *name) __result_use_check;
int mac_vnode_check_exchangedata(vfs_context_t ctx, struct vnode *v1,
- struct vnode *v2);
+ struct vnode *v2) __result_use_check;
int mac_vnode_check_exec(vfs_context_t ctx, struct vnode *vp,
- struct image_params *imgp);
-int mac_vnode_check_fsgetpath(vfs_context_t ctx, struct vnode *vp);
+ struct image_params *imgp) __result_use_check;
+int mac_vnode_check_fsgetpath(vfs_context_t ctx, struct vnode *vp) __result_use_check;
int mac_vnode_check_getattr(vfs_context_t ctx, struct ucred *file_cred,
- struct vnode *vp, struct vnode_attr *va);
+ struct vnode *vp, struct vnode_attr *va) __result_use_check;
int mac_vnode_check_getattrlist(vfs_context_t ctx, struct vnode *vp,
- struct attrlist *alist);
+ struct attrlist *alist) __result_use_check;
int mac_vnode_check_getextattr(vfs_context_t ctx, struct vnode *vp,
- const char *name, struct uio *uio);
+ const char *name, struct uio *uio) __result_use_check;
int mac_vnode_check_ioctl(vfs_context_t ctx, struct vnode *vp,
- unsigned long cmd);
+ unsigned long cmd) __result_use_check;
int mac_vnode_check_kqfilter(vfs_context_t ctx,
- kauth_cred_t file_cred, struct knote *kn, struct vnode *vp);
+ kauth_cred_t file_cred, struct knote *kn, struct vnode *vp) __result_use_check;
int mac_vnode_check_label_update(vfs_context_t ctx, struct vnode *vp,
- struct label *newlabel);
+ struct label *newlabel) __result_use_check;
int mac_vnode_check_link(vfs_context_t ctx, struct vnode *dvp,
- struct vnode *vp, struct componentname *cnp);
-int mac_vnode_check_listextattr(vfs_context_t ctx, struct vnode *vp);
+ struct vnode *vp, struct componentname *cnp) __result_use_check;
+int mac_vnode_check_listextattr(vfs_context_t ctx, struct vnode *vp) __result_use_check;
int mac_vnode_check_lookup(vfs_context_t ctx, struct vnode *dvp,
- struct componentname *cnp);
+ struct componentname *cnp) __result_use_check;
int mac_vnode_check_lookup_preflight(vfs_context_t ctx, struct vnode *dvp,
- const char *path, size_t pathlen);
+ const char *path, size_t pathlen) __result_use_check;
int mac_vnode_check_open(vfs_context_t ctx, struct vnode *vp,
- int acc_mode);
+ int acc_mode) __result_use_check;
int mac_vnode_check_read(vfs_context_t ctx,
- kauth_cred_t file_cred, struct vnode *vp);
-int mac_vnode_check_readdir(vfs_context_t ctx, struct vnode *vp);
-int mac_vnode_check_readlink(vfs_context_t ctx, struct vnode *vp);
+ kauth_cred_t file_cred, struct vnode *vp) __result_use_check;
+int mac_vnode_check_readdir(vfs_context_t ctx, struct vnode *vp) __result_use_check;
+int mac_vnode_check_readlink(vfs_context_t ctx, struct vnode *vp) __result_use_check;
int mac_vnode_check_rename(vfs_context_t ctx, struct vnode *dvp,
struct vnode *vp, struct componentname *cnp, struct vnode *tdvp,
- struct vnode *tvp, struct componentname *tcnp);
-int mac_vnode_check_revoke(vfs_context_t ctx, struct vnode *vp);
+ struct vnode *tvp, struct componentname *tcnp) __result_use_check;
+int mac_vnode_check_revoke(vfs_context_t ctx, struct vnode *vp) __result_use_check;
int mac_vnode_check_searchfs(vfs_context_t ctx, struct vnode *vp,
- struct attrlist *alist);
+ struct attrlist *alist) __result_use_check;
int mac_vnode_check_select(vfs_context_t ctx, struct vnode *vp,
- int which);
+ int which) __result_use_check;
int mac_vnode_check_setacl(vfs_context_t ctx, struct vnode *vp,
- struct kauth_acl *acl);
+ struct kauth_acl *acl) __result_use_check;
int mac_vnode_check_setattrlist(vfs_context_t ctxd, struct vnode *vp,
- struct attrlist *alist);
+ struct attrlist *alist) __result_use_check;
int mac_vnode_check_setextattr(vfs_context_t ctx, struct vnode *vp,
- const char *name, struct uio *uio);
+ const char *name, struct uio *uio) __result_use_check;
int mac_vnode_check_setflags(vfs_context_t ctx, struct vnode *vp,
- u_long flags);
+ u_long flags) __result_use_check;
int mac_vnode_check_setmode(vfs_context_t ctx, struct vnode *vp,
- mode_t mode);
+ mode_t mode) __result_use_check;
int mac_vnode_check_setowner(vfs_context_t ctx, struct vnode *vp,
- uid_t uid, gid_t gid);
+ uid_t uid, gid_t gid) __result_use_check;
int mac_vnode_check_setutimes(vfs_context_t ctx, struct vnode *vp,
- struct timespec atime, struct timespec mtime);
+ struct timespec atime, struct timespec mtime) __result_use_check;
int mac_vnode_check_signature(struct vnode *vp,
struct cs_blob *cs_blob, struct image_params *imgp,
unsigned int *cs_flags, unsigned int *signer_type,
- int flags, unsigned int platform);
+ int flags, unsigned int platform) __result_use_check;
int mac_vnode_check_supplemental_signature(struct vnode *vp,
struct cs_blob *cs_blob, struct vnode *linked_vp,
- struct cs_blob *linked_cs_blob, unsigned int *signer_type);
+ struct cs_blob *linked_cs_blob, unsigned int *signer_type) __result_use_check;
int mac_vnode_check_stat(vfs_context_t ctx,
- kauth_cred_t file_cred, struct vnode *vp);
+ kauth_cred_t file_cred, struct vnode *vp) __result_use_check;
#ifdef KERNEL_PRIVATE
int mac_vnode_check_trigger_resolve(vfs_context_t ctx, struct vnode *dvp,
- struct componentname *cnp);
+ struct componentname *cnp) __result_use_check;
#endif
int mac_vnode_check_truncate(vfs_context_t ctx,
- kauth_cred_t file_cred, struct vnode *vp);
+ kauth_cred_t file_cred, struct vnode *vp) __result_use_check;
int mac_vnode_check_uipc_bind(vfs_context_t ctx, struct vnode *dvp,
- struct componentname *cnp, struct vnode_attr *vap);
-int mac_vnode_check_uipc_connect(vfs_context_t ctx, struct vnode *vp, struct socket *so);
+ struct componentname *cnp, struct vnode_attr *vap) __result_use_check;
+int mac_vnode_check_uipc_connect(vfs_context_t ctx, struct vnode *vp, struct socket *so) __result_use_check;
int mac_vnode_check_unlink(vfs_context_t ctx, struct vnode *dvp,
- struct vnode *vp, struct componentname *cnp);
+ struct vnode *vp, struct componentname *cnp) __result_use_check;
int mac_vnode_check_write(vfs_context_t ctx,
- kauth_cred_t file_cred, struct vnode *vp);
+ kauth_cred_t file_cred, struct vnode *vp) __result_use_check;
struct label *mac_vnode_label_alloc(void);
int mac_vnode_label_associate(struct mount *mp, struct vnode *vp,
- vfs_context_t ctx);
+ vfs_context_t ctx) __result_use_check;
void mac_vnode_label_associate_devfs(struct mount *mp, struct devnode *de,
struct vnode *vp);
-int mac_vnode_label_associate_extattr(struct mount *mp, struct vnode *vp);
+int mac_vnode_label_associate_extattr(struct mount *mp, struct vnode *vp) __result_use_check;
int mac_vnode_label_associate_fdesc(struct mount *mp, struct fdescnode *fnp,
- struct vnode *vp, vfs_context_t ctx);
+ struct vnode *vp, vfs_context_t ctx) __result_use_check;
void mac_vnode_label_associate_singlelabel(struct mount *mp,
struct vnode *vp);
void mac_vnode_label_copy(struct label *l1, struct label *l2);
void mac_vnode_label_destroy(struct vnode *vp);
-int mac_vnode_label_externalize_audit(struct vnode *vp, struct mac *mac);
+int mac_vnode_label_externalize_audit(struct vnode *vp, struct mac *mac) __result_use_check;
void mac_vnode_label_free(struct label *label);
void mac_vnode_label_init(struct vnode *vp);
-int mac_vnode_label_init_needed(struct vnode *vp);
+int mac_vnode_label_init_needed(struct vnode *vp) __result_use_check;
#ifdef KERNEL_PRIVATE
struct label *mac_vnode_label_allocate(vnode_t vp);
#endif
void mac_vnode_label_update_extattr(struct mount *mp, struct vnode *vp,
const char *name);
int mac_vnode_notify_create(vfs_context_t ctx, struct mount *mp,
- struct vnode *dvp, struct vnode *vp, struct componentname *cnp);
+ struct vnode *dvp, struct vnode *vp, struct componentname *cnp) __result_use_check;
void mac_vnode_notify_deleteextattr(vfs_context_t ctx, struct vnode *vp, const char *name);
void mac_vnode_notify_link(vfs_context_t ctx, struct vnode *vp,
struct vnode *dvp, struct componentname *cnp);
void mac_vnode_notify_setowner(vfs_context_t ctx, struct vnode *vp, uid_t uid, gid_t gid);
void mac_vnode_notify_setutimes(vfs_context_t ctx, struct vnode *vp, struct timespec atime, struct timespec mtime);
void mac_vnode_notify_truncate(vfs_context_t ctx, kauth_cred_t file_cred, struct vnode *vp);
-int mac_vnode_find_sigs(struct proc *p, struct vnode *vp, off_t offsetInMacho);
+int mac_vnode_find_sigs(struct proc *p, struct vnode *vp, off_t offsetInMacho) __result_use_check;
int vnode_label(struct mount *mp, struct vnode *dvp, struct vnode *vp,
- struct componentname *cnp, int flags, vfs_context_t ctx);
+ struct componentname *cnp, int flags, vfs_context_t ctx) __result_use_check;
void vnode_relabel(struct vnode *vp);
void mac_pty_notify_grant(proc_t p, struct tty *tp, dev_t dev, struct label *label);
void mac_pty_notify_close(proc_t p, struct tty *tp, dev_t dev, struct label *label);
-int mac_kext_check_load(kauth_cred_t cred, const char *identifier);
-int mac_kext_check_unload(kauth_cred_t cred, const char *identifier);
-int mac_kext_check_query(kauth_cred_t cred);
-int mac_skywalk_flow_check_connect(proc_t p, void *flow, const struct sockaddr *addr, int type, int protocol);
-int mac_skywalk_flow_check_listen(proc_t p, void *flow, const struct sockaddr *addr, int type, int protocol);
+int mac_kext_check_load(kauth_cred_t cred, const char *identifier) __result_use_check;
+int mac_kext_check_unload(kauth_cred_t cred, const char *identifier) __result_use_check;
+int mac_kext_check_query(kauth_cred_t cred) __result_use_check;
+int mac_skywalk_flow_check_connect(proc_t p, void *flow, const struct sockaddr *addr, int type, int protocol) __result_use_check;
+int mac_skywalk_flow_check_listen(proc_t p, void *flow, const struct sockaddr *addr, int type, int protocol) __result_use_check;
void mac_vnode_notify_reclaim(vnode_t vp);
void psem_label_associate(struct fileproc *fp, struct vnode *vp, struct vfs_context *ctx);
#include <security/mac_framework.h>
#include <security/mac_internal.h>
+int
+mac_iokit_check_open_service(kauth_cred_t cred, io_object_t service, unsigned int user_client_type)
+{
+ int error;
+
+ MAC_CHECK(iokit_check_open_service, cred, service, user_client_type);
+ return error;
+}
+
int
mac_iokit_check_open(kauth_cred_t cred, io_object_t user_client, unsigned int user_client_type)
{
}
int
-mac_task_check_expose_task(struct task *task)
+mac_task_check_expose_task(struct task *task, mach_task_flavor_t flavor)
{
int error;
+ assert(flavor <= TASK_FLAVOR_NAME);
+
struct proc *p = mac_task_get_proc(task);
if (p == NULL) {
return ESRCH;
struct ucred *cred = kauth_cred_get();
proc_rele(p);
- MAC_CHECK(proc_check_expose_task, cred, &pident);
+
+ /* Also call the old hook for compatibility, deprecating in rdar://66356944. */
+ if (flavor == TASK_FLAVOR_CONTROL) {
+ MAC_CHECK(proc_check_expose_task, cred, &pident);
+ if (error) {
+ return error;
+ }
+ }
+
+ MAC_CHECK(proc_check_expose_task_with_flavor, cred, &pident, flavor);
+
+ return error;
+}
+
+int
+mac_task_check_task_id_token_get_task(struct task *task, mach_task_flavor_t flavor)
+{
+ int error;
+
+ assert(flavor <= TASK_FLAVOR_NAME);
+
+ struct proc *p = mac_task_get_proc(task);
+ if (p == NULL) {
+ return ESRCH;
+ }
+ struct proc_ident pident = proc_ident(p);
+
+ proc_rele(p);
+
+ p = current_proc();
+ kauth_cred_t cred = kauth_cred_proc_ref(p);
+ MAC_CHECK(proc_check_task_id_token_get_task, cred, &pident, flavor);
+ kauth_cred_unref(&cred);
+ return error;
+}
+
+int
+mac_task_check_get_movable_control_port(void)
+{
+ int error;
+ struct proc *p = current_proc();
+
+ kauth_cred_t cred = kauth_cred_proc_ref(p);
+ MAC_CHECK(proc_check_get_movable_control_port, cred);
+ kauth_cred_unref(&cred);
return error;
}
return error;
}
+int
+mac_task_check_dyld_process_info_notify_register(void)
+{
+ int error;
+ struct proc *p = current_proc();
+
+ kauth_cred_t cred = kauth_cred_proc_ref(p);
+ MAC_CHECK(proc_check_dyld_process_info_notify_register, cred);
+ kauth_cred_unref(&cred);
+ return error;
+}
+
int
mac_task_check_set_host_exception_ports(struct task *task, unsigned int exception_mask)
{
void mac_policy_initmach(void);
/* tasks */
-int mac_task_check_expose_task(struct task *t);
-
+int mac_task_check_expose_task(struct task *t, mach_task_flavor_t flavor);
+int mac_task_check_task_id_token_get_task(struct task *t, mach_task_flavor_t flavor);
int mac_task_check_set_host_special_port(struct task *task,
int id, struct ipc_port *port);
int mac_task_check_set_host_exception_port(struct task *task,
unsigned int exception);
int mac_task_check_set_host_exception_ports(struct task *task,
unsigned int exception_mask);
+int mac_task_check_get_movable_control_port(void);
+int mac_task_check_dyld_process_info_notify_register(void);
/* See rdar://problem/58989880 */
#ifndef bitstr_test
typedef int (*mac_task_kobj_filter_cbfunc_t)(struct proc *bsdinfo, int msgid, int index);
extern mac_task_mach_filter_cbfunc_t mac_task_mach_trap_evaluate;
extern mac_task_kobj_filter_cbfunc_t mac_task_kobj_msg_evaluate;
-extern int mach_trap_count;
+extern const int mach_trap_count;
extern int mach_kobj_count;
void mac_task_set_mach_filter_mask(struct task *task, uint8_t *maskptr);
*
* Determine whether the subject identified by the credential can open an
* I/O Kit device at the passed path of the passed user client class and
- * type.
+ * type. This check is performed after instantiating the user client.
+ * See also mpo_iokit_check_open_service_t.
*
* @return Return 0 if access is granted, or an appropriate value for
* errno should be returned.
io_object_t user_client,
unsigned int user_client_type
);
+/**
+ * @brief Access control check for opening an I/O Kit device
+ * @param cred Subject credential
+ * @param service Service instance
+ * @param user_client_type User client type
+ *
+ * Determine whether the subject identified by the credential can open an
+ * I/O Kit user client of the passed service and user client type.
+ * This check is performed before instantiating the user client. See also
+ * mpo_iokit_check_open_t.
+ *
+ * @return Return 0 if access is granted, or an appropriate value for
+ * errno should be returned.
+ */
+typedef int mpo_iokit_check_open_service_t(
+ kauth_cred_t cred,
+ io_object_t service,
+ unsigned int user_client_type
+ );
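As a rough illustration of how a policy module might adopt the new hook (not part of this patch; the policy name, constant, and deny rule below are hypothetical):

#include <sys/errno.h>
#include <security/mac_policy.h>

/* Hypothetical user-client type that this example policy refuses to open. */
#define EXAMPLE_DENIED_UC_TYPE 42u

static int
example_iokit_check_open_service(kauth_cred_t cred, io_object_t service,
    unsigned int user_client_type)
{
    (void)cred;
    (void)service;
    /* Runs before the user client is instantiated; return 0 to allow. */
    if (user_client_type == EXAMPLE_DENIED_UC_TYPE) {
        return EPERM;
    }
    return 0;
}

/* Wiring into the ops table uses the slot added further below in this patch. */
static struct mac_policy_ops example_policy_ops = {
    .mpo_iokit_check_open_service = example_iokit_check_open_service,
};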
/**
* @brief Access control check for setting I/O Kit device properties
* @param cred Subject credential
kauth_cred_t cred,
unsigned int exception
);
+/**
+ * @brief Access control check for getting a movable task/thread control port for the current task.
+ * @param cred Subject credential
+ *
+ * @return Return 0 if access is granted, otherwise an appropriate value for
+ * errno should be returned.
+ */
+typedef int mpo_proc_check_get_movable_control_port_t(
+ kauth_cred_t cred
+ );
+/**
+ * @brief Access control check for calling task_dyld_process_info_notify_register
+ * and task_dyld_process_info_notify_deregister.
+ * @param cred Subject credential
+ *
+ * @return Return 0 if access is granted, otherwise an appropriate value for
+ * errno should be returned.
+ */
+typedef int mpo_proc_check_dyld_process_info_notify_register_t(
+ kauth_cred_t cred
+ );
/**
* @brief Access control over pid_suspend, pid_resume and family
* @param cred Subject credential
struct proc_ident *pident
);
+/**
+ * @brief Access control check for getting a process's task ports of different flavors
+ * @param cred Subject credential
+ * @param pident Object unique process identifier
+ * @param flavor Requested task port flavor
+ *
+ * Determine whether the subject identified by the credential can get
+ * the passed process's task port of the given flavor.
+ * This call is used by the task_{,read,inspect,name}_for_pid(2) API.
+ *
+ * @return Return 0 if access is granted, otherwise an appropriate value for
+ * errno should be returned. Suggested failure: EACCES for label mismatch,
+ * EPERM for lack of privilege, or ESRCH to hide visibility of the target.
+ */
+typedef int mpo_proc_check_get_task_with_flavor_t(
+ kauth_cred_t cred,
+ struct proc_ident *pident,
+ mach_task_flavor_t flavor
+ );
+
/**
* @brief Access control check for exposing a process's task port
* @param cred Subject credential
struct proc_ident *pident
);
+/**
+ * @brief Access control check for exposing a process's task ports of different flavors
+ * @param cred Subject credential
+ * @param pident Object unique process identifier
+ * @param flavor Requested task port flavor
+ *
+ * Determine whether the subject identified by the credential can expose
+ * the passed process's task port of the given flavor.
+ * This call is used by the accessor APIs like processor_set_tasks() and
+ * processor_set_threads().
+ *
+ * @return Return 0 if access is granted, otherwise an appropriate value for
+ * errno should be returned. Suggested failure: EACCES for label mismatch,
+ * EPERM for lack of privilege, or ESRCH to hide visibility of the target.
+ */
+typedef int mpo_proc_check_expose_task_with_flavor_t(
+ kauth_cred_t cred,
+ struct proc_ident *pident,
+ mach_task_flavor_t flavor
+ );
+
+/**
+ * @brief Access control check for upgrading to a task port with a task identity token
+ * @param cred Subject credential
+ * @param pident Object unique process identifier
+ * @param flavor Requested task port flavor
+ *
+ * Determine whether the subject identified by the credential can upgrade to a task port
+ * of the given flavor with a task identity token of the passed process.
+ * This call is used by task_identity_token_get_task_port().
+ *
+ * @return Return 0 if access is granted, otherwise an appropriate value for
+ * errno should be returned. Suggested failure: EACCES for label mismatch,
+ * EPERM for lack of privilege, or ESRCH to hide visibility of the target.
+ */
+typedef int mpo_proc_check_task_id_token_get_task_t(
+ kauth_cred_t cred,
+ struct proc_ident *pident,
+ mach_task_flavor_t flavor
+ );
+
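A minimal sketch of how a policy could use the flavor argument shared by the three hooks above: the weaker flavors (NAME, INSPECT, READ) pass through, and only control-port requests are gated. The helper and its uid check are hypothetical, not part of this patch:

#include <stdbool.h>
#include <sys/errno.h>
#include <sys/kauth.h>
#include <security/mac_policy.h>

/* Hypothetical stand-in for a real policy decision. */
static bool
example_cred_is_privileged(kauth_cred_t cred)
{
    return kauth_cred_getuid(cred) == 0;
}

static int
example_proc_check_get_task_with_flavor(kauth_cred_t cred,
    struct proc_ident *pident, mach_task_flavor_t flavor)
{
    (void)pident;
    /* NAME/INSPECT/READ ports carry fewer rights; only gate CONTROL here. */
    if (flavor != TASK_FLAVOR_CONTROL) {
        return 0;
    }
    return example_cred_is_privileged(cred) ? 0 : EPERM;
}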
/**
* @brief Check whether task's IPC may inherit across process exec
* @param p current process instance
* Please note that this should be kept in sync with the check assumptions
* policy in bsd/kern/policy_check.c (policy_ops struct).
*/
-#define MAC_POLICY_OPS_VERSION 69 /* inc when new reserved slots are taken */
+#define MAC_POLICY_OPS_VERSION 74 /* inc when new reserved slots are taken */
struct mac_policy_ops {
mpo_audit_check_postselect_t *mpo_audit_check_postselect;
mpo_audit_check_preselect_t *mpo_audit_check_preselect;
mpo_mount_label_init_t *mpo_mount_label_init;
mpo_mount_label_internalize_t *mpo_mount_label_internalize;
- mpo_reserved_hook_t *mpo_reserved38;
- mpo_reserved_hook_t *mpo_reserved39;
- mpo_reserved_hook_t *mpo_reserved40;
+ mpo_proc_check_expose_task_with_flavor_t *mpo_proc_check_expose_task_with_flavor;
+ mpo_proc_check_get_task_with_flavor_t *mpo_proc_check_get_task_with_flavor;
+ mpo_proc_check_task_id_token_get_task_t *mpo_proc_check_task_id_token_get_task;
mpo_pipe_check_ioctl_t *mpo_pipe_check_ioctl;
mpo_pipe_check_kqfilter_t *mpo_pipe_check_kqfilter;
mpo_proc_notify_exec_complete_t *mpo_proc_notify_exec_complete;
mpo_proc_notify_cs_invalidated_t *mpo_proc_notify_cs_invalidated;
mpo_proc_check_syscall_unix_t *mpo_proc_check_syscall_unix;
- mpo_proc_check_expose_task_t *mpo_proc_check_expose_task;
+ mpo_proc_check_expose_task_t *mpo_proc_check_expose_task; /* Deprecating, use mpo_proc_check_expose_task_with_flavor instead */
mpo_proc_check_set_host_special_port_t *mpo_proc_check_set_host_special_port;
mpo_proc_check_set_host_exception_port_t *mpo_proc_check_set_host_exception_port;
mpo_exc_action_check_exception_send_t *mpo_exc_action_check_exception_send;
mpo_proc_check_debug_t *mpo_proc_check_debug;
mpo_proc_check_fork_t *mpo_proc_check_fork;
- mpo_proc_check_get_task_name_t *mpo_proc_check_get_task_name;
- mpo_proc_check_get_task_t *mpo_proc_check_get_task;
+ mpo_proc_check_get_task_name_t *mpo_proc_check_get_task_name; /* Deprecating, use mpo_proc_check_get_task_with_flavor instead */
+ mpo_proc_check_get_task_t *mpo_proc_check_get_task; /* Deprecating, use mpo_proc_check_get_task_with_flavor instead */
mpo_proc_check_getaudit_t *mpo_proc_check_getaudit;
mpo_proc_check_getauid_t *mpo_proc_check_getauid;
mpo_proc_check_getlcid_t *mpo_proc_check_getlcid;
mpo_socket_check_setsockopt_t *mpo_socket_check_setsockopt;
mpo_socket_check_getsockopt_t *mpo_socket_check_getsockopt;
- mpo_reserved_hook_t *mpo_reserved50;
- mpo_reserved_hook_t *mpo_reserved51;
+ mpo_proc_check_get_movable_control_port_t *mpo_proc_check_get_movable_control_port;
+ mpo_proc_check_dyld_process_info_notify_register_t *mpo_proc_check_dyld_process_info_notify_register;
mpo_reserved_hook_t *mpo_reserved52;
mpo_reserved_hook_t *mpo_reserved53;
mpo_reserved_hook_t *mpo_reserved54;
mpo_reserved_hook_t *mpo_reserved59;
mpo_reserved_hook_t *mpo_reserved60;
mpo_reserved_hook_t *mpo_reserved61;
- mpo_reserved_hook_t *mpo_reserved62;
+
+ mpo_iokit_check_open_service_t *mpo_iokit_check_open_service;
mpo_system_check_acct_t *mpo_system_check_acct;
mpo_system_check_audit_t *mpo_system_check_audit;
#include <mach/mach_types.h>
#include <kern/task.h>
+#include <os/hash.h>
+
#include <security/mac_internal.h>
#include <security/mac_mach_internal.h>
mac_labelzone_free(label);
}
-int
-mac_cred_label_compare(struct label *a, struct label *b)
+bool
+mac_cred_label_is_equal(const struct label *a, const struct label *b)
+{
+ if (a->l_flags != b->l_flags) {
+ return false;
+ }
+ for (int slot = 0; slot < MAC_MAX_SLOTS; slot++) {
+ const void *pa = a->l_perpolicy[slot].l_ptr;
+ const void *pb = b->l_perpolicy[slot].l_ptr;
+
+ if (pa != pb) {
+ return false;
+ }
+ }
+ return true;
+}
+
+uint32_t
+mac_cred_label_hash_update(const struct label *a, uint32_t hash)
{
- return bcmp(a, b, sizeof(*a)) == 0;
+ hash = os_hash_jenkins_update(&a->l_flags,
+ sizeof(a->l_flags), hash);
+#if __has_feature(ptrauth_calls)
+ for (int slot = 0; slot < MAC_MAX_SLOTS; slot++) {
+ const void *ptr = a->l_perpolicy[slot].l_ptr;
+ hash = os_hash_jenkins_update(&ptr, sizeof(ptr), hash);
+ }
+#else
+ hash = os_hash_jenkins_update(&a->l_perpolicy,
+ sizeof(a->l_perpolicy), hash);
+#endif
+ return hash;
}
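These two helpers are shaped like a hash-table key: hash a label with mac_cred_label_hash_update, then confirm candidates with mac_cred_label_is_equal. A minimal lookup sketch over a hypothetical fixed-size cache (not part of this patch):

#define EXAMPLE_LABEL_BUCKETS 64

struct example_label_entry {
    const struct label         *le_label;
    struct example_label_entry *le_next;
};

static struct example_label_entry *example_label_cache[EXAMPLE_LABEL_BUCKETS];

static struct example_label_entry *
example_label_lookup(const struct label *l)
{
    uint32_t hash = mac_cred_label_hash_update(l, 0);
    struct example_label_entry *e;

    for (e = example_label_cache[hash % EXAMPLE_LABEL_BUCKETS]; e != NULL; e = e->le_next) {
        if (mac_cred_label_is_equal(e->le_label, l)) {
            return e;
        }
    }
    return NULL;
}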
int
}
int
-mac_proc_check_get_task_name(struct ucred *cred, proc_ident_t pident)
+mac_proc_check_get_task(struct ucred *cred, proc_ident_t pident, mach_task_flavor_t flavor)
{
int error;
- MAC_CHECK(proc_check_get_task_name, cred, pident);
+ assert(flavor <= TASK_FLAVOR_NAME);
- return error;
-}
+ /* Also call the old hook for compatibility, deprecating in rdar://66356944. */
+ if (flavor == TASK_FLAVOR_CONTROL) {
+ MAC_CHECK(proc_check_get_task, cred, pident);
+ if (error) {
+ return error;
+ }
+ }
-int
-mac_proc_check_get_task(struct ucred *cred, proc_ident_t pident)
-{
- int error;
+ if (flavor == TASK_FLAVOR_NAME) {
+ MAC_CHECK(proc_check_get_task_name, cred, pident);
+ if (error) {
+ return error;
+ }
+ }
- MAC_CHECK(proc_check_get_task, cred, pident);
+ MAC_CHECK(proc_check_get_task_with_flavor, cred, pident, flavor);
return error;
}
int
-mac_proc_check_expose_task(struct ucred *cred, proc_ident_t pident)
+mac_proc_check_expose_task(struct ucred *cred, proc_ident_t pident, mach_task_flavor_t flavor)
{
int error;
- MAC_CHECK(proc_check_expose_task, cred, pident);
+ assert(flavor <= TASK_FLAVOR_NAME);
+
+ /* Also call the old hook for compatibility, deprecating in rdar://66356944. */
+ if (flavor == TASK_FLAVOR_CONTROL) {
+ MAC_CHECK(proc_check_expose_task, cred, pident);
+ if (error) {
+ return error;
+ }
+ }
+
+ MAC_CHECK(proc_check_expose_task_with_flavor, cred, pident, flavor);
return error;
}
sr_entitlement: OTHER_LDFLAGS += -ldarwintest_utils
+restrict_jit: CODE_SIGN_ENTITLEMENTS = restrict_jit.entitlements
+
backtracing: OTHER_LDFLAGS += -framework CoreSymbolication
backtracing: CODE_SIGN_ENTITLEMENTS = kernel_symbolication_entitlements.plist
data_protection: OTHER_LDFLAGS += -ldarwintest_utils -framework IOKit
+CUSTOM_TARGETS += immovable_send_client vm_spawn_tool
+
+exception_tests: excserver exc_helpers.c
+exception_tests: CODE_SIGN_ENTITLEMENTS = exception_tests.entitlements
+exception_tests: OTHER_CFLAGS += $(OBJROOT)/excserver.c
+exception_tests: OTHER_CFLAGS += -I $(OBJROOT)
+exception_tests: OTHER_CFLAGS += -DENTITLED=1
+
immovable_send: excserver
immovable_send: OTHER_CFLAGS += $(OBJROOT)/excserver.c -I $(OBJROOT)
immovable_send: OTHER_LDFLAGS += -ldarwintest_utils -lpthread -framework IOKit
-
-CUSTOM_TARGETS += immovable_send_client vm_spawn_tool inspect_port_nocodesign
immovable_send: immovable_send_client
+immovable_send_client: immovable_send_client.c
+ $(CC) $(DT_CFLAGS) -I $(OBJROOT) $(CFLAGS) $(DT_LDFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) immovable_send_client.c -o $(SYMROOT)/immovable_send_client
+
+install-immovable_send_client: immovable_send_client
+ mkdir -p $(INSTALLDIR)
+ cp $(SYMROOT)/immovable_send_client $(INSTALLDIR)/
+
vm_spawn_tool: INVALID_ARCHS = i386
vm_spawn_tool: vm_spawn_tool.c
$(CC) $(DT_CFLAGS) -I $(OBJROOT) $(CFLAGS) $(DT_LDFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) vm_spawn_tool.c -o $(SYMROOT)/vm_spawn_tool
mkdir -p $(INSTALLDIR)/tools
cp $(SYMROOT)/vm_spawn_tool $(INSTALLDIR)/tools/
-immovable_send_client: immovable_send_client.c
- $(CC) $(DT_CFLAGS) -I $(OBJROOT) $(CFLAGS) $(DT_LDFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) immovable_send_client.c -o $(SYMROOT)/immovable_send_client
+CUSTOM_TARGETS += imm_pinned_control_port_crasher
-install-immovable_send_client: immovable_send_client
- mkdir -p $(INSTALLDIR)
- cp $(SYMROOT)/immovable_send_client $(INSTALLDIR)/
+imm_pinned_control_port: excserver
+imm_pinned_control_port: CODE_SIGN_ENTITLEMENTS = task_for_pid_entitlement.plist
+imm_pinned_control_port: OTHER_CFLAGS += $(OBJROOT)/excserver.c -I $(OBJROOT)
+imm_pinned_control_port: OTHER_LDFLAGS += -ldarwintest_utils -lpthread
+imm_pinned_control_port: imm_pinned_control_port_crasher
-inspect_port_nocodesign: inspect_port.c
- $(CC) $(DT_CFLAGS) -I $(OBJROOT) -DT_NOCODESIGN=1 $(CFLAGS) $(DT_LDFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) $^ -o $(SYMROOT)/inspect_port_nocodesign
-
-install-inspect_port_nocodesign: inspect_port_nocodesign
+imm_pinned_control_port_crasher: imm_pinned_control_port_crasher.c
+ $(CC) $(DT_CFLAGS) -I $(OBJROOT) $(CFLAGS) $(DT_LDFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) imm_pinned_control_port_crasher.c -o $(SYMROOT)/imm_pinned_control_port_crasher
+ $(CODESIGN) --force --sign - --timestamp=none $(SYMROOT)/$@
+
+install-imm_pinned_control_port_crasher: imm_pinned_control_port_crasher
mkdir -p $(INSTALLDIR)
- env CODESIGN_ALLOCATE=$(CODESIGN_ALLOCATE) $(CODESIGN_ALLOCATE) -r -i $(SYMROOT)/inspect_port_nocodesign -o $(SYMROOT)/inspect_port_nocodesign
+ cp $(SYMROOT)/imm_pinned_control_port_crasher $(INSTALLDIR)/
kas_info: OTHER_LDFLAGS += -framework CoreSymbolication
kas_info: CODE_SIGN_ENTITLEMENTS = kernel_symbolication_entitlements.plist
kdebug: INVALID_ARCHS = i386
kdebug: OTHER_LDFLAGS = -framework ktrace -ldarwintest_utils -framework kperf
+kdebug: OTHER_CFLAGS += test_utils.c
-EXCLUDED_SOURCES += drop_priv.c kperf_helpers.c xnu_quick_test_helpers.c memorystatus_assertion_helpers.c bpflib.c in_cksum.c
+EXCLUDED_SOURCES += drop_priv.c kperf_helpers.c xnu_quick_test_helpers.c memorystatus_assertion_helpers.c bpflib.c in_cksum.c test_utils.c
ifneq ($(PLATFORM),iPhoneOS)
EXCLUDED_SOURCES += jumbo_va_spaces_28530648.c perf_compressor.c memorystatus_freeze_test.c vm/entitlement_increased_memory_limit.c
memorystatus_is_assertion: OTHER_LDFLAGS += -ldarwintest_utils
memorystatus_is_assertion: OTHER_CFLAGS += memorystatus_assertion_helpers.c
+memorystatus_vm_map_fork: OTHER_CFLAGS += test_utils.c
+
shared_cache_tests: OTHER_LDFLAGS += -ldarwintest_utils
stackshot_tests: OTHER_CFLAGS += -Wno-objc-messaging-id
kperf_backtracing: OTHER_LDFLAGS += -framework CoreSymbolication
kperf_backtracing: CODE_SIGN_ENTITLEMENTS = kernel_symbolication_entitlements.plist
+text_corruption: OTHER_LDFLAGS += -ldarwintest_utils
+CUSTOM_TARGETS += text_corruption_helper
+
+text_corruption_helper:
+ $(CC) $(LDFLAGS) $(CFLAGS) text_corruption_helper.c -lm -o $(SYMROOT)/$@;
+ env CODESIGN_ALLOCATE=$(CODESIGN_ALLOCATE) $(CODESIGN) --force --sign - --timestamp=none $(SYMROOT)/$@;
+
+install-text_corruption_helper:
+ mkdir -p $(INSTALLDIR)
+ cp $(SYMROOT)/text_corruption_helper $(INSTALLDIR)/
+
kevent_qos: OTHER_CFLAGS += -Wno-unused-macros
kevent_qos: OTHER_CFLAGS += -I $(OBJROOT)/
$(CXX) $(DT_CXXFLAGS) $(OTHER_CXXFLAGS) $(CXXFLAGS) $(DT_LDFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) $< -o $(SYMROOT)/$@
priority_queue: OTHER_CXXFLAGS += -std=c++17
+zalloc_buddy: OTHER_CFLAGS += -Wno-format-pedantic
os_refcnt: OTHER_CFLAGS += -I$(SRCROOT)/../libkern/ -Wno-gcc-compat -Wno-undef -O3 -flto
-task_inspect: CODE_SIGN_ENTITLEMENTS = task_inspect.entitlements
-task_inspect: OTHER_CFLAGS += -DENTITLED=1
+kernel_inspection: CODE_SIGN_ENTITLEMENTS = ./task_for_pid_entitlement.plist
+kernel_inspection: OTHER_CFLAGS += -DENTITLED=1
turnstile_multihop: OTHER_CFLAGS += -Wno-unused-macros
turnstile_multihop: OTHER_CFLAGS += -I $(OBJROOT)/
xnu_quick_test: OTHER_CFLAGS += xnu_quick_test_helpers.c
-xnu_quick_test_entitled: CODE_SIGN_ENTITLEMENTS = xnu_quick_test.entitlements
-
CUSTOM_TARGETS += vm_set_max_addr_helper
vm_set_max_addr_helper: vm_set_max_addr_helper.c
task_info_28439149: CODE_SIGN_ENTITLEMENTS = ./task_for_pid_entitlement.plist
-inspect_port: CODE_SIGN_ENTITLEMENTS = ./task_for_pid_entitlement.plist
+read_inspect: CODE_SIGN_ENTITLEMENTS = ./task_for_pid_entitlement.plist
proc_info: CODE_SIGN_ENTITLEMENTS = ./task_for_pid_entitlement.plist
proc_info: OTHER_LDFLAGS += -ldarwintest_utils
thread_group_set_32261625: OTHER_LDFLAGS = -framework ktrace
task_info: CODE_SIGN_ENTITLEMENTS = task_for_pid_entitlement.plist
+task_info: OTHER_CFLAGS += test_utils.c
+
+extract_right_soft_fail: CODE_SIGN_ENTITLEMENTS = task_for_pid_entitlement.plist
ifneq ($(PLATFORM),iPhoneOS)
EXCLUDED_SOURCES += task_vm_info_decompressions.c
prng: OTHER_LDFLAGS += -ldarwintest_utils
preoslog: OTHER_LDFLAGS += -ldarwintest_utils
+preoslog: OTHER_CFLAGS += test_utils.c
task_policy: CODE_SIGN_ENTITLEMENTS = ./task_policy_entitlement.plist
EXCLUDED_SOURCES += vm/kern_max_task_pmem.c
endif
-EXCLUDED_SOURCES += vm/perf_helpers.c
+EXCLUDED_SOURCES += benchmark/helpers.c
+
+perf_vmfault: OTHER_CFLAGS += benchmark/helpers.c
fault_throughput: vm/fault_throughput.c
mkdir -p $(SYMROOT)/vm
$(CC) $(DT_CFLAGS) $(OTHER_CFLAGS) $(CFLAGS) $(DT_LDFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) $< -o $(SYMROOT)/vm/$@
-fault_throughput: OTHER_CFLAGS += vm/perf_helpers.c
+fault_throughput: OTHER_CFLAGS += benchmark/helpers.c
install-fault_throughput: fault_throughput
mkdir -p $(INSTALLDIR)/vm
perf_madvise: vm/perf_madvise.c
mkdir -p $(SYMROOT)/vm
$(CC) $(DT_CFLAGS) $(OTHER_CFLAGS) $(CFLAGS) $(DT_LDFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) $< -o $(SYMROOT)/vm/$@
-perf_madvise: OTHER_CFLAGS += vm/perf_helpers.c
+perf_madvise: OTHER_CFLAGS += benchmark/helpers.c
install-perf_madvise: perf_madvise
mkdir -p $(INSTALLDIR)/vm
cp $(SYMROOT)/vm/perf_madvise $(INSTALLDIR)/vm/
task_create_suid_cred_unentitled: task_create_suid_cred.c
$(CC) $(DT_CFLAGS) $(OTHER_CFLAGS) $(CFLAGS) $(DT_LDFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) $< -o $(SYMROOT)/$@
-ifeq ($(PLATFORM),MacOSX)
-test_dext_launch_56101852: OTHER_LDFLAGS += -framework CoreFoundation -framework IOKit
-test_dext_launch_56101852: CODE_SIGN_ENTITLEMENTS += test_dext_launch_56101852.entitlements
-else
-EXCLUDED_SOURCES += test_dext_launch_56101852.c
-endif
-
ioconnectasyncmethod_57641955: OTHER_LDFLAGS += -framework IOKit
ifeq ($(PLATFORM),BridgeOS)
test_sysctl_kern_procargs_25397314: OTHER_LDFLAGS += -framework Foundation -ldarwintest_utils
+INCLUDED_TEST_SOURCE_DIRS += counter
+
+EXCLUDED_SOURCES += counter/common.c
+counter/counter: OTHER_CFLAGS += counter/common.c test_utils.c
+counter/counter: OTHER_LDFLAGS += -ldarwintest_utils -ldarwintest
+
+counter/benchmark: counter/benchmark.c
+ mkdir -p $(SYMROOT)/counter
+ $(CC) $(DT_CFLAGS) $(OTHER_CFLAGS) $(CFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) $< -o $(SYMROOT)/$@
+ env CODESIGN_ALLOCATE=$(CODESIGN_ALLOCATE) $(CODESIGN) --force --sign - --timestamp=none $(SYMROOT)/$@;
+
+counter/benchmark: OTHER_CFLAGS += counter/common.c benchmark/helpers.c
+
+install-counter/benchmark: counter/benchmark
+ mkdir -p $(INSTALLDIR)/counter
+ cp $(SYMROOT)/counter/benchmark $(INSTALLDIR)/counter/
+
+counter/benchmark_benchrun:
+ mkdir -p $(SYMROOT)/counter
+ cp $(SRCROOT)/counter/benchmark.lua $(SYMROOT)/counter/benchmark.lua
+ chmod +x $(SYMROOT)/counter/benchmark.lua
+
+install-counter/benchmark_benchrun: counter/benchmark_benchrun
+ mkdir -p $(INSTALLDIR)/counter
+ cp $(SYMROOT)/counter/benchmark.lua $(INSTALLDIR)/counter/
+ chmod +x $(INSTALLDIR)/counter/benchmark.lua
+
+CUSTOM_TARGETS += counter/benchmark counter/benchmark_benchrun
+EXCLUDED_SOURCES += counter/benchmark.c
+
+ifneq ($(PLATFORM),MacOSX)
+EXCLUDED_SOURCES += vm/page_size_globals.c
+else
+vm/page_size_globals: INVALID_ARCHS = arm64 arm64e
+endif
+
+INCLUDED_TEST_SOURCE_DIRS += lockf_uaf_poc
+
include $(DEVELOPER_DIR)/AppleInternal/Makefiles/darwintest/Makefile.targets
+
+trial_experiments: CODE_SIGN_ENTITLEMENTS = trial_experiments.entitlements
+trial_experiments: OTHER_CFLAGS += -DENTITLED=1 test_utils.c drop_priv.c
+trial_experiments: trial_experiments.c
+ $(CC) $(DT_CFLAGS) $(OTHER_CFLAGS) $(CFLAGS) $(DT_LDFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) $< -o $(SYMROOT)/$@
+ env CODESIGN_ALLOCATE=$(CODESIGN_ALLOCATE) $(CODESIGN) --force --sign - --timestamp=none --entitlements $(CODE_SIGN_ENTITLEMENTS) $(SYMROOT)/$@;
+
+trial_experiments_unentitled: OTHER_CFLAGS += drop_priv.c test_utils.c
+trial_experiments_unentitled: trial_experiments.c
+ $(CC) $(DT_CFLAGS) $(OTHER_CFLAGS) $(CFLAGS) $(DT_LDFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) $< -o $(SYMROOT)/$@
#include <mach/mach_error.h>
#include <mach/mach_host.h>
+#include "drop_priv.h"
+
T_GLOBAL_META(T_META_NAMESPACE("xnu.debugging"));
/*
*/
#define LIBTRACE_PRIVATE_DATA 0x01000000
-extern void drop_priv(void);
-
static bool _needs_reset;
static uint32_t _original;
--- /dev/null
+#include <assert.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/sysctl.h>
+
+#include <sys/mman.h>
+
+#include "benchmark/helpers.h"
+
+#define K_CTIME_BUFFER_LEN 26
+void
+benchmark_log(bool verbose, const char *restrict fmt, ...)
+{
+ time_t now;
+ char time_buffer[K_CTIME_BUFFER_LEN];
+ struct tm local_time;
+ va_list args;
+ if (verbose) {
+ strncpy(time_buffer, "UNKNOWN", K_CTIME_BUFFER_LEN);
+
+ now = time(NULL);
+ if (now != -1) {
+ struct tm* ret = localtime_r(&now, &local_time);
+ if (ret == &local_time) {
+ snprintf(time_buffer, K_CTIME_BUFFER_LEN,
+ "%.2d/%.2d/%.2d %.2d:%.2d:%.2d",
+ local_time.tm_mon + 1, local_time.tm_mday,
+ local_time.tm_year + 1900,
+ local_time.tm_hour, local_time.tm_min,
+ local_time.tm_sec);
+ }
+ }
+
+ printf("%s: ", time_buffer);
+ va_start(args, fmt);
+ vprintf(fmt, args);
+ fflush(stdout);
+ }
+}
+
+uint64_t
+timespec_difference_us(const struct timespec* a, const struct timespec* b)
+{
+ assert(a->tv_sec >= b->tv_sec || a->tv_nsec >= b->tv_nsec);
+ long seconds_elapsed = a->tv_sec - b->tv_sec;
+ uint64_t nsec_elapsed;
+ if (b->tv_nsec > a->tv_nsec) {
+ seconds_elapsed--;
+ nsec_elapsed = kNumNanosecondsInSecond - (uint64_t) (b->tv_nsec - a->tv_nsec);
+ } else {
+ nsec_elapsed = (uint64_t) (a->tv_nsec - b->tv_nsec);
+ }
+ return (uint64_t) seconds_elapsed * kNumMicrosecondsInSecond + nsec_elapsed / kNumNanosecondsInMicrosecond;
+}
+
+unsigned char *
+mmap_buffer(size_t memsize)
+{
+ int fd = -1;
+ unsigned char* addr = (unsigned char *)mmap(NULL, memsize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,
+ fd, 0);
+ if ((void*) addr == MAP_FAILED) {
+ fprintf(stderr, "Unable to mmap a memory object: %s\n", strerror(errno));
+ exit(2);
+ }
+ return addr;
+}
+
+int
+get_ncpu(void)
+{
+ int ncpu;
+ size_t length = sizeof(ncpu);
+
+ int ret = sysctlbyname("hw.ncpu", &ncpu, &length, NULL, 0);
+ if (ret == -1) {
+ fprintf(stderr, "failed to query hw.ncpu");
+ exit(1);
+ }
+ return ncpu;
+}
--- /dev/null
+#ifndef BENCHMARK_PERF_HELPERS_H
+#define BENCHMARK_PERF_HELPERS_H
+
+/*
+ * Utility functions and constants used by perf tests.
+ */
+#include <inttypes.h>
+#include <time.h>
+#include <stdbool.h>
+
+/*
+ * mmap an anonymous chunk of memory.
+ */
+unsigned char *mmap_buffer(size_t size);
+/*
+ * Returns a - b in microseconds.
+ * NB: a must be >= b
+ */
+uint64_t timespec_difference_us(const struct timespec* a, const struct timespec* b);
+/*
+ * Print the message to stdout along with the current time.
+ * Also flushes stdout so that the log can help detect hangs. Don't call
+ * this function from within the measured portion of the benchmark as it will
+ * pollute your measurement.
+ *
+ * NB: Will only log if verbose == true.
+ */
+void benchmark_log(bool verbose, const char *restrict fmt, ...) __attribute__((format(printf, 2, 3)));
+
+static const uint64_t kNumMicrosecondsInSecond = 1000UL * 1000;
+static const uint64_t kNumNanosecondsInMicrosecond = 1000UL;
+static const uint64_t kNumNanosecondsInSecond = kNumNanosecondsInMicrosecond * kNumMicrosecondsInSecond;
+/* Get a (wall-time) timestamp in nanoseconds */
+#define current_timestamp_ns() (clock_gettime_nsec_np(CLOCK_MONOTONIC_RAW));
+
+int get_ncpu(void);
+
+#endif /* !defined(BENCHMARK_PERF_HELPERS_H) */
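For reference, a short sketch of how these helpers are typically combined by the benchmarks that follow (illustrative only; the workload is a placeholder):

#include <time.h>
#include "benchmark/helpers.h"

static void
example_measure(bool verbose)
{
    struct timespec start, end;

    clock_gettime(CLOCK_MONOTONIC_RAW, &start);
    /* ... workload under measurement ... */
    clock_gettime(CLOCK_MONOTONIC_RAW, &end);

    benchmark_log(verbose, "workload took %llu us\n",
        (unsigned long long) timespec_difference_us(&end, &start));
}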
--- /dev/null
+/* Per-cpu counter microbenchmarks. */
+
+#include <assert.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+#include "benchmark/helpers.h"
+#include "counter/common.h"
+
+typedef enum test_variant {
+ VARIANT_SCALABLE_COUNTER,
+ VARIANT_ATOMIC,
+ VARIANT_RACY
+} test_variant_t;
+
+static const char* kScalableCounterArgument = "scalable";
+static const char* kAtomicCounterArgument = "atomic";
+static const char* kRacyCounterArgument = "racy";
+
+static const int64_t kChunkSize = 100000000;
+
+/* Arguments parsed from the command line */
+typedef struct test_args {
+ size_t n_threads;
+ unsigned long long num_writes;
+ test_variant_t variant;
+ bool verbose;
+} test_args_t;
+
+typedef struct {
+ char _padding1[128];
+ atomic_bool tg_test_start;
+ atomic_ullong tg_num_writes_remaining;
+ atomic_ullong tg_threads_ready;
+ test_args_t tg_args;
+ uint64_t tg_start_time;
+ uint64_t tg_end_time;
+ uint64_t tg_start_value;
+ uint64_t tg_end_value;
+ char _padding2[128];
+} test_globals_t;
+
+static void parse_arguments(int argc, char** argv, test_args_t *args);
+static const char *get_sysctl_name_for_test_variant(test_variant_t variant);
+static void *writer(void *);
+static uint64_t counter_read(test_variant_t);
+
+int
+main(int argc, char** argv)
+{
+ test_globals_t globals = {0};
+ pthread_t* threads = NULL;
+ int ret;
+ int is_development_kernel;
+ size_t is_development_kernel_size = sizeof(is_development_kernel);
+ pthread_attr_t pthread_attrs;
+ uint64_t duration, writes_stored;
+ double writes_per_second;
+ double loss;
+
+ if (sysctlbyname("kern.development", &is_development_kernel,
+ &is_development_kernel_size, NULL, 0) != 0 || !is_development_kernel) {
+ fprintf(stderr, "%s requires the development kernel\n", argv[0]);
+ exit(1);
+ }
+
+ parse_arguments(argc, argv, &(globals.tg_args));
+ atomic_store(&(globals.tg_num_writes_remaining), globals.tg_args.num_writes);
+
+ threads = malloc(sizeof(pthread_t) * globals.tg_args.n_threads);
+ assert(threads);
+ ret = pthread_attr_init(&pthread_attrs);
+ assert(ret == 0);
+ ret = init_scalable_counter_test();
+ assert(ret == 0);
+ globals.tg_start_value = counter_read(globals.tg_args.variant);
+ for (size_t i = 0; i < globals.tg_args.n_threads; i++) {
+ ret = pthread_create(threads + i, &pthread_attrs, writer, &globals);
+ assert(ret == 0);
+ }
+ for (size_t i = 0; i < globals.tg_args.n_threads; i++) {
+ ret = pthread_join(threads[i], NULL);
+ assert(ret == 0);
+ }
+ ret = fini_scalable_counter_test();
+ assert(ret == 0);
+ globals.tg_end_value = counter_read(globals.tg_args.variant);
+
+ duration = globals.tg_end_time - globals.tg_start_time;
+ printf("-----Results-----\n");
+ printf("rate,loss\n");
+ writes_per_second = globals.tg_args.num_writes / ((double) duration / kNumNanosecondsInSecond);
+ writes_stored = globals.tg_end_value - globals.tg_start_value;
+ loss = (1.0 - ((double) writes_stored / globals.tg_args.num_writes)) * 100;
+ printf("%.4f,%.4f\n", writes_per_second, loss);
+ return 0;
+}
+
+static void *
+writer(void *arg)
+{
+ int ret;
+ const char* sysctl_name;
+ test_globals_t *globals = arg;
+ int64_t value = kChunkSize;
+
+ sysctl_name = get_sysctl_name_for_test_variant(globals->tg_args.variant);
+ assert(sysctl_name != NULL);
+
+ if (atomic_fetch_add(&(globals->tg_threads_ready), 1) == globals->tg_args.n_threads - 1) {
+ globals->tg_start_time = current_timestamp_ns();
+ atomic_store(&globals->tg_test_start, true);
+ }
+ while (!atomic_load(&(globals->tg_test_start))) {
+ ;
+ }
+
+ while (true) {
+ unsigned long long remaining = atomic_fetch_sub(&(globals->tg_num_writes_remaining), value);
+ if (remaining < kChunkSize || remaining > globals->tg_args.num_writes) {
+ break;
+ }
+
+ ret = sysctlbyname(sysctl_name, NULL, NULL, &value, sizeof(value));
+ assert(ret == 0);
+ if (remaining == kChunkSize || remaining - kChunkSize > remaining) {
+ break;
+ }
+ }
+
+ if (atomic_fetch_sub(&(globals->tg_threads_ready), 1) == 1) {
+ globals->tg_end_time = current_timestamp_ns();
+ }
+
+ return NULL;
+}
+
+static const char*
+get_sysctl_name_for_test_variant(test_variant_t variant)
+{
+ switch (variant) {
+ case VARIANT_SCALABLE_COUNTER:
+ return "kern.scalable_counter_write_benchmark";
+ case VARIANT_ATOMIC:
+ return "kern.scalable_counter_atomic_counter_write_benchmark";
+ case VARIANT_RACY:
+ return "kern.scalable_counter_racy_counter_benchmark";
+ default:
+ return NULL;
+ }
+}
+
+static const char*
+get_sysctl_load_name_for_test_variant(test_variant_t variant)
+{
+ switch (variant) {
+ case VARIANT_SCALABLE_COUNTER:
+ return "kern.scalable_counter_test_load";
+ case VARIANT_ATOMIC:
+ return "kern.scalable_counter_atomic_counter_load";
+ case VARIANT_RACY:
+ return "kern.scalable_counter_racy_counter_load";
+ default:
+ return NULL;
+ }
+}
+
+static uint64_t
+counter_read(test_variant_t variant)
+{
+ const char *sysctl_name = get_sysctl_load_name_for_test_variant(variant);
+ int result;
+ uint64_t value;
+ size_t size = sizeof(value);
+ result = sysctlbyname(sysctl_name, &value, &size, NULL, 0);
+ assert(result == 0);
+ return value;
+}
+
+static void
+print_help(char** argv)
+{
+ fprintf(stderr, "%s: <test-variant> [-v] num_writes num_threads\n", argv[0]);
+ fprintf(stderr, "\ntest variants:\n");
+ fprintf(stderr, " %s Benchmark scalable counters.\n", kScalableCounterArgument);
+ fprintf(stderr, " %s Benchmark single atomic counter.\n", kAtomicCounterArgument);
+ fprintf(stderr, " %s Benchmark racy counter.\n", kRacyCounterArgument);
+}
+
+static void
+parse_arguments(int argc, char** argv, test_args_t *args)
+{
+ int current_argument = 1;
+ memset(args, 0, sizeof(test_args_t));
+ if (argc < 4 || argc > 6) {
+ print_help(argv);
+ exit(1);
+ }
+ if (argv[current_argument][0] == '-') {
+ if (strcmp(argv[current_argument], "-v") == 0) {
+ args->verbose = true;
+ } else {
+ fprintf(stderr, "Unknown argument %s\n", argv[current_argument]);
+ print_help(argv);
+ exit(1);
+ }
+ current_argument++;
+ }
+ if (strncasecmp(argv[current_argument], kScalableCounterArgument, strlen(kScalableCounterArgument)) == 0) {
+ args->variant = VARIANT_SCALABLE_COUNTER;
+ } else if (strncasecmp(argv[current_argument], kAtomicCounterArgument, strlen(kAtomicCounterArgument)) == 0) {
+ args->variant = VARIANT_ATOMIC;
+ } else if (strncasecmp(argv[current_argument], kRacyCounterArgument, strlen(kRacyCounterArgument)) == 0) {
+ args->variant = VARIANT_RACY;
+ } else {
+ print_help(argv);
+ exit(1);
+ }
+ current_argument++;
+
+ long num_writes = strtol(argv[current_argument++], NULL, 10);
+ if (num_writes == 0) {
+ print_help(argv);
+ exit(1);
+ }
+ long num_cores = strtol(argv[current_argument++], NULL, 10);
+ if (num_cores == 0) {
+ print_help(argv);
+ exit(1);
+ }
+ assert(num_cores > 0 && num_cores <= get_ncpu());
+ args->n_threads = (unsigned int) num_cores;
+ args->num_writes = (unsigned long long) num_writes;
+}
--- /dev/null
+#!/usr/local/bin/recon
+require 'strict'
+
+local benchrun = require 'benchrun'
+local perfdata = require 'perfdata'
+local sysctl = require 'sysctl'
+local csv = require 'csv'
+
+local kDefaultNumWrites = 10000000000
+
+local benchmark = benchrun.new {
+ name = 'xnu.per_cpu_counter',
+ version = 1,
+ arg = arg,
+ modify_argparser = function(parser)
+ parser:argument{
+ name = 'path',
+ description = 'Path to benchmark binary'
+ }
+ parser:option{
+ name = '--cpu-workers',
+ description = 'Number of cpu workers'
+ }
+ parser:flag{
+ name = '--through-max-workers',
+ description = 'Run benchmark for [1..n] cpu workers'
+ }
+ parser:flag{
+ name = '--through-max-workers-fast',
+ description = 'Run benchmark for [1..2] and each power of four value in [4..n] cpu workers'
+ }
+ parser:option {
+ name = "--num-writes",
+ description = "number of writes",
+ default = kDefaultNumWrites
+ }
+ parser:option{
+ name = '--variant',
+ description = 'Which benchmark variant to run (scalable, atomic, or racy)',
+ default = 'scalable',
+ choices = {"scalable", "atomic", "racy"}
+ }
+ end
+}
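+
+-- Example invocation (illustrative names; the positional argument is the path
+-- to the benchmark binary and the script runs under recon per the shebang):
+--   ./per_cpu_counter.lua /path/to/counter_benchmark --variant atomic --cpu-workers 4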
+
+assert(benchmark.opt.path, "No path supplied for counter benchmark binary")
+
+local ncpus, err = sysctl('hw.logicalcpu_max')
+assert(ncpus > 0, 'invalid number of logical cpus')
+local cpu_workers = tonumber(benchmark.opt.cpu_workers) or ncpus
+
+local writes_per_second = perfdata.unit.custom('writes/sec')
+local tests = {}
+
+function QueueTest(num_cores)
+ table.insert(tests, {
+ path = benchmark.opt.path,
+ num_cores = num_cores,
+ })
+end
+
+if benchmark.opt.through_max_workers then
+ for i = 1, cpu_workers do
+ QueueTest(i)
+ end
+elseif benchmark.opt.through_max_workers_fast then
+ local i = 1
+ while i <= cpu_workers do
+ QueueTest(i)
+ -- Always do a run with two threads to see what the first part of
+ -- the scaling curve looks like
+ -- (and to measure perf on dual core systems).
+ if i == 1 and cpu_workers >= 2 then
+ QueueTest(i + 1)
+ end
+ i = i * 4
+ end
+else
+ QueueTest(cpu_workers)
+end
+
+for _, test in ipairs(tests) do
+ local args = {test.path, benchmark.opt.variant, benchmark.opt.num_writes, test.num_cores,
+ echo = true}
+ for out in benchmark:run(args) do
+ local result = out:match("-----Results-----\n(.*)")
+ benchmark:assert(result, "Unable to find result data in output")
+ local data = csv.openstring(result, {header = true})
+ for field in data:lines() do
+ for k, v in pairs(field) do
+ local unit = writes_per_second
+ local larger_better = true
+ if k == "loss" then
+ unit = perfdata.unit.custom('percent')
+ larger_better = false
+ end
+ benchmark.writer:add_value(k, unit, tonumber(v), {
+ [perfdata.larger_better] = larger_better,
+ threads = test.num_cores,
+ variant = benchmark.opt.variant
+ })
+ end
+ end
+ end
+end
+
+benchmark:finish()
--- /dev/null
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/kern_sysctl.h>
+
+#include "counter/common.h"
+
+int
+init_scalable_counter_test()
+{
+ int result;
+ int value = 1;
+
+ result = sysctlbyname("kern.scalable_counter_test_start", NULL, NULL, &value, sizeof(value));
+ return result;
+}
+
+int
+fini_scalable_counter_test()
+{
+ int result;
+ int value = 1;
+ result = sysctlbyname("kern.scalable_counter_test_finish", NULL, NULL, &value, sizeof(value));
+ return result;
+}
--- /dev/null
+#ifndef _COUNTER_COMMON_H
+#define _COUNTER_COMMON_H
+
+int init_scalable_counter_test(void);
+int fini_scalable_counter_test(void);
+
+#endif /* !defined(_COUNTER_COMMON_H) */
--- /dev/null
+#include <stdatomic.h>
+#include <sys/kern_sysctl.h>
+
+#include <darwintest_utils.h>
+#include <darwintest.h>
+
+#include "counter/common.h"
+#include "test_utils.h"
+
+static unsigned int ncpu(void);
+
+static uint64_t
+sysctl_read(const char *name)
+{
+ int result;
+ uint64_t value;
+ size_t size = sizeof(value);
+ result = sysctlbyname(name, &value, &size, NULL, 0);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(result, "Read from %s", name);
+ return value;
+}
+
+static void
+sysctl_write(const char* name, int64_t amount)
+{
+ int result;
+ result = sysctlbyname(name, NULL, NULL, &amount, sizeof(int64_t));
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(result, "Write to %s", name);
+}
+
+static void
+scalable_counter_add(int64_t amount)
+{
+ sysctl_write("kern.scalable_counter_test_add", amount);
+}
+
+static void
+static_scalable_counter_add(int64_t amount)
+{
+ sysctl_write("kern.static_scalable_counter_test_add", amount);
+}
+
+static int64_t
+scalable_counter_load(void)
+{
+ return (int64_t) sysctl_read("kern.scalable_counter_test_load");
+}
+
+static int64_t
+static_scalable_counter_load(void)
+{
+ return (int64_t) sysctl_read("kern.static_scalable_counter_test_load");
+}
+
+/*
+ * A background thread that bangs on the percpu counter and then exits.
+ * @param num_iterations How many times to bang on the counter. Each iteration makes the counter
+ * bigger by 100.
+ */
+static void*
+background_scalable_counter_thread(void* num_iterations_ptr)
+{
+ int64_t i, num_iterations;
+ num_iterations = (int64_t)(num_iterations_ptr);
+ for (i = 0; i < num_iterations; i++) {
+ scalable_counter_add(-25);
+ scalable_counter_add(75);
+ scalable_counter_add(-100);
+ scalable_counter_add(150);
+ }
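+ /* Pairs with the acquire fence in the main test thread after the joins. */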
+ atomic_thread_fence(memory_order_release);
+ return 0;
+}
+
+static
+void
+darwin_test_fini_scalable_counter_test()
+{
+ int ret = fini_scalable_counter_test();
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "fini_scalable_counter_test");
+}
+
+static
+void
+darwin_test_setup(void)
+{
+ T_SETUPBEGIN;
+ int dev_kernel = is_development_kernel();
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(dev_kernel, "sysctlbyname kern.development");
+ if (is_development_kernel() != 1) {
+ T_SKIP("Skipping test on non development kernel.");
+ }
+ init_scalable_counter_test();
+ T_SETUPEND;
+ T_ATEND(darwin_test_fini_scalable_counter_test);
+}
+
+T_DECL(test_scalable_counters_single_threaded, "Test single threaded operations on scalable_counters", T_META_ASROOT(true))
+{
+ static int64_t kNumIterations = 100, i, expected_value = 0;
+ darwin_test_setup();
+ T_QUIET; T_EXPECT_EQ(scalable_counter_load(), 0LL, "Counter starts at zero");
+
+ /* Simple add, subtract, and read */
+ scalable_counter_add(1);
+ T_QUIET; T_EXPECT_EQ(scalable_counter_load(), 1LL, "0 + 1 == 1");
+ scalable_counter_add(-1);
+ T_QUIET; T_EXPECT_EQ(scalable_counter_load(), 0LL, "1 - 1 == 0");
+ for (i = 0; i < kNumIterations; i++) {
+ scalable_counter_add(i);
+ expected_value += i;
+ }
+ for (i = 0; i < kNumIterations / 2; i++) {
+ scalable_counter_add(-i);
+ expected_value -= i;
+ }
+ T_QUIET; T_EXPECT_EQ(scalable_counter_load(), expected_value, "Counter value is correct.");
+ T_END;
+}
+
+T_DECL(test_static_counter, "Test statically declared counter", T_META_ASROOT(true))
+{
+ static size_t kNumIterations = 100;
+ int64_t start_value;
+ darwin_test_setup();
+ start_value = static_scalable_counter_load();
+ for (size_t i = 0; i < kNumIterations; i++) {
+ static_scalable_counter_add(1);
+ }
+ T_QUIET; T_EXPECT_EQ(static_scalable_counter_load(), (long long) kNumIterations + start_value, "Counter value is correct");
+ T_END;
+}
+
+T_DECL(test_scalable_counters_multithreaded, "Test multi-threaded operations on scalable_counters", T_META_ASROOT(true))
+{
+ unsigned int kNumThreads = ncpu() * 5;
+ int ret;
+ int64_t i;
+ pthread_attr_t pthread_attr;
+ pthread_t *threads;
+
+ darwin_test_setup();
+
+ threads = malloc(sizeof(pthread_t) * kNumThreads);
+ T_QUIET; T_ASSERT_NOTNULL(threads, "Out of memory");
+
+ ret = pthread_attr_init(&pthread_attr);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "pthread_attr_init");
+
+ int64_t expected_value = 0;
+ for (i = 0; i < kNumThreads; i++) {
+ ret = pthread_create(&threads[i], &pthread_attr, background_scalable_counter_thread, (void*)(i));
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "pthread_create");
+ expected_value += 100 * i;
+ }
+
+ for (i = 0; i < kNumThreads; i++) {
+ void *exit_code;
+ ret = pthread_join(threads[i], &exit_code);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "pthread_join");
+ T_QUIET; T_ASSERT_EQ((ptrdiff_t) exit_code, (ptrdiff_t) 0, "Background thread exited successfully.");
+ }
+ atomic_thread_fence(memory_order_acquire);
+
+ T_QUIET; T_EXPECT_EQ(scalable_counter_load(), expected_value, "Counter value is correct.");
+
+ ret = pthread_attr_destroy(&pthread_attr);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "pthread_attr_destroy");
+ free(threads);
+}
+
+static unsigned int
+ncpu()
+{
+ int result;
+ int ncpu;
+ size_t size = sizeof(ncpu);
+ result = sysctlbyname("hw.ncpu", &ncpu, &size, NULL, 0);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(result, "sysctlbyname(hw.ncpu)");
+ return (unsigned int) ncpu;
+}
* Test to validate that we can schedule threads on all hw.ncpus cores according to _os_cpu_number
*
* <rdar://problem/29545645>
+ * <rdar://problem/30445216>
*
* xcrun -sdk macosx.internal clang -o cpucount cpucount.c -ldarwintest -g -Weverything
* xcrun -sdk iphoneos.internal clang -arch arm64 -o cpucount-ios cpucount.c -ldarwintest -g -Weverything
+ * xcrun -sdk macosx.internal clang -o cpucount cpucount.c -ldarwintest -arch arm64e -Weverything
*/
#include <darwintest.h>
#include <stdio.h>
#include <stdlib.h>
-#include <stdbool.h>
-#include <stdalign.h>
#include <unistd.h>
-#include <assert.h>
#include <pthread.h>
-#include <err.h>
-#include <errno.h>
-#include <sysexits.h>
#include <sys/sysctl.h>
-#include <stdatomic.h>
+#include <sys/proc_info.h>
+#include <libproc.h>
#include <mach/mach.h>
#include <mach/mach_time.h>
#include <os/tsd.h> /* private header for _os_cpu_number */
-T_GLOBAL_META(T_META_RUN_CONCURRENTLY(true));
+T_GLOBAL_META(
+ T_META_RUN_CONCURRENTLY(false),
+ T_META_BOOTARGS_SET("enable_skstb=1"),
+ T_META_CHECK_LEAKS(false),
+ T_META_ASROOT(true),
+ T_META_ALL_VALID_ARCHS(true)
+ );
-/* const variables aren't constants, but enums are */
-enum { max_threads = 40 };
+#define KERNEL_BOOTARGS_MAX_SIZE 1024
+static char kernel_bootargs[KERNEL_BOOTARGS_MAX_SIZE];
-#define CACHE_ALIGNED __attribute__((aligned(128)))
-
-static _Atomic CACHE_ALIGNED uint64_t g_ready_threads = 0;
-
-static _Atomic CACHE_ALIGNED bool g_cpu_seen[max_threads];
-
-static _Atomic CACHE_ALIGNED bool g_bail = false;
-
-static uint32_t g_threads; /* set by sysctl hw.ncpu */
-
-static uint64_t g_spin_ms = 50; /* it takes ~50ms of spinning for CLPC to deign to give us all cores */
-
-/*
- * sometimes pageout scan can eat all of CPU 0 long enough to fail the test,
- * so we run the test at RT priority
- */
-static uint32_t g_thread_pri = 97;
-
-/*
- * add in some extra low-pri threads to convince the amp scheduler to use E-cores consistently
- * works around <rdar://problem/29636191>
- */
-static uint32_t g_spin_threads = 2;
-static uint32_t g_spin_threads_pri = 20;
-
-static semaphore_t g_readysem, g_go_sem;
+#define KERNEL_VERSION_MAX_SIZE 1024
+static char kernel_version[KERNEL_VERSION_MAX_SIZE];
static mach_timebase_info_data_t timebase_info;
static uint64_t
-nanos_to_abs(uint64_t nanos)
+abs_to_nanos(uint64_t abs)
{
- return nanos * timebase_info.denom / timebase_info.numer;
+ return abs * timebase_info.numer / timebase_info.denom;
}
-static void
-set_realtime(pthread_t thread)
-{
- kern_return_t kr;
- thread_time_constraint_policy_data_t pol;
-
- mach_port_t target_thread = pthread_mach_thread_np(thread);
- T_QUIET; T_ASSERT_NOTNULL(target_thread, "pthread_mach_thread_np");
-
- /* 1s 100ms 10ms */
- pol.period = (uint32_t)nanos_to_abs(1000000000);
- pol.constraint = (uint32_t)nanos_to_abs(100000000);
- pol.computation = (uint32_t)nanos_to_abs(10000000);
-
- pol.preemptible = 0; /* Ignored by OS */
- kr = thread_policy_set(target_thread, THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t) &pol,
- THREAD_TIME_CONSTRAINT_POLICY_COUNT);
- T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "thread_policy_set(THREAD_TIME_CONSTRAINT_POLICY)");
-}
-
-static pthread_t
-create_thread(void *(*start_routine)(void *), uint32_t priority)
+static int32_t
+get_csw_count()
{
+ struct proc_taskinfo taskinfo;
int rv;
- pthread_t new_thread;
- pthread_attr_t attr;
-
- struct sched_param param = { .sched_priority = (int)priority };
-
- rv = pthread_attr_init(&attr);
- T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_attr_init");
-
- rv = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
- T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_attr_setdetachstate");
-
- rv = pthread_attr_setschedparam(&attr, ¶m);
- T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_attr_setschedparam");
-
- rv = pthread_create(&new_thread, &attr, start_routine, NULL);
- T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_create");
-
- if (priority == 97) {
- set_realtime(new_thread);
- }
- rv = pthread_attr_destroy(&attr);
- T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_attr_destroy");
+ rv = proc_pidinfo(getpid(), PROC_PIDTASKINFO, 0, &taskinfo, sizeof(taskinfo));
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "PROC_PIDTASKINFO");
- return new_thread;
+ return taskinfo.pti_csw;
}
-static void *
-thread_fn(__unused void *arg)
+// noinline hopefully keeps the optimizer from hoisting it out of the loop
+// until rdar://68253516 is fixed.
+__attribute__((noinline))
+static uint32_t
+fixed_os_cpu_number(void)
{
- T_QUIET; T_EXPECT_TRUE(true, "initialize darwintest on this thread");
-
- kern_return_t kr;
-
- kr = semaphore_wait_signal(g_go_sem, g_readysem);
- T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");
+ uint32_t cpu_number = _os_cpu_number();
- /* atomic inc to say hello */
- g_ready_threads++;
+ return cpu_number;
+}
- uint64_t timeout = nanos_to_abs(g_spin_ms * NSEC_PER_MSEC) + mach_absolute_time();
- /*
- * spin to force the other threads to spread out across the cores
- * may take some time if cores are masked and CLPC needs to warm up to unmask them
- */
- while (g_ready_threads < g_threads && mach_absolute_time() < timeout) {
- ;
- }
+T_DECL(count_cpus, "Tests we can schedule bound threads on all hw.ncpus cores and that _os_cpu_number matches")
+{
+ int rv;
- T_QUIET; T_ASSERT_GE(timeout, mach_absolute_time(), "waiting for all threads took too long");
+ setvbuf(stdout, NULL, _IONBF, 0);
+ setvbuf(stderr, NULL, _IONBF, 0);
- timeout = nanos_to_abs(g_spin_ms * NSEC_PER_MSEC) + mach_absolute_time();
+ /* Validate what kind of kernel we're on */
+ size_t kernel_version_size = sizeof(kernel_version);
+ rv = sysctlbyname("kern.version", kernel_version, &kernel_version_size, NULL, 0);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "kern.version");
- int iteration = 0;
- uint32_t cpunum = 0;
+ T_LOG("kern.version: %s\n", kernel_version);
- /* search for new CPUs for the duration */
- while (mach_absolute_time() < timeout) {
- cpunum = _os_cpu_number();
+ /* Double check that darwintest set the boot arg we requested */
+ size_t kernel_bootargs_size = sizeof(kernel_bootargs);
+ rv = sysctlbyname("kern.bootargs", kernel_bootargs, &kernel_bootargs_size, NULL, 0);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "kern.bootargs");
- assert(cpunum < max_threads);
+ T_LOG("kern.bootargs: %s\n", kernel_bootargs);
- g_cpu_seen[cpunum] = true;
+ if (NULL == strstr(kernel_bootargs, "enable_skstb=1")) {
+ T_FAIL("enable_skstb=1 boot-arg is missing");
+ }
- if (iteration++ % 10000) {
- uint32_t cpus_seen = 0;
+ kern_return_t kr;
+ kr = mach_timebase_info(&timebase_info);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_timebase_info");
- for (uint32_t i = 0; i < g_threads; i++) {
- if (g_cpu_seen[i]) {
- cpus_seen++;
- }
- }
+ int bound_cpu_out = 0;
+ size_t bound_cpu_out_size = sizeof(bound_cpu_out);
+ rv = sysctlbyname("kern.sched_thread_bind_cpu", &bound_cpu_out, &bound_cpu_out_size, NULL, 0);
- /* bail out early if we saw all CPUs */
- if (cpus_seen == g_threads) {
- break;
- }
+ if (rv == -1) {
+ if (errno == ENOENT) {
+ T_FAIL("kern.sched_thread_bind_cpu doesn't exist, must set enable_skstb=1 boot-arg on development kernel");
+ }
+ if (errno == EPERM) {
+ T_FAIL("must run as root");
}
}
- g_bail = true;
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "read kern.sched_thread_bind_cpu");
+ T_QUIET; T_ASSERT_EQ(bound_cpu_out, -1, "kern.sched_thread_bind_cpu should exist, start unbound");
- printf("thread cpunum: %d\n", cpunum);
+ struct sched_param param = {.sched_priority = 63};
- kr = semaphore_wait_signal(g_go_sem, g_readysem);
- T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");
+ rv = pthread_setschedparam(pthread_self(), SCHED_FIFO, ¶m);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_setschedparam");
- return NULL;
-}
+ uint32_t sysctl_ncpu = 0;
+ size_t ncpu_size = sizeof(sysctl_ncpu);
+ rv = sysctlbyname("hw.ncpu", &sysctl_ncpu, &ncpu_size, NULL, 0);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "sysctlbyname(hw.ncpu)");
-static void *
-spin_fn(__unused void *arg)
-{
- T_QUIET; T_EXPECT_TRUE(true, "initialize darwintest on this thread");
+ T_LOG("hw.ncpu: %2d\n", sysctl_ncpu);
- kern_return_t kr;
+ T_ASSERT_GT(sysctl_ncpu, 0, "at least one CPU exists");
- kr = semaphore_wait_signal(g_go_sem, g_readysem);
- T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");
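+ /* Bind this thread to each CPU in turn and verify that _os_cpu_number() reports the bound CPU. */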
+ for (uint32_t cpu_to_bind = 0; cpu_to_bind < sysctl_ncpu; cpu_to_bind++) {
+ int32_t before_csw_count = get_csw_count();
+ T_LOG("(csw %4d) attempting to bind to cpu %2d\n", before_csw_count, cpu_to_bind);
- uint64_t timeout = nanos_to_abs(g_spin_ms * NSEC_PER_MSEC * 2) + mach_absolute_time();
+ uint64_t start = mach_absolute_time();
- /*
- * run and sleep a bit to force some scheduler churn to get all the cores active
- * needed to work around bugs in the amp scheduler
- */
- while (mach_absolute_time() < timeout && g_bail == false) {
- usleep(500);
+ rv = sysctlbyname("kern.sched_thread_bind_cpu", NULL, 0, &cpu_to_bind, sizeof(cpu_to_bind));
- uint64_t inner_timeout = nanos_to_abs(1 * NSEC_PER_MSEC) + mach_absolute_time();
+ uint64_t end = mach_absolute_time();
- while (mach_absolute_time() < inner_timeout && g_bail == false) {
- ;
+ if (rv == -1 && errno == ENOTSUP) {
+ T_SKIP("Binding is available, but this process doesn't support binding (e.g. Rosetta on Aruba)");
}
- }
- kr = semaphore_wait_signal(g_go_sem, g_readysem);
- T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "kern.sched_thread_bind_cpu(%u)", cpu_to_bind);
- return NULL;
-}
+ uint32_t os_cpu_number_reported = fixed_os_cpu_number();
+ bound_cpu_out = 0;
+ rv = sysctlbyname("kern.sched_thread_bind_cpu", &bound_cpu_out, &bound_cpu_out_size, NULL, 0);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "read kern.sched_thread_bind_cpu");
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wgnu-flexible-array-initializer"
-T_DECL(count_cpus, "Tests we can schedule threads on all hw.ncpus cores according to _os_cpu_number",
- T_META_CHECK_LEAKS(false), T_META_ENABLED(false))
-#pragma clang diagnostic pop
-{
- setvbuf(stdout, NULL, _IONBF, 0);
- setvbuf(stderr, NULL, _IONBF, 0);
+ T_QUIET; T_EXPECT_EQ((int)cpu_to_bind, bound_cpu_out,
+ "should report bound cpu id matching requested bind target");
- int rv;
- kern_return_t kr;
- kr = mach_timebase_info(&timebase_info);
- T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_timebase_info");
-
- kr = semaphore_create(mach_task_self(), &g_readysem, SYNC_POLICY_FIFO, 0);
- T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");
-
- kr = semaphore_create(mach_task_self(), &g_go_sem, SYNC_POLICY_FIFO, 0);
- T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");
-
- size_t ncpu_size = sizeof(g_threads);
- rv = sysctlbyname("hw.ncpu", &g_threads, &ncpu_size, NULL, 0);
- T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "sysctlbyname(hw.ncpu)");
-
- printf("hw.ncpu: %2d\n", g_threads);
+ uint64_t delta_abs = end - start;
+ uint64_t delta_ns = abs_to_nanos(delta_abs);
- assert(g_threads < max_threads);
+ int32_t after_csw_count = get_csw_count();
- for (uint32_t i = 0; i < g_threads; i++) {
- create_thread(&thread_fn, g_thread_pri);
- }
-
- for (uint32_t i = 0; i < g_spin_threads; i++) {
- create_thread(&spin_fn, g_spin_threads_pri);
- }
+ T_LOG("(csw %4d) bound to cpu %2d in %f milliseconds\n",
+ after_csw_count, cpu_to_bind,
+ ((double)delta_ns / 1000000.0));
- for (uint32_t i = 0; i < g_threads + g_spin_threads; i++) {
- kr = semaphore_wait(g_readysem);
- T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");
- }
+ if (cpu_to_bind > 0) {
+ T_QUIET; T_EXPECT_LT(before_csw_count, after_csw_count,
+ "should have had to context switch to execute the bind");
+ }
- uint64_t timeout = nanos_to_abs(g_spin_ms * NSEC_PER_MSEC) + mach_absolute_time();
+ T_LOG("cpu %2d reported id %2d\n",
+ cpu_to_bind, os_cpu_number_reported);
- /* spin to warm up CLPC :) */
- while (mach_absolute_time() < timeout) {
- ;
+ T_QUIET;
+ T_EXPECT_EQ(cpu_to_bind, os_cpu_number_reported,
+ "should report same CPU number as was bound to");
}
- kr = semaphore_signal_all(g_go_sem);
- T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal_all");
+ int unbind = -1; /* pass -1 in order to unbind the thread */
- for (uint32_t i = 0; i < g_threads + g_spin_threads; i++) {
- kr = semaphore_wait(g_readysem);
- T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");
- }
+ rv = sysctlbyname("kern.sched_thread_bind_cpu", NULL, 0, &unbind, sizeof(unbind));
- uint32_t cpus_seen = 0;
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "kern.sched_thread_bind_cpu(%u)", unbind);
- for (uint32_t i = 0; i < g_threads; i++) {
- if (g_cpu_seen[i]) {
- cpus_seen++;
- }
+ rv = sysctlbyname("kern.sched_thread_bind_cpu", &bound_cpu_out, &bound_cpu_out_size, NULL, 0);
- printf("cpu %2d: %d\n", i, g_cpu_seen[i]);
- }
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "read kern.sched_thread_bind_cpu");
+ T_QUIET; T_ASSERT_EQ(bound_cpu_out, -1, "thread should be unbound at the end");
- T_ASSERT_EQ(cpus_seen, g_threads, "test should have run threads on all CPUS");
+ T_PASS("test has run threads on all CPUS");
}
input_struct_count, outputs, output_count, NULL, NULL
);
if (io_result != kIOReturnSuccess) {
- T_LOG("%s: call to AppleKeyStore method %d failed", __func__);
+ T_LOG("%s: call to AppleKeyStore method %d failed", __func__, command);
goto close;
}
static size_t
kern_memory_failure_handler(
+ __unused mach_port_t task,
+ __unused mach_port_t thread,
exception_type_t exception,
mach_exception_data_t code)
{
--- /dev/null
+#include <stdio.h>
+#include <fcntl.h>
+#include <util.h>
+#include <unistd.h>
+#include <darwintest.h>
+
+T_DECL(dev_zero,
+ "test reading from /dev/zero",
+ T_META_ASROOT(false))
+{
+ int dev = opendev("/dev/zero", O_RDONLY, NULL, NULL);
+ char buffer[100];
+
+ for (int i = 0; i < 100; i++) {
+ buffer[i] = 0xff;
+ }
+
+ ssize_t rd_sz = read(dev, buffer, sizeof(buffer));
+
+ T_EXPECT_EQ(rd_sz, (ssize_t)sizeof(buffer), "read 100 bytes from /dev/zero");
+
+ for (int i = 0; i < 100; i++) {
+ if (buffer[i]) {
+ T_FAIL("Unexpected non-zero character read from /dev/zero");
+ }
+ }
+
+ close(dev);
+}
-PROJECT := xnu/darwintests
-
ifdef BASEDSTROOT
override DSTROOT = $(BASEDSTROOT)
endif
-INVALID_ARCHS = i386
-ENABLE_LTE_TESTS=YES
-
-OTHER_LTE_INCLUDE_FILES += \
- /System/Library/PrivateFrameworks/LoggingSupport.framework, \
- /System/Library/PrivateFrameworks/MobileKeyBag.framework, \
- /System/Library/Frameworks/IOSurface.framework, \
- /usr/local/lib/libdarwintest_utils.dylib, \
- /usr/lib/libapple_crypto.dylib,
-
-DEVELOPER_DIR ?= $(shell xcode-select -p)
# the xnu build system will only ever call us with the default target
.DEFAULT_GOAL := install
-SDKROOT ?= driverkit.internal
-
-include $(DEVELOPER_DIR)/AppleInternal/Makefiles/darwintest/Makefile.common
-
-DRIVERKIT_DIR := $(TARGETSDK)/System/DriverKit
-DRIVERKIT_TARGET := x86_64-apple-driverkit$(shell xcrun --sdk driverkit.internal --show-sdk-version)
-
-IIG := $(shell xcrun --sdk "$(SDKROOT)" -f iig)
-
-# Enumerate all directories in this folder, excluding the "build" directory
-DEXT_SRCS = $(filter-out build,$(shell find . -type d -depth 1 | sed -e "s:./::g"))
-
-# hack: reuse the default CXXFLAGS and LDFLAGS but remove -mmacosx-version-min and -arch. Also adds a few other required flags
-# These are used for both iig and clang
-DEXT_SHARED_CXXFLAGS := $(filter-out -mmacosx-version-min=%, $(shell echo $(CXXFLAGS) $(OTHER_CXXFLAGS) | sed -e "s/-arch [a-zA-Z0-9_]*//g")) -isystem$(DRIVERKIT_DIR)/usr/include -iframework$(DRIVERKIT_DIR)/System/Library/Frameworks -std=gnu++14
-
-# These are used just for clang
-DEXT_CXXFLAGS := $(DEXT_SHARED_CXXFLAGS) -target $(DRIVERKIT_TARGET)
-
-# These are used just for iig
-IIGFLAGS := -- $(DEXT_SHARED_CXXFLAGS) -D__IIG=1 -x c++
-
-# Used just for clang. LDFLAGS are not needed for iig
-DEXT_LDFLAGS := $(filter-out -mmacosx-version-min=%, $(shell echo $(LDFLAGS) $(OTHER_LDFLAGS) | sed -e "s/-arch [a-zA-Z0-9_]*//g")) -target $(DRIVERKIT_TARGET) -L$(DRIVERKIT_DIR)/usr/lib -F$(DRIVERKIT_DIR)/System/Library/Frameworks -framework DriverKit
+install:
+ mkdir -p $(DSTROOT)/AppleInternal
-# This generates rules to create dexts from each directory specified in DEXT_SRCS
-define GENERATE_DEXT_RULE
-## Given the following directory structure:
-## test_driver_123/
-## Info.plist
-## test_driver_123.entitlements
-## [cpp and iig files]
-## This produces a dext called com.apple.test_driver_123.dext:
-## com.apple.test_driver_123.dext/
-## com.apple.test_driver_123 [dext executable]
-## Info.plist
-## _CodeSignature/
-
-CUSTOM_TARGETS += com.apple.$1.dext
-
-com.apple.$1.dext : $(patsubst $1/%.cpp,$(OBJROOT)/$1/%.o,$(wildcard $1/*.cpp)) $(patsubst $1/%.iig,$(OBJROOT)/$1/DerivedSources/%.iig.o,$(wildcard $1/*.iig))
- # Create bundle directory
- mkdir -p $(SYMROOT)/$$@
- # Link object files
- $(CXX) $(DEXT_LDFLAGS) $$^ -o $(SYMROOT)/$$@/com.apple.$1
- # Copy Info.plist and sign
- cp $1/Info.plist $(SYMROOT)/$$@
- codesign -vvv --force --sign - --entitlements $1/$1.entitlements --timestamp=none $(SYMROOT)/$$@
-
-install-com.apple.$1.dext: com.apple.$1.dext
- mkdir -p $(INSTALLDIR)
- cp -R $(SYMROOT)/com.apple.$1.dext $(INSTALLDIR)
-
-$(OBJROOT)/$1/DerivedSources/%.iig.o: $(OBJROOT)/$1/DerivedSources/%.iig.cpp
- mkdir -p $(OBJROOT)/$1/DerivedSources
- # Compile *.iig.cpp to object file
- $(CXX) $(DEXT_CXXFLAGS) -I$1/ -I$(OBJROOT)/$1/DerivedSources -c $$^ -o $$@
-
-$(OBJROOT)/$1/DerivedSources/%.iig.cpp: $1/%.iig
- mkdir -p $(OBJROOT)/$1/DerivedSources
- # Generate *.iig.cpp and *.h header files from *.iig
- $(IIG) --def $$^ --impl $$@ --header $$(patsubst %.iig.cpp,%.h,$$@) $(IIGFLAGS)
-
-# Tell make not to delete the intermediate *.iig.cpp file since it is useful for debugging
-.PRECIOUS :: $(OBJROOT)/$1/DerivedSources/%.iig.cpp
-
-$(OBJROOT)/$1/%.o: $1/%.cpp $(patsubst $1/%.iig,$(OBJROOT)/$1/DerivedSources/%.iig.o,$(wildcard $1/*.iig))
- # Compile c++ file. The additional dependency is for headers emitted by iig
- $(CXX) $(DEXT_CXXFLAGS) -I$1/ -I$(OBJROOT)/$1/DerivedSources -c $$< -o $$@
-endef
-
-
-ifeq ($(PLATFORM),MacOSX)
-$(foreach DEXTSRCDIR,$(DEXT_SRCS),$(eval $(call GENERATE_DEXT_RULE,$(DEXTSRCDIR))))
-else
-EXCLUDED_SOURCES += $(DEXT_SRCS)
-endif
-
-include $(DEVELOPER_DIR)/AppleInternal/Makefiles/darwintest/Makefile.targets
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
- <key>BuildMachineOSBuild</key>
- <string>19A582a</string>
- <key>CFBundleDevelopmentRegion</key>
- <string>en</string>
- <key>CFBundleExecutable</key>
- <string>com.apple.test_intentionally_crashing_driver_56101852</string>
- <key>CFBundleIdentifier</key>
- <string>com.apple.test_intentionally_crashing_driver_56101852</string>
- <key>CFBundleInfoDictionaryVersion</key>
- <string>6.0</string>
- <key>CFBundleName</key>
- <string>com.apple.test_intentionally_crashing_driver_56101852</string>
- <key>CFBundlePackageType</key>
- <string>DEXT</string>
- <key>CFBundleShortVersionString</key>
- <string>1.0</string>
- <key>CFBundleSupportedPlatforms</key>
- <array>
- <string>MacOSX</string>
- </array>
- <key>CFBundleVersion</key>
- <string>1</string>
- <key>DTCompiler</key>
- <string>com.apple.compilers.llvm.clang.1_0</string>
- <key>DTPlatformBuild</key>
- <string>12A5026a</string>
- <key>DTPlatformName</key>
- <string>macosx</string>
- <key>DTPlatformVersion</key>
- <string>10.16</string>
- <key>DTSDKBuild</key>
- <string></string>
- <key>DTSDKName</key>
- <string>driverkit.macosx20.0</string>
- <key>DTXcode</key>
- <string>1200</string>
- <key>DTXcodeBuild</key>
- <string>12A5026a</string>
- <key>IOKitPersonalities</key>
- <dict>
- <key>test_intentionally_crashing_driver_56101852</key>
- <dict>
- <key>CFBundleIdentifier</key>
- <string>com.apple.test_intentionally_crashing_driver_56101852</string>
- <key>CFBundleIdentifierKernel</key>
- <string>com.apple.kpi.iokit</string>
- <key>IOClass</key>
- <string>IOUserService</string>
- <key>IOMatchCategory</key>
- <string>com.apple.test_intentionally_crashing_driver_56101852</string>
- <key>IOProviderClass</key>
- <string>IOUserResources</string>
- <key>IOResourceMatch</key>
- <string>IOKit</string>
- <key>IOUserClass</key>
- <string>test_intentionally_crashing_driver_56101852</string>
- <key>IOUserServerName</key>
- <string>com.apple.test_intentionally_crashing_driver_56101852</string>
- </dict>
- </dict>
- <key>OSBundleUsageDescription</key>
- <string></string>
- <key>OSMinimumDriverKitVersion</key>
- <string>20.0</string>
-</dict>
-</plist>
+++ /dev/null
-//
-// test_intentionally_crashing_driver_56101852.cpp
-// test_intentionally_crashing_driver_56101852
-//
-// Copyright © 2019 Apple Inc. All rights reserved.
-//
-
-#include <os/log.h>
-
-#include <DriverKit/IOUserServer.h>
-#include <DriverKit/IOLib.h>
-
-#include "test_intentionally_crashing_driver_56101852.h"
-
-kern_return_t
-IMPL(test_intentionally_crashing_driver_56101852, Start)
-{
- kern_return_t ret;
- ret = Start(provider, SUPERDISPATCH);
- os_log(OS_LOG_DEFAULT, "Hello World");
- return ret;
-}
-
-/* Intentionally crash */
-__attribute__((constructor)) void
-crash()
-{
- /* cause SIGILL */
- __builtin_trap();
-}
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
- <key>com.apple.developer.driverkit</key>
- <true/>
- <key>com.apple.security.app-sandbox</key>
- <true/>
-</dict>
-</plist>
+++ /dev/null
-//
-// test_intentionally_crashing_driver_56101852.iig
-// test_intentionally_crashing_driver_56101852
-//
-// Copyright © 2019 Apple Inc. All rights reserved.
-//
-
-#ifndef test_intentionally_crashing_driver_56101852_h
-#define test_intentionally_crashing_driver_56101852_h
-
-#include <Availability.h>
-#include <DriverKit/IOService.iig>
-
-class test_intentionally_crashing_driver_56101852: public IOService
-{
-public:
- virtual kern_return_t
- Start(IOService * provider) override;
-};
-
-#endif /* test_intentionally_crashing_driver_56101852_h */
#include <uuid/uuid.h>
#endif
+#include "drop_priv.h"
+
#if TARGET_OS_OSX
#define INVOKER_UID "SUDO_UID"
#define INVOKER_GID "SUDO_GID"
}
#endif /* TARGET_OS_OSX */
-void
-drop_priv(void);
void
drop_priv(void)
{
--- /dev/null
+#ifndef __DROP_PRIV_H
+#define __DROP_PRIV_H
+
+void drop_priv(void);
+
+#endif /* __DROP_PRIV_H */
__builtin_unreachable();
}
+/**
+ * This has to be defined for linking purposes, but it's unused.
+ */
+kern_return_t
+catch_mach_exception_raise_state(
+ mach_port_t exception_port,
+ exception_type_t type,
+ exception_data_t codes,
+ mach_msg_type_number_t code_count,
+ int *flavor,
+ thread_state_t in_state,
+ mach_msg_type_number_t in_state_count,
+ thread_state_t out_state,
+ mach_msg_type_number_t *out_state_count)
+{
+#pragma unused(exception_port, type, codes, code_count, flavor, in_state, in_state_count, out_state, out_state_count)
+ T_FAIL("Triggered catch_mach_exception_raise_state() which shouldn't happen...");
+ __builtin_unreachable();
+}
+
/**
* Called by mach_exc_server() to handle the exception. This will call the
* test's exception-handler callback and will then modify
* the thread state to move to the next instruction.
*/
kern_return_t
-catch_mach_exception_raise_state(
+catch_mach_exception_raise_state_identity(
mach_port_t exception_port __unused,
+ mach_port_t thread,
+ mach_port_t task,
exception_type_t type,
exception_data_t codes,
mach_msg_type_number_t code_count,
T_ASSERT_EQ(*flavor, EXCEPTION_THREAD_STATE, "The thread state flavor is EXCEPTION_THREAD_STATE");
T_ASSERT_EQ(in_state_count, EXCEPTION_THREAD_STATE_COUNT, "The thread state count is EXCEPTION_THREAD_STATE_COUNT");
- size_t advance_pc = exc_handler_callback(type, codes_64);
+ size_t advance_pc = exc_handler_callback(task, thread, type, codes_64);
/**
* Increment the PC by the requested amount so the thread doesn't cause
pc = ptrauth_sign_unauthenticated(pc, ptrauth_key_function_pointer, 0);
arm_thread_state64_set_pc_fptr(*state, pc);
#else
+ (void)advance_pc;
T_FAIL("catch_mach_exception_raise_state() not fully implemented on this architecture");
__builtin_unreachable();
#endif
return KERN_SUCCESS;
}
-/**
- * This has to be defined for linking purposes, but it's unused.
- */
-kern_return_t
-catch_mach_exception_raise_state_identity(
- mach_port_t exception_port,
- mach_port_t thread,
- mach_port_t task,
- exception_type_t type,
- exception_data_t codes,
- mach_msg_type_number_t code_count,
- int *flavor,
- thread_state_t in_state,
- mach_msg_type_number_t in_state_count,
- thread_state_t out_state,
- mach_msg_type_number_t *out_state_count)
-{
-#pragma unused(exception_port, thread, task, type, codes, code_count, flavor, in_state, in_state_count, out_state, out_state_count)
- T_FAIL("Triggered catch_mach_exception_raise_state_identity() which shouldn't happen...");
- __builtin_unreachable();
-}
-
mach_port_t
create_exception_port(exception_mask_t exception_mask)
{
thread,
exception_mask,
exc_port,
- (exception_behavior_t)(EXCEPTION_STATE | MACH_EXCEPTION_CODES),
+ (exception_behavior_t)(EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES),
EXCEPTION_THREAD_STATE);
T_ASSERT_MACH_SUCCESS(kr, "Set the exception port to my custom handler");
* Callback invoked by run_exception_handler() when a Mach exception is
* received.
*
- * @param type exception type received from the kernel
- * @param codes exception codes received from the kernel
+ * @param task the task causing the exception
+ * @param thread the thread causing the exception
+ * @param type exception type received from the kernel
+ * @param codes exception codes received from the kernel
*
* @return how much the exception handler should advance the program
* counter, in bytes (in order to move past the code causing the
* exception)
*/
-typedef size_t (*exc_handler_callback_t)(exception_type_t type, mach_exception_data_t codes);
+typedef size_t (*exc_handler_callback_t)(mach_port_t task, mach_port_t thread,
+ exception_type_t type, mach_exception_data_t codes);
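+
+/*
+ * A minimal handler sketch (the handler name and the fixed 4-byte advance are
+ * illustrative only, not part of the helper API):
+ *
+ *   static size_t
+ *   my_exc_handler(mach_port_t task, mach_port_t thread,
+ *       exception_type_t type, mach_exception_data_t codes)
+ *   {
+ *       T_LOG("got exception type %d on thread 0x%x", type, thread);
+ *       return 4; // advance the PC past one 32-bit instruction
+ *   }
+ */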
mach_port_t
create_exception_port(exception_mask_t exception_mask);
--- /dev/null
+#include <darwintest.h>
+#include <mach/mach.h>
+#include <mach/mach_types.h>
+#include <mach/task.h>
+#include <mach/thread_act.h>
+#include <mach_debug/ipc_info.h>
+
+T_GLOBAL_META(
+ T_META_NAMESPACE("xnu.ipc"),
+ T_META_RUN_CONCURRENTLY(true));
+
+T_DECL(exception_ports_info, "Test {task, thread}_get_exception_ports_info")
+{
+ kern_return_t kr;
+ mach_port_t exc_port1, exc_port2, exc_port3;
+
+ mach_msg_type_number_t count = EXC_TYPES_COUNT;
+ exception_mask_t masks[EXC_TYPES_COUNT];
+ ipc_info_port_t ports_info[EXC_TYPES_COUNT];
+ exception_behavior_t behaviors[EXC_TYPES_COUNT];
+ thread_state_flavor_t flavors[EXC_TYPES_COUNT];
+
+ mach_msg_type_number_t count2 = EXC_TYPES_COUNT;
+ exception_mask_t masks2[EXC_TYPES_COUNT];
+ mach_port_t ports[EXC_TYPES_COUNT];
+ exception_behavior_t behaviors2[EXC_TYPES_COUNT];
+ thread_state_flavor_t flavors2[EXC_TYPES_COUNT];
+
+ unsigned int exc_port1_kotype = 0, exc_port1_kaddr = 0;
+ unsigned int exc_port2_kotype = 0, exc_port2_kaddr = 0;
+ unsigned int kotype = 0, kobject = 0, exc_port3_kotype = 0, exc_port3_kaddr = 0;
+ boolean_t found_exc_port1 = false;
+ boolean_t found_exc_port2 = false;
+ boolean_t found_exc_port3 = false;
+
+ ipc_info_space_t info_space;
+ ipc_info_name_array_t table;
+ ipc_info_tree_name_array_t tree;
+ mach_msg_type_number_t tblcnt = 0, treecnt = 0;
+
+ /* Create the mach port the exception messages will be sent to. */
+ kr = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &exc_port1);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "Allocated mach exception port");
+ kr = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &exc_port2);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "Allocated mach exception port");
+ kr = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &exc_port3);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "Allocated mach exception port");
+
+ /*
+ * Insert a send right into the exception port that the kernel will use to
+ * send the exception thread the exception messages.
+ */
+ kr = mach_port_insert_right(mach_task_self(), exc_port1, exc_port1, MACH_MSG_TYPE_MAKE_SEND);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "Inserted a SEND right into the exception port");
+ kr = mach_port_insert_right(mach_task_self(), exc_port2, exc_port2, MACH_MSG_TYPE_MAKE_SEND);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "Inserted a SEND right into the exception port");
+ kr = mach_port_insert_right(mach_task_self(), exc_port3, exc_port3, MACH_MSG_TYPE_MAKE_SEND);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "Inserted a SEND right into the exception port");
+
+ T_LOG("exc_port1: 0x%x", exc_port1);
+ T_LOG("exc_port2: 0x%x", exc_port2);
+ T_LOG("exc_port3: 0x%x", exc_port3);
+
+ /* Tell the kernel what port to send exceptions to. */
+ kr = task_set_exception_ports(
+ mach_task_self(),
+ EXC_MASK_GUARD,
+ exc_port1,
+ (exception_behavior_t)(EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES),
+ THREAD_STATE_NONE);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "Set the exception port to my custom handler");
+
+ kr = task_set_exception_ports(
+ mach_task_self(),
+ EXC_MASK_RPC_ALERT, /* why can't this be EXC_CRASH or EXC_MASK_CORPSE_NOTIFY? */
+ exc_port2,
+ (exception_behavior_t)(EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES),
+ THREAD_STATE_NONE);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "Set the exception port to my custom handler");
+
+ kr = task_set_exception_ports(
+ mach_task_self(),
+ EXC_MASK_RESOURCE | EXC_MASK_BREAKPOINT | EXC_MASK_SYSCALL,
+ exc_port3,
+ (exception_behavior_t)(EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES),
+ THREAD_STATE_NONE);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "Set the exception port to my custom handler");
+
+ /* now, get exception ports info */
+ kr = thread_get_exception_ports(mach_thread_self(), EXC_MASK_ALL, masks2, &count2, ports, behaviors2, flavors2);
+ T_EXPECT_MACH_SUCCESS(kr, "thread_get_exception_ports(): 0x%x", kr);
+ T_EXPECT_EQ(count2, 0, "should have 0 exception ports");
+
+ kr = thread_get_exception_ports_info(mach_thread_self(), EXC_MASK_ALL, masks, &count, ports_info, behaviors, flavors);
+ T_EXPECT_MACH_SUCCESS(kr, "thread_get_exception_ports_info(): 0x%x", kr);
+ T_EXPECT_EQ(count, 0, "should have 0 exception ports");
+
+ count = EXC_TYPES_COUNT;
+ count2 = EXC_TYPES_COUNT;
+
+ kr = task_get_exception_ports_info(mach_task_self(), EXC_MASK_ALL, masks, &count, ports_info, behaviors, flavors);
+ T_EXPECT_MACH_SUCCESS(kr, "task_get_exception_ports_info(): 0x%x", kr);
+ T_EXPECT_EQ(count, 4, "should have 4 masks"); /* Returns 3 if one exc_port registers for EXC_CRASH */
+
+ /* get exception ports */
+ kr = task_get_exception_ports(mach_task_self(), EXC_MASK_ALL, masks2, &count2, ports, behaviors2, flavors2);
+ T_EXPECT_MACH_SUCCESS(kr, "task_get_exception_ports(): 0x%x", kr);
+
+ for (int i = 0; i < count2; i++) {
+ T_LOG("exception port name: 0x%x", ports[i]);
+ }
+ T_EXPECT_EQ(count, count2, "should return same mask count");
+
+ kr = memcmp(masks, masks2, count * sizeof(exception_mask_t));
+ T_EXPECT_EQ(kr, 0, "masks should be the same");
+
+ kr = memcmp(behaviors, behaviors2, count * sizeof(exception_behavior_t));
+ T_EXPECT_EQ(kr, 0, "behaviors should be the same");
+
+ kr = memcmp(flavors, flavors2, count * sizeof(thread_state_flavor_t));
+ T_EXPECT_EQ(kr, 0, "flavors should be the same");
+
+ kr = mach_port_kernel_object(mach_task_self(), mach_task_self(), &kotype, &kobject);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_port_kernel_object(): 0x%x", kr);
+ T_LOG("task_self kobject: 0x%x", kobject);
+
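+ /* Map each exception port name in our space to its kernel object address so it can be matched against iip_port_object below. */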
+ T_QUIET; T_EXPECT_MACH_SUCCESS(mach_port_space_info(mach_task_self(), &info_space, &table,
+ &tblcnt, &tree, &treecnt), "mach_port_space_info(): 0x%x", kr);
+
+ for (int i = 0; i < tblcnt; i++) {
+ if (table[i].iin_name == exc_port1) {
+ exc_port1_kaddr = table[i].iin_object;
+ }
+ if (table[i].iin_name == exc_port2) {
+ exc_port2_kaddr = table[i].iin_object;
+ }
+ if (table[i].iin_name == exc_port3) {
+ exc_port3_kaddr = table[i].iin_object;
+ }
+ }
+
+ T_LOG("exc_port_1_kaddr: 0x%x", exc_port1_kaddr);
+ T_LOG("exc_port_2_kaddr: 0x%x", exc_port2_kaddr);
+ T_LOG("exc_port_3_kaddr: 0x%x", exc_port3_kaddr);
+
+ for (int i = 0; i < count; i++) {
+ T_LOG("ports_info[%d].iip_port_object: 0x%x", i, ports_info[i].iip_port_object);
+
+ if (ports_info[i].iip_port_object == exc_port1_kaddr) {
+ T_EXPECT_NE(ports_info[i].iip_port_object, 0,
+ "on debug/kernel, port object should be non-zero: 0x%x", ports_info[i].iip_port_object);
+ T_EXPECT_EQ(ports_info[i].iip_receiver_object, kobject,
+ "receiver object should match task self kobject: 0x%x", ports_info[i].iip_receiver_object);
+ T_EXPECT_EQ(masks[i], EXC_MASK_GUARD, "check if mask for exc_port1 is correct");
+ found_exc_port1 = true;
+ }
+ if (ports_info[i].iip_port_object == exc_port2_kaddr) {
+ T_EXPECT_NE(ports_info[i].iip_port_object, 0,
+ "on debug/kernel, port object should be non-zero: 0x%x", ports_info[i].iip_port_object);
+ T_EXPECT_EQ(ports_info[i].iip_receiver_object, kobject,
+ "receiver object should match task self kobject: 0x%x", ports_info[i].iip_receiver_object);
+ T_EXPECT_EQ(masks[i], EXC_MASK_RPC_ALERT, "check if mask for exc_port2 is correct");
+ found_exc_port2 = true;
+ }
+ if (ports_info[i].iip_port_object == exc_port3_kaddr) {
+ T_EXPECT_NE(ports_info[i].iip_port_object, 0,
+ "on debug/kernel, port object should be non-zero: 0x%x", ports_info[i].iip_port_object);
+ T_EXPECT_EQ(ports_info[i].iip_receiver_object, kobject,
+ "receiver object should match task self kobject: 0x%x", ports_info[i].iip_receiver_object);
+ T_EXPECT_EQ(masks[i], EXC_MASK_RESOURCE | EXC_MASK_BREAKPOINT | EXC_MASK_SYSCALL, "check if mask for exc_port3 is correct");
+ found_exc_port3 = true;
+ }
+ }
+
+ T_EXPECT_TRUE(found_exc_port1, "should find exc_port1");
+ T_EXPECT_TRUE(found_exc_port2, "should find exc_port2");
+ T_EXPECT_TRUE(found_exc_port3, "should find exc_port3");
+}
--- /dev/null
+#include <darwintest.h>
+#include <pthread/private.h>
+#include <sys/sysctl.h>
+#include "exc_helpers.h"
+
+T_GLOBAL_META(
+ T_META_NAMESPACE("xnu.ipc"),
+ T_META_RUN_CONCURRENTLY(true));
+
+static size_t
+exc_immovable_handler(
+ mach_port_t task,
+ mach_port_t thread,
+ __unused exception_type_t type,
+ __unused mach_exception_data_t codes)
+{
+ T_EXPECT_EQ(task, mach_task_self(), "Received immovable task port");
+ T_EXPECT_EQ(thread, pthread_mach_thread_np(pthread_main_thread_np()),
+ "Received immovable thread port");
+ T_END;
+}
+
+T_DECL(exc_immovable, "Test that exceptions receive immovable ports")
+{
+ mach_port_t exc_port = create_exception_port(EXC_MASK_BAD_ACCESS);
+ uint32_t opts = 0;
+ size_t size = sizeof(opts);
+ mach_port_t mp;
+ kern_return_t kr;
+
+ T_LOG("Check if task_exc_guard exception has been enabled\n");
+ int ret = sysctlbyname("kern.ipc_control_port_options", &opts, &size, NULL, 0);
+ T_EXPECT_POSIX_SUCCESS(ret, "sysctlbyname(kern.ipc_control_port_options)");
+
+ if ((opts & 0x30) == 0) {
+ T_SKIP("immovable rights aren't enabled");
+ }
+
+ kr = task_get_special_port(mach_task_self(), TASK_KERNEL_PORT, &mp);
+ T_EXPECT_MACH_SUCCESS(kr, "task_get_special_port");
+ T_EXPECT_NE(mp, mach_task_self(), "should receive movable port");
+
+ /*
+ * do not deallocate the port we received on purpose to check
+ * that the exception will not coalesce with the movable port
+ * we have in our space now
+ */
+
+ run_exception_handler(exc_port, exc_immovable_handler);
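+ /* Dereference NULL on purpose to raise EXC_BAD_ACCESS and drive the handler above. */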
+ *(void *volatile*)0 = 0;
+}
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>com.apple.security.get-movable-control-port</key>
+ <true/>
+</dict>
+</plist>
--- /dev/null
+#include <assert.h>
+#include <stdio.h>
+#include <pthread.h>
+#include <signal.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/wait.h>
+
+#include <darwintest.h>
+
+// rdar://58566604
+// Exercise races of signal delivery vs exec in multi-threaded processes
+
+T_GLOBAL_META(T_META_NAMESPACE("xnu.exec"),
+ T_META_CHECK_LEAKS(false),
+ T_META_ALL_VALID_ARCHS(true));
+
+enum { KILL_ONCE, KILL_MANY, KILL_LAST } kill_mode;
+enum { EXEC_FIRST, EXEC_SECOND, EXEC_LAST } exec_mode;
+
+static int fd[2];
+
+static void
+do_exec(void)
+{
+ char echo_arg[50] = "";
+
+ snprintf(echo_arg, sizeof(echo_arg), " Child[%d] says hello after exec", getpid());
+
+ char * new_argv[] = {
+ "/bin/echo",
+ echo_arg,
+ NULL
+ };
+
+ int ret = execv(new_argv[0], new_argv);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "execv()");
+}
+
+static void*
+thread_main(void* arg)
+{
+ T_LOG("mode: %d, %d: Child[%d] created second thread\n",
+ kill_mode, exec_mode, getpid());
+
+ if (exec_mode == EXEC_SECOND) {
+ int ret = dprintf(fd[1], "Hi!");
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "dprintf()");
+ do_exec();
+ }
+
+ while (1) {
+ }
+ return NULL;
+}
+
+void
+run_test(void)
+{
+ T_LOG("mode: %d, %d: Parent[%d]: forking\n",
+ kill_mode, exec_mode, getpid());
+
+ pid_t child_pid = fork();
+
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(child_pid, "fork()");
+
+ int ret = 0;
+
+ if (child_pid == 0) {
+ pthread_t thread;
+ ret = pthread_create(&thread, NULL, thread_main, NULL);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "pthread_create()");
+
+ if (exec_mode == EXEC_FIRST) {
+ ret = dprintf(fd[1], "Hi!");
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "dprintf()");
+
+ do_exec();
+ }
+
+ while (1) {
+ }
+ } else {
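+ /* Wait for the child to signal via the pipe that it is about to exec, then race the kill(s) against the exec. */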
+ char buffer[4] = "";
+ ret = read(fd[0], buffer, sizeof(buffer));
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "read()");
+
+ T_LOG("mode: %d, %d: Parent[%d]: got: '%s' from execing child, trying to kill and wait\n",
+ kill_mode, exec_mode, getpid(), buffer);
+
+ int killcount = 0, status = 0, waitedpid = 0;
+
+ switch (kill_mode) {
+ case KILL_ONCE:
+ ret = kill(child_pid, SIGKILL);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kill()");
+
+ waitedpid = waitpid(child_pid, &status, 0);
+
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(waitedpid, "waitpid()");
+
+ killcount++;
+ break;
+ case KILL_MANY:
+ while (waitedpid == 0) {
+ ret = kill(child_pid, SIGKILL);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kill()");
+
+ waitedpid = waitpid(child_pid, &status, WNOHANG);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(waitedpid, "waitpid()");
+
+ killcount++;
+ }
+ break;
+ default:
+ break;
+ }
+
+ T_LOG("mode: %d, %d: Parent[%d]: waitpid returned: %d, errno %d (%s), exit signal %d, after %d loops\n",
+ kill_mode, exec_mode, getpid(), waitedpid, errno, strerror(errno), WTERMSIG(status), killcount);
+ }
+}
+
+T_DECL(exec_exit_race_once_first, "Exec-exit race, one kill, exec on first thread") {
+ int rv = pipe(fd);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pipe()");
+
+ kill_mode = KILL_ONCE;
+ exec_mode = EXEC_FIRST;
+
+ for (int i = 0; i < 1000; i++) {
+ run_test();
+ }
+}
+
+T_DECL(exec_exit_race_many_first, "Exec-exit race, many kill, exec on first thread") {
+ int rv = pipe(fd);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pipe()");
+
+ kill_mode = KILL_MANY;
+ exec_mode = EXEC_FIRST;
+
+ for (int i = 0; i < 1000; i++) {
+ run_test();
+ }
+}
+
+T_DECL(exec_exit_race_once_second, "Exec-exit race, one kill, exec on second thread") {
+ int rv = pipe(fd);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pipe()");
+
+ kill_mode = KILL_ONCE;
+ exec_mode = EXEC_SECOND;
+
+ for (int i = 0; i < 1000; i++) {
+ run_test();
+ }
+}
+
+T_DECL(exec_exit_race_many_second, "Exec-exit race, many kill, exec on second thread") {
+ int rv = pipe(fd);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pipe()");
+
+ kill_mode = KILL_MANY;
+ exec_mode = EXEC_SECOND;
+
+ for (int i = 0; i < 1000; i++) {
+ run_test();
+ }
+}
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <darwintest.h>
+#include <mach/mach.h>
+#include <mach/mach_vm.h>
+#include <sys/sysctl.h>
+#include <spawn.h>
+#include <signal.h>
+
+#define IKOT_TASK_CONTROL 2
+
+T_GLOBAL_META(
+ T_META_NAMESPACE("xnu.ipc"),
+ T_META_RUN_CONCURRENTLY(TRUE));
+
+static void
+test_extract_immovable_task_port(pid_t pid)
+{
+ kern_return_t kr;
+ mach_port_t tport = MACH_PORT_NULL;
+ ipc_info_space_t space_info;
+ ipc_info_name_array_t table;
+ mach_msg_type_number_t tableCount;
+ ipc_info_tree_name_array_t tree; /* unused */
+ mach_msg_type_number_t treeCount; /* unused */
+
+ mach_port_t extracted;
+ mach_msg_type_name_t right;
+
+
+ kr = task_for_pid(mach_task_self(), pid, &tport);
+ T_EXPECT_MACH_SUCCESS(kr, "task_for_pid(), tport: 0x%x", tport);
+
+ T_LOG("Target pid: %d", pid);
+
+ if (pid == getpid()) {
+ /* self extraction should succeed */
+ kr = mach_port_extract_right(mach_task_self(), mach_task_self(), MACH_MSG_TYPE_COPY_SEND, &extracted, &right);
+ T_EXPECT_MACH_SUCCESS(kr, "mach_port_extract_right() on immovable port in current space should succeed");
+ } else {
+ unsigned int kotype = 0, kobject = 0;
+ mach_port_name_t tport_name = MACH_PORT_NULL;
+ kr = mach_port_space_info(tport, &space_info, &table, &tableCount, &tree, &treeCount);
+ T_EXPECT_MACH_SUCCESS(kr, "mach_port_space_info()");
+
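+ /* Walk the child's port name space looking for the entry whose kernel object is the task control port. */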
+ for (int i = 0; i < tableCount; i++) {
+ T_LOG("Searching for task port..name: 0x%x", table[i].iin_name);
+ kr = mach_port_kernel_object(tport, table[i].iin_name, &kotype, &kobject);
+ if (KERN_SUCCESS == kr && kotype == IKOT_TASK_CONTROL) {
+ tport_name = table[i].iin_name;
+ break;
+ } else if (kr) {
+ T_LOG("mach_port_kernel_object() failed on name 0x%x, kr: 0x%x", table[i].iin_name, kr);
+ }
+ }
+
+ if (!tport_name) {
+ T_FAIL("Did not find task port in child's space");
+ }
+ T_LOG("Remote tport name: 0x%x", tport_name);
+ kr = mach_port_extract_right(tport, tport_name, MACH_MSG_TYPE_COPY_SEND, &extracted, &right);
+ T_EXPECT_EQ(kr, KERN_INVALID_CAPABILITY, "mach_port_extract_right() on immovable port in child's space should fail (no crash): 0x%x", kr);
+
+ T_LOG("Still alive..");
+ }
+}
+
+T_DECL(extract_right_soft_fail, "Test mach_port_extract_right() fail on extracting child process's task port without crash",
+ T_META_CHECK_LEAKS(false))
+{
+ uint32_t opts = 0;
+ size_t size = sizeof(opts);
+ pid_t child_pid;
+ kern_return_t ret;
+ int status, fd[2];
+
+ T_LOG("Check if immovable control port has been enabled\n");
+ ret = sysctlbyname("kern.ipc_control_port_options", &opts, &size, NULL, 0);
+
+ if (!ret && (opts & 0x20) == 0) {
+ T_SKIP("immovable control port hard enforcement isn't enabled");
+ }
+
+ /* extracting mach_task_self() should succeed */
+ test_extract_immovable_task_port(getpid());
+
+ ret = pipe(fd);
+ T_EXPECT_NE(ret, -1, "pipe creation");
+
+
+ child_pid = fork();
+
+ if (child_pid < 0) {
+ T_FAIL("fork failed()");
+ }
+
+ if (child_pid == 0) {
+ close(fd[0]);
+ write(fd[1], "wakeup", 6);
+ close(fd[1]);
+ } else {
+ close(fd[1]);
+ char data[6];
+ read(fd[0], data, 6); /* blocks until data available */
+ close(fd[0]);
+
+ /* extracting child's immovable task port should fail without crash */
+ test_extract_immovable_task_port(child_pid);
+
+ kill(child_pid, SIGKILL);
+ wait(&status);
+ }
+}
--- /dev/null
+#include <darwintest.h>
+#include <darwintest_utils.h>
+#include <dispatch/dispatch.h>
+#include <mach/mach.h>
+#include <signal.h>
+#include <sys/socket.h>
+
+T_GLOBAL_META(
+ T_META_NAMESPACE("xnu.fd"),
+ T_META_RUN_CONCURRENTLY(true));
+
+
+#define SOCKETPAIR(pair) \
+ T_ASSERT_POSIX_SUCCESS(socketpair(PF_LOCAL, SOCK_STREAM, 0, pair), "socketpair")
+
+
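+/* Send a file descriptor to the peer over a local socket as SCM_RIGHTS ancillary data. */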
+static errno_t
+send_fd(int sock, int fd)
+{
+ struct iovec iovec[1];
+ struct msghdr msg;
+ struct cmsghdr *cmsghdrp;
+ char buf[CMSG_SPACE(sizeof(int))];
+
+ iovec[0].iov_base = "";
+ iovec[0].iov_len = 1;
+ msg.msg_name = 0;
+ msg.msg_namelen = 0;
+ msg.msg_iov = iovec;
+ msg.msg_iovlen = 1;
+ msg.msg_control = buf;
+ msg.msg_controllen = CMSG_SPACE(sizeof(int));
+
+ cmsghdrp = CMSG_FIRSTHDR(&msg);
+ cmsghdrp->cmsg_len = CMSG_LEN(sizeof(int));
+ cmsghdrp->cmsg_level = SOL_SOCKET;
+ cmsghdrp->cmsg_type = SCM_RIGHTS;
+
+ memcpy(CMSG_DATA(cmsghdrp), &fd, sizeof(fd));
+
+ if (sendmsg(sock, &msg, 0) < 0) {
+ return errno;
+ }
+
+ return 0;
+}
+
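+/* Receive a file descriptor sent as SCM_RIGHTS; returns ENOENT if no valid control message arrived. */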
+static errno_t
+recv_fd(int sock, int *fdp)
+{
+ u_char c;
+ struct iovec iovec[1];
+ struct msghdr msg;
+ struct cmsghdr *cmsghdrp;
+ char buf[CMSG_SPACE(sizeof(int))];
+
+ iovec[0].iov_base = &c;
+ iovec[0].iov_len = 1;
+
+ msg.msg_name = 0;
+ msg.msg_namelen = 0;
+ msg.msg_iov = iovec;
+ msg.msg_iovlen = 1;
+ msg.msg_control = buf;
+ msg.msg_controllen = CMSG_SPACE(sizeof(int));
+ msg.msg_flags = 0;
+
+ if (recvmsg(sock, &msg, 0) < 0) {
+ return errno;
+ }
+
+ cmsghdrp = CMSG_FIRSTHDR(&msg);
+ if (cmsghdrp == NULL) {
+ return ENOENT;
+ }
+
+ if (cmsghdrp->cmsg_len != CMSG_LEN(sizeof(int))) {
+ return ENOENT;
+ }
+ if (cmsghdrp->cmsg_level != SOL_SOCKET) {
+ return ENOENT;
+ }
+ if (cmsghdrp->cmsg_type != SCM_RIGHTS) {
+ return ENOENT;
+ }
+
+ memcpy(fdp, CMSG_DATA(cmsghdrp), sizeof(*fdp));
+ return 0;
+}
+
+T_DECL(send, "test for 30465592")
+{
+ int pair[2], fd, status;
+ pid_t child;
+
+ T_ASSERT_POSIX_SUCCESS(socketpair(PF_LOCAL, SOCK_STREAM, 0, pair),
+ "socketpair");
+
+ child = fork();
+ if (child != 0) {
+ fd = open("/dev/null", O_RDWR);
+ T_ASSERT_POSIX_SUCCESS(fd, "open(/dev/null)");
+
+ T_ASSERT_EQ(send_fd(pair[0], fd), 0, "send_fd");
+ T_ASSERT_POSIX_SUCCESS(close(fd), "close(fd)");
+
+ T_EXPECT_POSIX_SUCCESS(waitpid(child, &status, 0), "waitpid");
+ } else {
+ T_QUIET; T_ASSERT_EQ(recv_fd(pair[1], &fd), 0, "recv_fd");
+ T_QUIET; T_ASSERT_NE(fd, -1, "received a proper fd");
+ T_QUIET; T_EXPECT_POSIX_SUCCESS(close(fd), "close(fd)");
+ raise(SIGKILL); /* do not confuse the test system */
+ }
+}
+
+T_DECL(send_kill, "test for 30465592")
+{
+ int pair[2], fd, status;
+ pid_t child;
+
+ T_QUIET; SOCKETPAIR(pair);
+
+ child = fork();
+ if (child != 0) {
+ fd = open("/dev/null", O_RDWR);
+ T_ASSERT_POSIX_SUCCESS(fd, "open(/dev/null)");
+
+ T_ASSERT_EQ(send_fd(pair[0], fd), 0, "send_fd");
+ T_ASSERT_POSIX_SUCCESS(close(fd), "close(fd)");
+
+ T_EXPECT_POSIX_SUCCESS(kill(child, SIGKILL), "kill(child)");
+
+ T_EXPECT_POSIX_SUCCESS(waitpid(child, &status, 0), "waitpid");
+ } else {
+ T_QUIET; T_ASSERT_EQ(recv_fd(pair[1], &fd), 0, "recv_fd");
+ T_QUIET; T_ASSERT_NE(fd, -1, "received a proper fd");
+ T_QUIET; T_EXPECT_POSIX_SUCCESS(close(fd), "close(fd)");
+ raise(SIGKILL); /* do not confuse the test system */
+ }
+}
+
+T_DECL(send_sock, "test for 30465592")
+{
+ int pair[2], fd, status;
+ pid_t child;
+
+ T_QUIET; SOCKETPAIR(pair);
+
+ child = fork();
+ if (child != 0) {
+ int sock[2];
+
+ T_QUIET; SOCKETPAIR(sock);
+
+ T_ASSERT_EQ(send_fd(pair[0], sock[0]), 0, "send_fd");
+ T_ASSERT_POSIX_SUCCESS(close(sock[0]), "close(sock[0])");
+ T_ASSERT_POSIX_SUCCESS(close(sock[1]), "close(sock[1])");
+
+ T_EXPECT_POSIX_SUCCESS(waitpid(child, &status, 0), "waitpid");
+ } else {
+ T_QUIET; T_ASSERT_EQ(recv_fd(pair[1], &fd), 0, "recv_fd");
+ T_QUIET; T_ASSERT_NE(fd, -1, "received a proper fd");
+ T_QUIET; T_EXPECT_POSIX_SUCCESS(close(fd), "close(fd)");
+ raise(SIGKILL); /* do not confuse the test system */
+ }
+}
+
+T_DECL(send_stress, "test for 67133384")
+{
+ int fd;
+
+ fd = open("/dev/null", O_RDWR);
+ T_ASSERT_POSIX_SUCCESS(fd, "open(/dev/null)");
+
+ dispatch_apply(10, NULL, ^(size_t worker) {
+ dispatch_queue_t q = dispatch_queue_create("receiver", NULL);
+ dispatch_group_t g = dispatch_group_create();
+ int pairbuf[2], *pair = pairbuf;
+ int n = 1000;
+
+ SOCKETPAIR(pair);
+
+ dispatch_group_async(g, q, ^{
+ int tmp;
+
+ for (int i = 0; i < n; i++) {
+ T_QUIET; T_ASSERT_EQ(recv_fd(pair[1], &tmp), 0, "recv_fd");
+ T_QUIET; T_ASSERT_NE(tmp, -1, "received a proper fd");
+ T_QUIET; T_EXPECT_POSIX_SUCCESS(close(tmp), "close(tmp)");
+ }
+ });
+ dispatch_release(q);
+
+ for (int i = 0; i < n; i++) {
+ int tmp = dup(fd);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(tmp, "dup");
+ T_QUIET; T_ASSERT_EQ(send_fd(pair[0], tmp), 0, "send_fd");
+ T_QUIET; T_EXPECT_POSIX_SUCCESS(close(tmp), "close(tmp)");
+ }
+ dispatch_group_wait(g, DISPATCH_TIME_FOREVER);
+
+ T_PASS("sent and received %d fds in worker %zd", n, worker);
+
+ T_QUIET; T_EXPECT_POSIX_SUCCESS(close(pair[0]), "close(pair[0])");
+ T_QUIET; T_EXPECT_POSIX_SUCCESS(close(pair[1]), "close(pair[1])");
+ });
+}
#ifdef __arm64__
static size_t
exc_arithmetic_handler(
+ __unused mach_port_t task,
+ __unused mach_port_t thread,
exception_type_t type,
mach_exception_data_t codes_64)
{
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
- <key>com.apple.private.hypervisor</key>
- <true/>
-</dict>
-</plist>
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
- <key>com.apple.security.hypervisor</key>
- <true/>
-</dict>
-</plist>
static void
vm_cleanup()
{
- T_ASSERT_EQ(hv_vm_destroy(), HV_SUCCESS, "Destroyed vm");
+ T_ASSERT_EQ(hv_vm_destroy(), HV_SUCCESS, "Destroyed vm");
free_page_cache();
+
+ pml4 = NULL;
+ pml4_gpa = 0;
}
static pthread_cond_t ready_cond = PTHREAD_COND_INITIALIZER;
vm_cleanup();
}
+
+// Get the number of messages waiting for the specified port
+static int
+get_count(mach_port_t port)
+{
+ int count;
+
+ count = 0;
+ while (true) {
+ hv_ion_message_t msg = {
+ .header.msgh_size = sizeof (msg),
+ .header.msgh_local_port = port,
+ };
+
+ kern_return_t ret = mach_msg(&msg.header, MACH_RCV_MSG | MACH_RCV_TIMEOUT,
+ 0, sizeof (msg), port, 0, MACH_PORT_NULL);
+
+ if (ret != MACH_MSG_SUCCESS) {
+ break;
+ }
+
+ T_QUIET; T_ASSERT_TRUE(msg.addr == 0xab || msg.addr == 0xcd || msg.addr == 0xef,
+ "address is 0xab, 0xcd or 0xef");
+ T_QUIET; T_ASSERT_EQ(msg.value, 0xaaULL, "value written is 0xaa");
+ T_QUIET; T_ASSERT_TRUE(msg.size == 1 || msg.size == 4, "size is 1 or 4");
+
+ count++;
+ }
+
+ return count;
+}
+
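+/*
+ * vCPU thread body: copies the guest test code into a shadow buffer mapped at
+ * guest physical address 0x1000, runs the vCPU until it executes VMCALL, and
+ * returns the number of port-IO exits (all expected on port 0xef) through *arg.
+ */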
+static void *
+pio_monitor(void *arg, hv_vcpuid_t vcpu)
+{
+
+ size_t guest_pages_size = round_page((uintptr_t)&hvtest_end - (uintptr_t)&hvtest_begin);
+ const size_t mem_size = 1 * 1024 * 1024;
+ uint8_t *guest_pages_shadow = valloc(mem_size);
+ int handle_io_count = 0;
+ uint64_t exit_reason = 0;
+
+ setup_real_mode(vcpu);
+
+ bzero(guest_pages_shadow, mem_size);
+ memcpy(guest_pages_shadow+0x1000, &hvtest_begin, guest_pages_size);
+
+ T_ASSERT_EQ(hv_vm_map(guest_pages_shadow, 0x0, mem_size, HV_MEMORY_READ | HV_MEMORY_EXEC), HV_SUCCESS,
+ "map guest memory");
+
+ while (true) {
+ T_QUIET; T_ASSERT_EQ(hv_vcpu_run_until(vcpu, ~(uint64_t)0), HV_SUCCESS, "run VCPU");
+ exit_reason = get_vmcs(vcpu, VMCS_RO_EXIT_REASON);
+
+ if (exit_reason == VMX_REASON_VMCALL) {
+ break;
+ }
+
+ if (exit_reason == VMX_REASON_IRQ) {
+ continue;
+ }
+
+ T_QUIET; T_ASSERT_EQ(exit_reason, (uint64_t)VMX_REASON_IO, "exit reason is IO");
+
+ union {
+ struct {
+ uint64_t io_size:3;
+ uint64_t io_dirn:1;
+ uint64_t io_string:1;
+ uint64_t io_rep:1;
+ uint64_t io_encoding:1;
+ uint64_t __io_resvd0:9;
+ uint64_t io_port:16;
+ uint64_t __io_resvd1:32;
+ } io;
+ uint64_t reg64;
+ } info = {
+ .reg64 = get_vmcs(vcpu, VMCS_RO_EXIT_QUALIFIC),
+ };
+
+ T_QUIET; T_ASSERT_EQ(info.io.io_port, 0xefULL, "exit is a port IO on 0xef");
+
+ handle_io_count++;
+
+ set_vmcs(vcpu, VMCS_GUEST_RIP, get_reg(vcpu, HV_X86_RIP) + get_vmcs(vcpu, VMCS_RO_VMEXIT_INSTR_LEN));
+ }
+
+ free(guest_pages_shadow);
+
+ *((int *)arg) = handle_io_count;
+
+ return NULL;
+}
+
+T_DECL(pio_notifier_arguments, "test adding and removing port IO notifiers")
+{
+ mach_port_t notify_port = MACH_PORT_NULL;
+ kern_return_t kret = KERN_FAILURE;
+ hv_return_t hret = HV_ERROR;
+
+ T_SETUPBEGIN;
+
+ /* Setup notification port. */
+ kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE,
+ &notify_port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "allocate mach port");
+
+ kret = mach_port_insert_right(mach_task_self(), notify_port, notify_port,
+ MACH_MSG_TYPE_MAKE_SEND);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "insert send right");
+
+ /* Setup VM */
+ vm_setup();
+
+ T_SETUPEND;
+
+ /* Add with bad size. */
+ hret = hv_vm_add_pio_notifier(0xab, 7, 1, notify_port, HV_ION_NONE);
+ T_ASSERT_NE(hret, HV_SUCCESS, "adding notifier with bad size");
+
+ /* Add with bad data. */
+ hret = hv_vm_add_pio_notifier(0xab, 1, UINT16_MAX, notify_port, HV_ION_NONE);
+ T_ASSERT_NE(hret, HV_SUCCESS, "adding notifier with bad data");
+
+ /* Add with bad mach port. */
+ hret = hv_vm_add_pio_notifier(0xab, 1, UINT16_MAX, MACH_PORT_NULL, HV_ION_NONE);
+ T_ASSERT_NE(hret, HV_SUCCESS, "adding notifier with bad port");
+
+ /* Add with bad flags. */
+ hret = hv_vm_add_pio_notifier(0xab, 1, 1, notify_port, 0xffff);
+ T_ASSERT_NE(hret, HV_SUCCESS, "adding notifier with bad flags");
+
+ /* Remove when none are installed. */
+ hret = hv_vm_remove_pio_notifier(0xab, 1, 1, notify_port, HV_ION_NONE);
+ T_ASSERT_NE(hret, HV_SUCCESS, "removing a non-existent notifier");
+
+ /* Add duplicate. */
+ hret = hv_vm_add_pio_notifier(0xab, 1, 1, notify_port, HV_ION_NONE);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "adding notifier");
+ hret = hv_vm_add_pio_notifier(0xab, 1, 1, notify_port, HV_ION_NONE);
+ T_ASSERT_NE(hret, HV_SUCCESS, "adding duplicate notifier");
+ hret = hv_vm_remove_pio_notifier(0xab, 1, 1, notify_port, HV_ION_NONE);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "removing notifier");
+
+ /* Add then remove. */
+ hret = hv_vm_add_pio_notifier(0xab, 1, 1, notify_port, HV_ION_NONE);
+ T_ASSERT_EQ(hret, HV_SUCCESS, "adding notifier");
+ hret = hv_vm_remove_pio_notifier(0xab, 1, 1, notify_port, HV_ION_NONE);
+ T_ASSERT_EQ(hret, HV_SUCCESS, "removing notifier");
+
+ /* Add two, remove in reverse order. */
+ hret = hv_vm_add_pio_notifier(0xab, 1, 1, notify_port, HV_ION_NONE);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "adding 1st notifier");
+ hret = hv_vm_add_pio_notifier(0xab, 2, 1, notify_port, HV_ION_NONE);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "adding 2nd notifier");
+ hret = hv_vm_remove_pio_notifier(0xab, 2, 1, notify_port, HV_ION_NONE);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "removing 2nd notifier");
+ hret = hv_vm_remove_pio_notifier(0xab, 1, 1, notify_port, HV_ION_NONE);
+ T_ASSERT_EQ(hret, HV_SUCCESS, "removing notifier in reverse order");
+
+ /* Add with ANY_SIZE and remove. */
+ hret = hv_vm_add_pio_notifier(0xab, 0, 1, notify_port, HV_ION_ANY_SIZE);
+ T_ASSERT_EQ(hret, HV_SUCCESS, "adding notifier with ANY_SIZE");
+ hret = hv_vm_remove_pio_notifier(0xab, 0, 1, notify_port, HV_ION_ANY_SIZE);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "removing notifier with ANY_SIZE");
+
+ /* Add with ANY_VALUE and remove. */
+ hret = hv_vm_add_pio_notifier(0xab, 1, 1, notify_port, HV_ION_ANY_VALUE);
+ T_ASSERT_EQ(hret, HV_SUCCESS, "adding notifier with ANY_VALUE");
+ hret = hv_vm_remove_pio_notifier(0xab, 1, 1, notify_port, HV_ION_ANY_VALUE);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "removing notifier with ANY_VALUE");
+
+ vm_cleanup();
+
+ mach_port_mod_refs(mach_task_self(), notify_port, MACH_PORT_RIGHT_RECEIVE, -1);
+}
+
+T_DECL(pio_notifier_bad_port, "test port IO notifiers when the port is destroyed/deallocated/has no receive right")
+{
+ pthread_t vcpu_thread;
+ mach_port_t notify_port = MACH_PORT_NULL;
+ int handle_io_count = 0;
+ kern_return_t kret = KERN_FAILURE;
+ hv_return_t hret = HV_ERROR;
+
+ /* Setup VM */
+ vm_setup();
+
+ /*
+ * Test that nothing bad happens when the notification port is
+ * added and mach_port_destroy() is called.
+ */
+
+ /* Add a notification port. */
+ kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE,
+ &notify_port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "allocate mach port");
+
+ /* Insert send right. */
+ kret = mach_port_insert_right(mach_task_self(), notify_port, notify_port,
+ MACH_MSG_TYPE_MAKE_SEND);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "insert send right");
+
+ /* All port writes to 0xef. */
+ hret = hv_vm_add_pio_notifier(0xef, 0, 0, notify_port,
+ HV_ION_ANY_VALUE | HV_ION_ANY_SIZE);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "adding notifier for all writes "
+ "to port 0xef");
+
+ /* After adding, destroy the port. */
+ kret = mach_port_destroy(mach_task_self(), notify_port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "destroying notify port");
+
+ vcpu_thread = create_vcpu_thread((vcpu_entry_function)
+ (((uintptr_t)pio_entry_basic & PAGE_MASK) + 0x1000), 0, pio_monitor,
+ &handle_io_count);
+ T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
+
+ /* Expect the messages to be lost. */
+ T_ASSERT_EQ(0, handle_io_count, "0 expected IO exits when port destroyed");
+
+ hret = hv_vm_remove_pio_notifier(0xef, 0, 0, notify_port, HV_ION_ANY_SIZE | HV_ION_ANY_VALUE);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "removing notifier for all writes to port 0xef");
+
+ vm_cleanup();
+
+
+ vm_setup();
+ /*
+ * Test that nothing bad happens when the notification port is added and
+ * mach_port_mod_refs() is called.
+ */
+
+ /* Add a notification port. */
+ kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE,
+ &notify_port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "allocate mach port");
+
+ /* Insert send right. */
+ kret = mach_port_insert_right(mach_task_self(), notify_port, notify_port,
+ MACH_MSG_TYPE_MAKE_SEND);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "insert send right");
+
+ /* All port writes to 0xef. */
+ hret = hv_vm_add_pio_notifier(0xef, 0, 0, notify_port,
+ HV_ION_ANY_VALUE | HV_ION_ANY_SIZE);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "adding notifier for all writes "
+ "to port 0xef");
+
+ /* After adding, remove receive right. */
+ kret = mach_port_mod_refs(mach_task_self(), notify_port, MACH_PORT_RIGHT_RECEIVE, -1);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "removing receive right");
+
+ vcpu_thread = create_vcpu_thread((vcpu_entry_function)
+ (((uintptr_t)pio_entry_basic & PAGE_MASK) + 0x1000), 0, pio_monitor,
+ &handle_io_count);
+ T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
+
+ /* Expect messages to be lost. */
+ T_ASSERT_EQ(0, handle_io_count, "0 expected IO exits when receive right removed");
+
+ hret = hv_vm_remove_pio_notifier(0xef, 0, 0, notify_port, HV_ION_ANY_SIZE | HV_ION_ANY_VALUE);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "removing notifier for all writes to port 0xef");
+
+ vm_cleanup();
+
+
+ vm_setup();
+ /*
+ * Test that nothing bad happens when the notification port is added and
+ * mach_port_deallocate() is called.
+ */
+
+ /* Add a notification port. */
+ kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE,
+ &notify_port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "allocate mach port");
+
+ /* Insert send right. */
+ kret = mach_port_insert_right(mach_task_self(), notify_port, notify_port,
+ MACH_MSG_TYPE_MAKE_SEND);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "insert send right");
+
+ /* All port writes to 0xef. */
+ hret = hv_vm_add_pio_notifier(0xef, 0, 0, notify_port,
+ HV_ION_ANY_VALUE | HV_ION_ANY_SIZE);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "adding notifier for all writes "
+ "to port 0xef");
+
+ /* After adding, call mach_port_deallocate(). */
+ kret = mach_port_deallocate(mach_task_self(), notify_port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "destroying notify port");
+
+ vcpu_thread = create_vcpu_thread((vcpu_entry_function)
+ (((uintptr_t)pio_entry_basic & PAGE_MASK) + 0x1000), 0, pio_monitor,
+ &handle_io_count);
+ T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
+
+ /* Expect messages to be lost. */
+ T_ASSERT_EQ(0, handle_io_count, "0 expected IO exits when port deallocated");
+
+ hret = hv_vm_remove_pio_notifier(0xef, 0, 0, notify_port, HV_ION_ANY_SIZE | HV_ION_ANY_VALUE);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "removing notifier for all writes to port 0xef");
+
+ vm_cleanup();
+}
+
+T_DECL(pio_notifier, "test port IO notifiers")
+{
+ #define MACH_PORT_COUNT 4
+ mach_port_t notify_port[MACH_PORT_COUNT] = { MACH_PORT_NULL };
+ int handle_io_count = 0;
+ kern_return_t kret = KERN_FAILURE;
+ hv_return_t hret = HV_ERROR;
+
+ T_SETUPBEGIN;
+
+ /* Setup notification ports. */
+ for (int i = 0; i < MACH_PORT_COUNT; i++) {
+ kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE,
+ &notify_port[i]);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "allocate mach port");
+
+ kret = mach_port_insert_right(mach_task_self(), notify_port[i], notify_port[i],
+ MACH_MSG_TYPE_MAKE_SEND);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "insert send right");
+ }
+ /* Setup VM */
+ vm_setup();
+
+ T_SETUPEND;
+
+ /* Test that messages are properly sent to mach port notifiers. */
+
+ /* One for all port writes to 0xab. */
+ hret = hv_vm_add_pio_notifier(0xab, 0, 0, notify_port[0],
+ HV_ION_ANY_VALUE | HV_ION_ANY_SIZE);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "adding notifier for all writes "
+ "to port 0xab");
+
+ /* One for 4-byte writes of 0xaa. */
+ hret = hv_vm_add_pio_notifier(0xab, 4, 0xaa, notify_port[1], HV_ION_NONE);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "adding notifier for 4 byte writes "
+ "to port 0xab");
+
+ /* One for all writes to 0xcd (ignoring queue full errors). */
+ hret = hv_vm_add_pio_notifier(0xcd, 0, 0, notify_port[2],
+ HV_ION_ANY_SIZE | HV_ION_ANY_VALUE);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "adding notifier for all writes "
+ "to port 0xcd, ignoring if the queue fills");
+
+ /* One for writes to 0xef asking for exits when the queue is full. */
+ hret = hv_vm_add_pio_notifier(0xef, 0, 0, notify_port[3],
+ HV_ION_ANY_SIZE | HV_ION_ANY_VALUE | HV_ION_EXIT_FULL);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "adding notifier for all writes "
+ "to port 0xef, not ignoring if the queue fills");
+
+ pthread_t vcpu_thread = create_vcpu_thread((vcpu_entry_function)
+ (((uintptr_t)pio_entry & PAGE_MASK) + 0x1000), 0, pio_monitor,
+ &handle_io_count);
+ T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
+
+ /* Expect messages to be waiting. */
+ T_ASSERT_EQ(4, get_count(notify_port[0]), "expected 4 messages");
+ T_ASSERT_EQ(1, get_count(notify_port[1]), "expected 1 messages");
+ T_ASSERT_EQ(10, get_count(notify_port[2]) + handle_io_count, "expected IO exits");
+ T_ASSERT_EQ(5, get_count(notify_port[3]), "expected 5 messages");
+
+ hret = hv_vm_remove_pio_notifier(0xab, 0, 0, notify_port[0], HV_ION_ANY_SIZE | HV_ION_ANY_VALUE);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "removing notifier for all writes to port 0xab");
+
+ hret = hv_vm_remove_pio_notifier(0xab, 4, 0xaa, notify_port[1], HV_ION_NONE);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "removing notifier for 4 byte writes "
+ "to port 0xab");
+
+ hret = hv_vm_remove_pio_notifier(0xcd, 0, 0, notify_port[2], HV_ION_ANY_SIZE | HV_ION_ANY_VALUE);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "removing notifier for all writes "
+ "to port 0xcd, ignoring if the queue fills");
+
+ hret = hv_vm_remove_pio_notifier(0xef, 0, 0, notify_port[3], HV_ION_ANY_SIZE | HV_ION_ANY_VALUE | HV_ION_EXIT_FULL);
+ T_QUIET; T_ASSERT_EQ(hret, HV_SUCCESS, "removing notifier for all writes "
+ "to port 0xef, not ignoring if the queue fills");
+
+ vm_cleanup();
+
+ for (int i = 0; i < MACH_PORT_COUNT; i++) {
+ mach_port_mod_refs(mach_task_self(), notify_port[i], MACH_PORT_RIGHT_RECEIVE, -1);
+ }
+}
vmcall
+.code16
+
+ // Perform a fixed number of port I/Os with various arguments.
+ .global _pio_entry
+_pio_entry:
+
+ movl $0xaa, %eax
+
+ outl %eax, $0xab
+
+ movl $3, %ecx
+1: outb %al, $0xab
+ loop 1b
+
+ movl $10, %ecx
+1: outb %al, $0xcd
+ loop 1b
+
+ movl $10, %ecx
+1: outb %al, $0xef
+ loop 1b
+
+ movl $0x23456, %eax
+ vmcall
+
+.code16
+ // Perform 10 port I/Os on 0xef.
+ .global _pio_entry_basic
+_pio_entry_basic:
+
+ movl $10, %ecx
+1: outb %al, $0xef
+ loop 1b
+
+ movl $0x23456, %eax
+ vmcall
+
.global _hvtest_end
_hvtest_end:
extern void radar61961809_prepare(uint64_t) OS_NORETURN;
extern void radar61961809_loop64(uint64_t) OS_NORETURN;
extern void radar60691363_entry(uint64_t) OS_NORETURN;
+extern void pio_entry(uint64_t) OS_NORETURN;
+extern void pio_entry_basic(uint64_t) OS_NORETURN;
#define MSR_IA32_STAR 0xc0000081
#define MSR_IA32_LSTAR 0xc0000082
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <darwintest.h>
+#include <mach/mach.h>
+#include <mach/mach_vm.h>
+#include <excserver.h>
+#include <sys/sysctl.h>
+#include <spawn.h>
+#include <signal.h>
+#include <TargetConditionals.h>
+
+#define MAX_ARGV 3
+#define EXC_CODE_SHIFT 32
+#define EXC_GUARD_TYPE_SHIFT 29
+#define MAX_TEST_NUM 13
+
+#define TASK_EXC_GUARD_MP_DELIVER 0x10
+
+extern char **environ;
+static uint64_t exception_code = 0;
+static exception_type_t exception_taken = 0;
+
+#define IKOT_TASK_CONTROL 2
+
+/*
+ * This test verifies behaviors of immovable/pinned task/thread ports.
+ *
+ * 1. Compare and verifies port names of mach_{task, thread}_self(),
+ * {TASK, THREAD}_KERNEL_PORT, and ports returned from task_threads()
+ * and processor_set_tasks().
+ * 2. Make sure correct exceptions are raised resulting from moving immovable
+ * task/thread control, read and inspect ports.
+ * 3. Make sure correct exceptions are raised resulting from deallocating pinned
+ * task/thread control ports.
+ * 4. Make sure immovable ports cannot be stashed:
+ * rdar://70585367 (Disallow immovable port stashing with *_set_special_port() and mach_port_register())
+ */
+T_GLOBAL_META(
+ T_META_NAMESPACE("xnu.ipc"),
+ T_META_RUN_CONCURRENTLY(TRUE));
+
+static uint64_t test_exception_code[] = {
+ /* Pinning tests. Currently delivered as soft crash */
+ EXC_GUARD, // Soft crash delivered as EXC_CORPSE_NOTIFY
+ EXC_GUARD,
+ EXC_GUARD,
+ EXC_GUARD,
+ EXC_GUARD,
+
+ /* Immovable tests. Currently delivered as hard crash */
+ (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_IMMOVABLE,
+ (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_IMMOVABLE,
+ (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_IMMOVABLE,
+ (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_IMMOVABLE,
+ (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_IMMOVABLE,
+ (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_IMMOVABLE,
+ (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_IMMOVABLE,
+ (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_IMMOVABLE,
+};
+
+kern_return_t
+catch_mach_exception_raise_state(mach_port_t exception_port,
+ exception_type_t exception,
+ const mach_exception_data_t code,
+ mach_msg_type_number_t code_count,
+ int * flavor,
+ const thread_state_t old_state,
+ mach_msg_type_number_t old_state_count,
+ thread_state_t new_state,
+ mach_msg_type_number_t * new_state_count)
+{
+#pragma unused(exception_port, exception, code, code_count, flavor, old_state, old_state_count, new_state, new_state_count)
+ T_FAIL("Unsupported catch_mach_exception_raise_state");
+ return KERN_NOT_SUPPORTED;
+}
+
+kern_return_t
+catch_mach_exception_raise_state_identity(mach_port_t exception_port,
+ mach_port_t thread,
+ mach_port_t task,
+ exception_type_t exception,
+ mach_exception_data_t code,
+ mach_msg_type_number_t code_count,
+ int * flavor,
+ thread_state_t old_state,
+ mach_msg_type_number_t old_state_count,
+ thread_state_t new_state,
+ mach_msg_type_number_t * new_state_count)
+{
+#pragma unused(exception_port, thread, task, exception, code, code_count, flavor, old_state, old_state_count, new_state, new_state_count)
+ T_FAIL("Unsupported catch_mach_exception_raise_state_identity");
+ return KERN_NOT_SUPPORTED;
+}
+
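+/*
+ * EXCEPTION_DEFAULT handler invoked by mach_exc_server(): deallocates the
+ * thread/task rights carried in the message and records the exception type
+ * (EXC_GUARD or EXC_CORPSE_NOTIFY) and first code word for the main test to
+ * compare against test_exception_code[].
+ */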
+kern_return_t
+catch_mach_exception_raise(mach_port_t exception_port,
+ mach_port_t thread,
+ mach_port_t task,
+ exception_type_t exception,
+ mach_exception_data_t code,
+ mach_msg_type_number_t code_count)
+{
+#pragma unused(exception_port, code_count)
+ pid_t pid;
+ kern_return_t kr = pid_for_task(task, &pid);
+ T_EXPECT_MACH_SUCCESS(kr, "pid_for_task");
+ T_LOG("Crashing child pid: %d, continuing...\n", pid);
+
+ kr = mach_port_deallocate(mach_task_self(), thread);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
+ kr = mach_port_deallocate(mach_task_self(), task);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
+
+ T_LOG("Caught exception type: %d code: 0x%llx", exception, *((uint64_t*)code));
+ if (exception == EXC_GUARD || exception == EXC_CORPSE_NOTIFY) {
+ exception_taken = exception;
+ exception_code = *((uint64_t *)code);
+ } else {
+ T_FAIL("Unexpected exception");
+ }
+ return KERN_SUCCESS;
+}
+
+static void *
+exception_server_thread(void *arg)
+{
+ kern_return_t kr;
+ mach_port_t exc_port = *(mach_port_t *)arg;
+
+ /* Handle exceptions on exc_port */
+ kr = mach_msg_server_once(mach_exc_server, 4096, exc_port, 0);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_msg_server_once");
+
+ return NULL;
+}
+
+static mach_port_t
+alloc_exception_port(void)
+{
+ kern_return_t kret;
+ mach_port_t exc_port = MACH_PORT_NULL;
+ mach_port_t task = mach_task_self();
+
+ kret = mach_port_allocate(task, MACH_PORT_RIGHT_RECEIVE, &exc_port);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kret, "mach_port_allocate exc_port");
+
+ kret = mach_port_insert_right(task, exc_port, exc_port, MACH_MSG_TYPE_MAKE_SEND);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kret, "mach_port_insert_right exc_port");
+
+ return exc_port;
+}
+
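+/*
+ * Immovable ports must not be stashable: task/thread_set_special_port() and
+ * mach_ports_register() should return KERN_INVALID_RIGHT for them, while a
+ * freshly allocated (movable) port should still register successfully.
+ */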
+static void
+test_immovable_port_stashing(void)
+{
+ kern_return_t kr;
+ mach_port_t port;
+
+ kr = task_set_special_port(mach_task_self(), TASK_BOOTSTRAP_PORT, mach_task_self());
+ T_EXPECT_EQ(kr, KERN_INVALID_RIGHT, "should disallow task_set_special_port() with immovable port");
+
+ kr = thread_set_special_port(mach_thread_self(), THREAD_KERNEL_PORT, mach_thread_self());
+ T_EXPECT_EQ(kr, KERN_INVALID_RIGHT, "should disallow task_set_special_port() with immovable port");
+
+ mach_port_t stash[1] = {mach_task_self()};
+ kr = mach_ports_register(mach_task_self(), stash, 1);
+ T_EXPECT_EQ(kr, KERN_INVALID_RIGHT, "should disallow mach_ports_register() with immovable port");
+
+ T_QUIET; T_ASSERT_MACH_SUCCESS(mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port), "mach_port_allocate");
+ T_QUIET; T_ASSERT_MACH_SUCCESS(mach_port_insert_right(mach_task_self(), port, port, MACH_MSG_TYPE_MAKE_SEND), "mach_port_insert_right");
+
+ stash[0] = port;
+ kr = mach_ports_register(mach_task_self(), stash, 1);
+ T_EXPECT_MACH_SUCCESS(kr, "mach_ports_register() should succeed with movable port");
+}
+
+static void
+test_task_thread_port_values(void)
+{
+ T_LOG("Compare various task/thread control port values\n");
+ kern_return_t kr;
+ mach_port_t port, th_self;
+ thread_array_t threadList;
+ mach_msg_type_number_t threadCount = 0;
+ boolean_t found_self = false;
+ processor_set_name_array_t psets;
+ processor_set_t pset_priv;
+ task_array_t taskList;
+ mach_msg_type_number_t pcnt = 0, tcnt = 0;
+ mach_port_t host = mach_host_self();
+
+ /* Compare with task/thread_get_special_port() */
+ kr = task_get_special_port(mach_task_self(), TASK_KERNEL_PORT, &port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_get_special_port() - TASK_KERNEL_PORT");
+ T_EXPECT_NE(port, mach_task_self(), "TASK_KERNEL_PORT should not match mach_task_self()");
+ mach_port_deallocate(mach_task_self(), port);
+
+ kr = task_for_pid(mach_task_self(), getpid(), &port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_for_pid()");
+ T_EXPECT_EQ(port, mach_task_self(), "task_for_pid(self) should match mach_task_self()");
+ mach_port_deallocate(mach_task_self(), port);
+
+ th_self = mach_thread_self();
+ kr = thread_get_special_port(th_self, THREAD_KERNEL_PORT, &port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "thread_get_special_port() - THREAD_KERNEL_PORT");
+ T_EXPECT_NE(port, th_self, "THREAD_KERNEL_PORT should not match mach_thread_self()");
+ mach_port_deallocate(mach_task_self(), port);
+
+ /* Make sure task_threads() return immovable thread ports */
+ kr = task_threads(mach_task_self(), &threadList, &threadCount);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_threads()");
+ T_QUIET; T_ASSERT_GE(threadCount, 1, "should have at least 1 thread");
+
+ for (size_t i = 0; i < threadCount; i++) {
+ if (th_self == threadList[i]) { /* th_self is immovable */
+ found_self = true;
+ break;
+ }
+ }
+
+ T_EXPECT_TRUE(found_self, "task_threads() should return immovable thread self");
+
+ for (size_t i = 0; i < threadCount; i++) {
+ mach_port_deallocate(mach_task_self(), threadList[i]);
+ }
+
+ if (threadCount > 0) {
+ mach_vm_deallocate(mach_task_self(),
+ (mach_vm_address_t)threadList,
+ threadCount * sizeof(mach_port_t));
+ }
+
+ mach_port_deallocate(mach_task_self(), th_self);
+
+ /* Make sure processor_set_tasks() return immovable task self */
+ kr = host_processor_sets(host, &psets, &pcnt);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "host_processor_sets");
+ T_QUIET; T_ASSERT_GE(pcnt, 1, "should have at least 1 processor set");
+
+ kr = host_processor_set_priv(host, psets[0], &pset_priv);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "host_processor_set_priv");
+ for (size_t i = 0; i < pcnt; i++) {
+ mach_port_deallocate(mach_task_self(), psets[i]);
+ }
+ mach_port_deallocate(mach_task_self(), host);
+ vm_deallocate(mach_task_self(), (vm_address_t)psets, (vm_size_t)pcnt * sizeof(mach_port_t));
+
+ kr = processor_set_tasks_with_flavor(pset_priv, TASK_FLAVOR_CONTROL, &taskList, &tcnt);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "processor_set_tasks_with_flavor");
+ T_QUIET; T_ASSERT_GE(tcnt, 1, "should have at least 1 task");
+ mach_port_deallocate(mach_task_self(), pset_priv);
+
+ found_self = false;
+ for (size_t i = 0; i < tcnt; i++) {
+ if (taskList[i] == mach_task_self()) {
+ found_self = true;
+ break;
+ }
+ }
+
+ T_EXPECT_TRUE(found_self, " processor_set_tasks() should return immovable task self");
+
+ for (size_t i = 0; i < tcnt; i++) {
+ mach_port_deallocate(mach_task_self(), taskList[i]);
+ }
+
+ if (tcnt > 0) {
+ mach_vm_deallocate(mach_task_self(),
+ (mach_vm_address_t)taskList,
+ tcnt * sizeof(mach_port_t));
+ }
+}
+
+T_DECL(imm_pinned_control_port, "Test pinned & immovable task and thread control ports",
+ T_META_IGNORECRASHES(".*pinned_rights_child.*"),
+ T_META_CHECK_LEAKS(false))
+{
+ uint32_t task_exc_guard = 0;
+ size_t te_size = sizeof(task_exc_guard);
+ posix_spawnattr_t attrs;
+ char *test_prog_name = "./imm_pinned_control_port_crasher";
+ char *child_args[MAX_ARGV];
+ pid_t client_pid = 0;
+ uint32_t opts = 0;
+ size_t size = sizeof(opts);
+ mach_port_t exc_port;
+ pthread_t s_exc_thread;
+ uint64_t exc_id;
+
+ T_LOG("Check if task_exc_guard exception has been enabled\n");
+ int ret = sysctlbyname("kern.task_exc_guard_default", &task_exc_guard, &te_size, NULL, 0);
+ T_ASSERT_EQ(ret, 0, "sysctlbyname");
+
+ if (!(task_exc_guard & TASK_EXC_GUARD_MP_DELIVER)) {
+ T_SKIP("task_exc_guard exception is not enabled");
+ }
+
+ T_LOG("Check if immovable control port has been enabled\n");
+ ret = sysctlbyname("kern.ipc_control_port_options", &opts, &size, NULL, 0);
+
+ if (!ret && (opts & 0x30) == 0) {
+ T_SKIP("immovable control port isn't enabled");
+ }
+
+ /* first, try out comparing various task/thread ports */
+ test_task_thread_port_values();
+
+ /* try stashing immovable ports: rdar://70585367 */
+ test_immovable_port_stashing();
+
+ /* spawn a child and see if EXC_GUARD are correctly generated */
+ for (int i = 0; i < MAX_TEST_NUM; i++) {
+ /* Create the exception port for the child */
+ exc_port = alloc_exception_port();
+ T_QUIET; T_ASSERT_NE(exc_port, MACH_PORT_NULL, "Create a new exception port");
+
+ /* Create exception serving thread */
+ ret = pthread_create(&s_exc_thread, NULL, exception_server_thread, &exc_port);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "pthread_create exception_server_thread");
+
+ /* Initialize posix_spawn attributes */
+ posix_spawnattr_init(&attrs);
+
+ int err = posix_spawnattr_setexceptionports_np(&attrs, EXC_MASK_GUARD | EXC_MASK_CORPSE_NOTIFY, exc_port,
+ (exception_behavior_t) (EXCEPTION_DEFAULT | MACH_EXCEPTION_CODES), 0);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(err, "posix_spawnattr_setflags");
+
+ child_args[0] = test_prog_name;
+ char test_num[10];
+ sprintf(test_num, "%d", i);
+ child_args[1] = test_num;
+ child_args[2] = NULL;
+
+ T_LOG("========== Spawning new child ==========");
+ err = posix_spawn(&client_pid, child_args[0], NULL, &attrs, &child_args[0], environ);
+ T_ASSERT_POSIX_SUCCESS(err, "posix_spawn control_port_options_client = %d test_num = %d", client_pid, i);
+
+ /* try extracting child task port: rdar://71744817
+ * Moved to tests/extract_right_soft_fail.c
+ */
+ // test_extract_immovable_task_port(client_pid);
+
+ int child_status;
+ /* Wait for child and check for exception */
+ if (-1 == waitpid(-1, &child_status, 0)) {
+ T_FAIL("waitpid: child mia");
+ }
+
+ if (WIFEXITED(child_status) && WEXITSTATUS(child_status)) {
+ T_FAIL("Child exited with status = %x", child_status);
+ T_END;
+ }
+
+ sleep(1);
+ kill(client_pid, SIGKILL);
+
+ ret = pthread_join(s_exc_thread, NULL);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "pthread_join");
+
+ if (exception_taken == EXC_GUARD) {
+ exc_id = exception_code >> EXC_CODE_SHIFT;
+ } else {
+ exc_id = exception_code;
+ }
+
+ T_LOG("Exception code: Received code = 0x%llx Expected code = 0x%llx", exc_id, test_exception_code[i]);
+ T_EXPECT_EQ(exc_id, test_exception_code[i], "Exception code: Received == Expected");
+ }
+}
--- /dev/null
+#include <mach/mach.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <assert.h>
+
+/*
+ * DO NOT run this test file by itself.
+ * This test is meant to be invoked by control_port_options darwintest.
+ *
+ * If hard enforcement for pinned control ports is on, the five pinned_test_* cases
+ * (pinned_test_main_thread_mod_ref through pinned_test_task_threads_mod_ref) are
+ * expected to generate a fatal EXC_GUARD.
+ *
+ * If hard enforcement for immovable control ports is on, the eight immovable_test_* cases
+ * (immovable_test_move_send_task_self through immovable_test_copy_send_thread_read) are
+ * expected to generate a fatal EXC_GUARD.
+ *
+ * The type of exception raised (if any) is checked on control_port_options side.
+ */
+#define MAX_TEST_NUM 13
+
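+/*
+ * Try to send the given port right, with the requested disposition, as a port
+ * descriptor in a Mach message addressed to a throwaway local receive right.
+ * Returns the mach_msg_send() result so callers can log the failure mode.
+ */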
+static int
+attempt_send_immovable_port(mach_port_name_t port, mach_msg_type_name_t disp)
+{
+ mach_port_t server;
+ kern_return_t kr;
+ kr = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &server);
+ assert(kr == 0);
+
+ kr = mach_port_insert_right(mach_task_self(), server, server, MACH_MSG_TYPE_MAKE_SEND);
+ assert(kr == 0);
+
+ struct {
+ mach_msg_header_t header;
+ mach_msg_body_t body;
+ mach_msg_port_descriptor_t desc;
+ } msg;
+
+ msg.header.msgh_remote_port = server;
+ msg.header.msgh_local_port = MACH_PORT_NULL;
+ msg.header.msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0) | MACH_MSGH_BITS_COMPLEX;
+ msg.header.msgh_size = sizeof msg;
+
+ msg.body.msgh_descriptor_count = 1;
+
+ msg.desc.name = port;
+ msg.desc.disposition = disp;
+ msg.desc.type = MACH_MSG_PORT_DESCRIPTOR;
+
+ return mach_msg_send(&msg.header);
+}
+
+static void
+pinned_test_main_thread_mod_ref()
+{
+ printf("[Crasher]: Mod refs main thread's self port to 0\n");
+ mach_port_t thread_self = mach_thread_self();
+ kern_return_t kr = mach_port_mod_refs(mach_task_self(), thread_self, MACH_PORT_RIGHT_SEND, -2);
+
+ printf("[Crasher pinned_test_main_thread_mod_ref] mach_port_mod_refs returned %s \n.", mach_error_string(kr));
+}
+
+static void *
+pthread_run(void *arg __unused)
+{
+ printf("[Crasher]: Deallocate pthread_self\n");
+ mach_port_t th_self = pthread_mach_thread_np(pthread_self());
+ kern_return_t kr = mach_port_deallocate(mach_task_self(), th_self);
+
+ printf("[Crasher pinned_test_pthread_dealloc] mach_port_deallocate returned %s \n.", mach_error_string(kr));
+ return NULL;
+}
+
+static void
+pinned_test_pthread_dealloc()
+{
+ printf("[Crasher]: Create a pthread and deallocate its self port\n");
+ pthread_t thread;
+ int ret = pthread_create(&thread, NULL, pthread_run, NULL);
+ assert(ret == 0);
+ ret = pthread_join(thread, NULL);
+ assert(ret == 0);
+}
+
+static void
+pinned_test_task_self_dealloc()
+{
+ printf("[Crasher]: Deallocate mach_task_self twice\n");
+ mach_port_t task_self = mach_task_self();
+ kern_return_t kr = mach_port_deallocate(task_self, task_self);
+ assert(kr == 0);
+ kr = mach_port_deallocate(task_self, task_self);
+
+ printf("[Crasher pinned_test_task_self_dealloc] mach_port_deallocate returned %s \n.", mach_error_string(kr));
+}
+
+static void
+pinned_test_task_self_mod_ref()
+{
+ printf("[Crasher]: Mod refs mach_task_self() to 0\n");
+ kern_return_t kr = mach_port_mod_refs(mach_task_self(), mach_task_self(), MACH_PORT_RIGHT_SEND, -2);
+
+ printf("[Crasher pinned_test_task_self_mod_ref] mach_port_mod_refs returned %s \n.", mach_error_string(kr));
+}
+
+static void
+pinned_test_task_threads_mod_ref()
+{
+ printf("[Crasher]: task_threads should return pinned thread ports. Mod refs them to 0\n");
+ thread_array_t th_list;
+ mach_msg_type_number_t th_cnt;
+ kern_return_t kr;
+ mach_port_t th_kp = mach_thread_self();
+ mach_port_deallocate(mach_task_self(), th_kp);
+
+ kr = task_threads(mach_task_self(), &th_list, &th_cnt);
+ mach_port_deallocate(mach_task_self(), th_list[0]);
+
+ kr = mach_port_mod_refs(mach_task_self(), th_list[0], MACH_PORT_RIGHT_SEND, -1);
+
+ printf("[Crasher pinned_test_task_threads_mod_ref] mach_port_mod_refs returned %s \n.", mach_error_string(kr));
+}
+
+static void
+immovable_test_move_send_task_self()
+{
+ kern_return_t kr;
+ printf("[Crasher]: Move send mach_task_self_\n");
+ kr = attempt_send_immovable_port(mach_task_self(), MACH_MSG_TYPE_MOVE_SEND);
+
+ printf("[Crasher immovable_test_move_send_task_self] attempt_send_immovable_port returned %s \n.", mach_error_string(kr));
+}
+
+static void
+immovable_test_copy_send_task_self()
+{
+ kern_return_t kr;
+ printf("[Crasher]: Copy send mach_task_self_\n");
+ kr = attempt_send_immovable_port(mach_task_self(), MACH_MSG_TYPE_COPY_SEND);
+
+ printf("[Crasher immovable_test_copy_send_task_self] attempt_send_immovable_port returned %s \n.", mach_error_string(kr));
+}
+
+static void
+immovable_test_move_send_thread_self()
+{
+ kern_return_t kr;
+ printf("[Crasher]: Move send main thread's self port\n");
+ kr = attempt_send_immovable_port(mach_thread_self(), MACH_MSG_TYPE_MOVE_SEND);
+
+ printf("[Crasher immovable_test_move_send_thread_self] attempt_send_immovable_port returned %s \n.", mach_error_string(kr));
+}
+
+static void
+immovable_test_copy_send_thread_self()
+{
+ kern_return_t kr;
+ mach_port_t port;
+ printf("[Crasher]: Copy send main thread's self port\n");
+ port = mach_thread_self();
+ kr = attempt_send_immovable_port(port, MACH_MSG_TYPE_COPY_SEND);
+ printf("[Crasher immovable_test_copy_send_thread_self] attempt_send_immovable_port returned %s \n.", mach_error_string(kr));
+
+ mach_port_deallocate(mach_task_self(), port);
+}
+
+static void
+immovable_test_copy_send_task_read()
+{
+ kern_return_t kr;
+ mach_port_t port;
+ printf("[Crasher]: Copy send task read port\n");
+ kr = task_get_special_port(mach_task_self(), TASK_READ_PORT, &port);
+ assert(kr == 0);
+ kr = attempt_send_immovable_port(port, MACH_MSG_TYPE_COPY_SEND);
+ printf("[Crasher immovable_test_copy_send_task_read] attempt_send_immovable_port returned %s \n.", mach_error_string(kr));
+
+ mach_port_deallocate(mach_task_self(), port);
+}
+
+static void
+immovable_test_copy_send_task_inspect()
+{
+ kern_return_t kr;
+ mach_port_t port;
+ printf("[Crasher]: Move send task inspect port\n");
+ kr = task_get_special_port(mach_task_self(), TASK_INSPECT_PORT, &port);
+ assert(kr == 0);
+ kr = attempt_send_immovable_port(port, MACH_MSG_TYPE_MOVE_SEND);
+ printf("[Crasher immovable_test_copy_send_task_inspect] attempt_send_immovable_port returned %s \n.", mach_error_string(kr));
+}
+
+static void
+immovable_test_move_send_thread_inspect()
+{
+ kern_return_t kr;
+ mach_port_t port;
+ mach_port_t th_port = mach_thread_self();
+
+ printf("[Crasher]: Move send thread inspect port\n");
+ kr = thread_get_special_port(th_port, THREAD_INSPECT_PORT, &port);
+ assert(kr == 0);
+ kr = attempt_send_immovable_port(port, MACH_MSG_TYPE_MOVE_SEND);
+ printf("[Crasher immovable_test_move_send_thread_inspect] attempt_send_immovable_port returned %s \n.", mach_error_string(kr));
+
+ mach_port_deallocate(mach_task_self(), th_port);
+}
+
+static void
+immovable_test_copy_send_thread_read()
+{
+ kern_return_t kr;
+ mach_port_t port;
+ mach_port_t th_port = mach_thread_self();
+
+ printf("[Crasher]: Copy send thread read port\n");
+ kr = thread_get_special_port(th_port, THREAD_READ_PORT, &port);
+ assert(kr == 0);
+ kr = attempt_send_immovable_port(port, MACH_MSG_TYPE_COPY_SEND);
+ printf("[Crasher immovable_test_copy_send_thread_read] attempt_send_immovable_port returned %s \n.", mach_error_string(kr));
+
+ mach_port_deallocate(mach_task_self(), port);
+ mach_port_deallocate(mach_task_self(), th_port);
+}
+
+int
+main(int argc, char *argv[])
+{
+ void (*tests[MAX_TEST_NUM])(void) = {
+ pinned_test_main_thread_mod_ref,
+ pinned_test_pthread_dealloc,
+ pinned_test_task_self_dealloc,
+ pinned_test_task_self_mod_ref,
+ pinned_test_task_threads_mod_ref,
+
+ immovable_test_move_send_task_self,
+ immovable_test_copy_send_task_self,
+ immovable_test_move_send_thread_self,
+ immovable_test_copy_send_thread_self,
+ immovable_test_copy_send_task_read,
+ immovable_test_copy_send_task_inspect,
+ immovable_test_move_send_thread_inspect,
+ immovable_test_copy_send_thread_read,
+ };
+ printf("[Crasher]: My Pid: %d\n", getpid());
+
+ if (argc < 2) {
+ printf("[Crasher]: Specify a test to run.");
+ exit(-1);
+ }
+
+ int test_num = atoi(argv[1]);
+
+ if (test_num >= 0 && test_num < MAX_TEST_NUM) {
+ (*tests[test_num])();
+ } else {
+ printf("[Crasher]: Invalid test num. Exiting...\n");
+ exit(-1);
+ }
+
+ exit(0);
+}
+++ /dev/null
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <darwintest.h>
-#include <pthread.h>
-#include <signal.h>
-#include <libproc.h>
-#include <mach/mach.h>
-#include <mach/mach_vm.h>
-#include <mach/mach_error.h>
-#include <System/sys/codesign.h>
-#include <sys/proc.h>
-
-int task_for_pid(mach_port_name_t target_tport, int pid, mach_port_name_t *t);
-int task_read_for_pid(mach_port_name_t target_tport, int pid, mach_port_name_t *t);
-int task_inspect_for_pid(mach_port_name_t target_tport, int pid, mach_port_name_t *t);
-int task_name_for_pid(mach_port_name_t target_tport, int pid, mach_port_name_t *t);
-static int test_conversion_eval(pid_t current, pid_t victim, int translation);
-
-static int g_tfpFail = 0;
-static int g_trfpFail = 0;
-static int g_tifpFail = 0;
-static int g_tnfpFail = 0;
-
-static pthread_mutex_t g_lock;
-
-#define NAME 0
-#define INSPECT 1
-#define READ 2
-#define FULL 3
-#define POLY 4
-
-/*
- * 3. child still spawn as platform binary
- */
-
-/* Mimic the behavior of task_conversion_eval in kernel.
- */
-static int
-test_conversion_eval(pid_t current, pid_t victim, int translation)
-{
- uint32_t my_csflags = 0;
- uint32_t victim_csflags = 0;
- csops(victim, CS_OPS_STATUS, &victim_csflags, sizeof(victim_csflags));
- csops(current, CS_OPS_STATUS, &my_csflags, sizeof(my_csflags));
-
- switch (translation) {
- case FULL:
- case READ:
- if (victim == 0) {
- return false;
- }
- if (!(my_csflags & CS_PLATFORM_BINARY) && (victim_csflags & CS_PLATFORM_BINARY)) {
- return false;
- }
- break;
- default:
- break;
- }
-
- return true;
-}
-
-static void
-check_result(kern_return_t kr, int port_type, int translation, int low, char *test_str, pid_t victim)
-{
- char error[100];
-
- if (translation == POLY) {
- if (port_type == FULL) {
- translation = INSPECT;
- } else {
- translation = port_type;
- }
- }
-
- if (port_type < low) {
- goto fail;
- } else if (port_type < translation) {
- goto fail;
- } else if (!test_conversion_eval(getpid(), victim, translation)) {
- goto fail;
- } else {
- goto success;
- }
-
-fail:
- snprintf(error, sizeof(error), "%s should fail with %d on %d.\n", test_str, port_type, victim);
- T_QUIET; T_EXPECT_NE(kr, 0, "check_result: %s", error);
- return;
-success:
- snprintf(error, sizeof(error), "%s should succeed with %d on %d.\n", test_str, port_type, victim);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "check_result: %s", error);
- return;
-}
-
-static void
-test_thread_port(mach_port_name_t thread, int type, pid_t victim)
-{
- kern_return_t kr;
- mach_port_t name = MACH_PORT_NULL;
- thread_info_data_t th_info;
- mach_msg_type_number_t th_info_cnt = THREAD_INFO_MAX;
-
- kr = thread_info(thread, THREAD_BASIC_INFO, (thread_info_t)th_info, &th_info_cnt);
- check_result(kr, type, INSPECT, INSPECT, "thread_info", victim);
-
- kr = thread_get_special_port(thread, THREAD_KERNEL_PORT, &name);
- check_result(kr, type, POLY, FULL, "thread_get_special_port: THREAD_KERNEL_PORT", victim);
- kr = mach_port_deallocate(mach_task_self(), name);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
-
- kr = thread_get_special_port(thread, THREAD_READ_PORT, &name);
- check_result(kr, type, POLY, READ, "thread_get_special_port: THREAD_READ_PORT", victim);
- kr = mach_port_deallocate(mach_task_self(), name);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
-
- kr = thread_get_special_port(thread, THREAD_INSPECT_PORT, &name);
- check_result(kr, type, POLY, INSPECT, "thread_get_special_port: THREAD_INSPECT_PORT", victim);
- kr = mach_port_deallocate(mach_task_self(), name);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
-}
-
-static void
-test_task_port(mach_port_name_t port, int type)
-{
- kern_return_t kr;
- volatile int data = 0x4141;
- volatile int new_value = 0x4242;
- pid_t victim;
- if (port == MACH_PORT_NULL) {
- return;
- }
- kr = pid_for_task(port, &victim);
- if (victim == -1) {
- T_LOG("pid_for_task: port = 0x%x, type = %u is not valid anymore", port, type);
- return;
- }
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "pid_for_task, port = 0x%x, type = %u, pid = %u", port, type, victim);
-
- /************* TASK_INFO ************/
- struct task_basic_info info = {};
- mach_msg_type_number_t cnt = sizeof(info);
- kr = task_info(port, TASK_BASIC_INFO, (task_info_t)&info, &cnt);
- check_result(kr, type, NAME, NAME, "task_info", victim);
-
- /************ MACH_VM_* ************/
-
- if (victim == getpid()) {
- kr = mach_vm_write(port,
- (mach_vm_address_t)&data,
- (vm_offset_t)&new_value,
- (mach_msg_type_number_t)sizeof(int));
- check_result(kr, type, FULL, FULL, "mach_vm_write", victim);
-
- vm_offset_t read_value = 0;
- mach_msg_type_number_t read_cnt = 0;
- kr = mach_vm_read(port,
- (mach_vm_address_t)&data,
- (mach_msg_type_number_t)sizeof(int),
- &read_value,
- &read_cnt);
- check_result(kr, type, READ, READ, "mach_vm_read", victim);
- }
-
- /************ TASK_GET_SPECIAL_PORT ************/
-
- mach_port_t name = MACH_PORT_NULL;
- kr = task_get_special_port(port, TASK_KERNEL_PORT, &name);
- check_result(kr, type, POLY, FULL, "task_get_special_port: TASK_KERNEL_PORT", victim);
-
- name = MACH_PORT_NULL;
- kr = task_get_special_port(port, TASK_READ_PORT, &name);
- check_result(kr, type, POLY, READ, "task_get_special_port: TASK_READ_PORT", victim);
- if (kr == KERN_SUCCESS) {
- kr = mach_port_deallocate(mach_task_self(), name);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
- }
-
- name = MACH_PORT_NULL;
- kr = task_get_special_port(port, TASK_INSPECT_PORT, &name);
- check_result(kr, type, POLY, INSPECT, "task_get_special_port: TASK_INSPECT_PORT", victim);
- if (kr == KERN_SUCCESS) {
- kr = mach_port_deallocate(mach_task_self(), name);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
- }
-
- name = MACH_PORT_NULL;
- kr = task_get_special_port(port, TASK_NAME_PORT, &name);
- check_result(kr, type, POLY, INSPECT, "task_get_special_port: TASK_NAME_PORT", victim);
- if (kr == KERN_SUCCESS) {
- kr = mach_port_deallocate(mach_task_self(), name);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
- }
-
- name = MACH_PORT_NULL;
- kr = task_get_special_port(port, TASK_HOST_PORT, &name);
- check_result(kr, type, POLY, FULL, "task_get_special_port: TASK_HOST_PORT", victim);
- if (kr == KERN_SUCCESS) {
- if (victim == getpid()) {
- mach_port_t host = mach_host_self();
- T_QUIET; T_EXPECT_EQ(host, name, "mach_host_self == task_get_special_port(.. TASK_HOST_PORT)");
- }
- }
-
- name = MACH_PORT_NULL;
- kr = task_get_special_port(port, TASK_BOOTSTRAP_PORT, &name);
- check_result(kr, type, POLY, FULL, "task_get_special_port: TASK_BOOTSTRAP_PORT", victim);
-
- /************ TEST IPC_SPACE_READ AND IPC_SPACE_INSPECT ************/
- if (victim == getpid()) {
- mach_port_status_t status;
- mach_msg_type_number_t statusCnt = MACH_PORT_LIMITS_INFO_COUNT;
- kr = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &name);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, 0, "mach_port_allocate should succeed");
-
- kr = mach_port_get_attributes(port, name, MACH_PORT_LIMITS_INFO, (mach_port_info_t)&status, &statusCnt);
- check_result(kr, type, POLY, READ, "mach_port_get_attributes", victim);
-
- mach_port_context_t context;
- kr = mach_port_get_context(port, name, &context);
- check_result(kr, type, POLY, READ, "mach_port_get_context", victim);
-
- kr = mach_port_destruct(mach_task_self(), name, 0, 0);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_destruct");
- }
-
- ipc_info_space_basic_t sinfo;
- kr = mach_port_space_basic_info(port, &sinfo);
- check_result(kr, type, INSPECT, INSPECT, "mach_port_space_basic_info", victim);
-
- /************ MACH_PORT_ALLOCATE ************/
-
- mach_port_t new_port = MACH_PORT_NULL;
- kr = mach_port_allocate(port, MACH_PORT_RIGHT_RECEIVE, &new_port);
- check_result(kr, type, FULL, FULL, "mach_port_allocate", victim);
- if (kr == KERN_SUCCESS) {
- kr = mach_port_destruct(port, new_port, 0, 0);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_destruct");
- }
-
- /************ INSPECT INTERFACES ************/
- int counts[2];
- mach_msg_type_number_t size = TASK_INSPECT_BASIC_COUNTS_COUNT;
- kr = task_inspect(port, TASK_INSPECT_BASIC_COUNTS, counts, &size);
- check_result(kr, type, INSPECT, INSPECT, "task_inspect", victim);
-
- /************ TASK_SET_SPECIAL_PORT ************/
-
- if (type == FULL) {
- new_port = MACH_PORT_NULL;
- kr = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &new_port);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_allocate");
- kr = mach_port_insert_right(mach_task_self(), new_port, new_port, MACH_MSG_TYPE_MAKE_SEND);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_insert_right");
-
- mach_port_t backup;
- kr = task_get_special_port(port, TASK_BOOTSTRAP_PORT, &backup);
- check_result(kr, type, POLY, FULL, "task_get_special_port", victim);
- kr = task_set_special_port(port, TASK_BOOTSTRAP_PORT, new_port);
- check_result(kr, type, FULL, FULL, "task_set_special_port", victim);
- kr = task_set_special_port(port, TASK_BOOTSTRAP_PORT, backup);
- check_result(kr, type, FULL, FULL, "task_set_special_port", victim);
-
- kr = mach_port_deallocate(mach_task_self(), new_port);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
- mach_port_mod_refs(mach_task_self(), new_port, MACH_PORT_RIGHT_RECEIVE, -1);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_mod_refs");
- }
- /************ TASK_THREADS ************/
- thread_array_t th_list;
- mach_msg_type_number_t th_cnt = 0;
-
- kr = task_threads(port, &th_list, &th_cnt);
- check_result(kr, type, POLY, INSPECT, "task_threads", victim);
-
- /* Skip thread ports tests if task_threads() fails */
- if (kr != KERN_SUCCESS) {
- return;
- }
-
- /************ THREAD_GET_SPECIAL_PORT ************/
- mach_port_t special = MACH_PORT_NULL;
-
- switch (type) {
- case FULL:
- kr = thread_get_special_port(th_list[0], THREAD_KERNEL_PORT, &special);
- break;
- case READ:
- kr = thread_get_special_port(th_list[0], THREAD_READ_PORT, &special);
- break;
- case INSPECT:
- kr = thread_get_special_port(th_list[0], THREAD_INSPECT_PORT, &special);
- break;
- default:
- break;
- }
-
- T_QUIET; T_EXPECT_EQ(special, th_list[0], "thread_get_special_port should match task_threads");
-
- kr = mach_port_deallocate(mach_task_self(), special);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
-
- for (unsigned int i = 0; i < th_cnt; i++) {
- test_thread_port(th_list[i], type, victim); /* polymorphic */
- kr = mach_port_deallocate(mach_task_self(), th_list[i]);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
- }
-}
-
-static void
-test_get_child_port(int with_sleep)
-{
- pid_t child_pid;
- kern_return_t kr;
- mach_port_name_t tr, ti, tp, tn;
-
- child_pid = fork();
-
- if (child_pid < 0) {
- T_FAIL("fork failed in test_get_child_port.");
- }
-
- if (child_pid == 0) {
- while (1) {
- sleep(10);
- }
- }
-
- kr = task_for_pid(mach_task_self(), child_pid, &tp);
- if (with_sleep) {
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "task_for_pid for child %u", child_pid);
- } else if (kr != 0) {
- g_tfpFail++;
- }
-
- kr = task_read_for_pid(mach_task_self(), child_pid, &tr);
- if (with_sleep) {
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "task_read_for_pid for child %u", child_pid);
- } else if (kr != 0) {
- g_trfpFail++;
- }
-
- kr = task_inspect_for_pid(mach_task_self(), child_pid, &ti);
- if (with_sleep) {
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "task_inspect_for_pid for child %u", child_pid);
- } else if (kr != 0) {
- g_tifpFail++;
- }
-
- kr = task_name_for_pid(mach_task_self(), child_pid, &tn);
- if (with_sleep) {
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "task_name_for_pid for child %u", child_pid);
- } else if (kr != 0) {
- g_tnfpFail++;
- }
-
- kr = mach_port_deallocate(mach_task_self(), tp);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
- kr = mach_port_deallocate(mach_task_self(), tr);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
- kr = mach_port_deallocate(mach_task_self(), ti);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
- kr = mach_port_deallocate(mach_task_self(), tn);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
-
- kill(child_pid, SIGKILL);
- int status;
- wait(&status);
-}
-
-static void
-test_child_exec()
-{
- pid_t child_pid;
- kern_return_t kr;
- mach_port_name_t tr2, ti2, tp2, tn2;
-
- child_pid = fork();
-
- if (child_pid < 0) {
- T_FAIL("fork failed in test_child_exec.");
- }
-
- if (child_pid == 0) {
- execve("/bin/bash", NULL, NULL);
- }
-
- sleep(10);
-
- kr = task_name_for_pid(mach_task_self(), child_pid, &tn2);
- test_task_port(tn2, NAME);
-
- kr = task_for_pid(mach_task_self(), child_pid, &tp2);
- test_task_port(tp2, FULL);
-
- kr = task_read_for_pid(mach_task_self(), child_pid, &tr2);
- test_task_port(tr2, READ);
-
- kr = task_inspect_for_pid(mach_task_self(), child_pid, &ti2);
- test_task_port(ti2, INSPECT);
-
- kr = mach_port_deallocate(mach_task_self(), tp2);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
- kr = mach_port_deallocate(mach_task_self(), tr2);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
- kr = mach_port_deallocate(mach_task_self(), ti2);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
- kr = mach_port_deallocate(mach_task_self(), tn2);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
-
- kill(child_pid, SIGKILL);
- int status;
- wait(&status);
-}
-
-static void *
-thread_run()
-{
- pthread_mutex_lock(&g_lock);
- pthread_mutex_unlock(&g_lock);
-
- pthread_exit(NULL);
-
- return NULL;
-}
-
-#ifdef T_NOCODESIGN
-#define TEST_NAME inspect_read_port_nocodesign
-#else
-#define TEST_NAME inspect_read_port
-#endif
-
-T_DECL(TEST_NAME, "inspect and read port test", T_META_ASROOT(true))
-{
- kern_return_t kr;
- pid_t pid = 0;
- mach_port_t port = MACH_PORT_NULL;
-
- kr = pid_for_task(mach_task_self(), &pid);
- T_EXPECT_MACH_SUCCESS(kr, "pid_for_task: My Pid = %d", pid);
-
-#ifdef T_NOCODESIGN
- T_LOG("Running as non-platform binary...\n");
-#else
- T_LOG("Running as platform binary...\n");
-#endif
-
- kr = task_for_pid(mach_task_self(), pid, &port);
- T_EXPECT_EQ(kr, 0, "task_for_pid(mach_task_self..): %u", port);
- T_EXPECT_EQ(port, mach_task_self(), "task_for_pid == mach_task_self");
- test_task_port(port, FULL);
-
- port = MACH_PORT_NULL;
- kr = task_read_for_pid(mach_task_self(), pid, &port);
- T_EXPECT_EQ(kr, 0, "task_read_for_pid(mach_task_self..): read port = %u", port);
- test_task_port(port, READ);
- kr = mach_port_deallocate(mach_task_self(), port);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
-
- port = MACH_PORT_NULL;
- kr = task_inspect_for_pid(mach_task_self(), pid, &port);
- T_EXPECT_EQ(kr, 0, "task_inspect_for_pid(mach_task_self..): inspect port = %u", port);
- test_task_port(port, INSPECT);
- kr = mach_port_deallocate(mach_task_self(), port);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
-
- port = MACH_PORT_NULL;
- kr = task_name_for_pid(mach_task_self(), pid, &port);
- T_EXPECT_EQ(kr, 0, "task_name_for_pid(mach_task_self..): name port = %u", port);
- test_task_port(port, NAME);
- kr = mach_port_deallocate(mach_task_self(), port);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
-
- port = MACH_PORT_NULL;
- kr = task_read_for_pid(mach_task_self(), 0, &port);
- T_EXPECT_NE(kr, 0, "task_read_for_pid for kernel should fail");
-
- /* task_read_for_pid loop, check for leaks */
- for (int i = 0; i < 0x1000; i++) {
- kr = task_read_for_pid(mach_task_self(), pid, &port);
- test_task_port(port, READ);
- kr = mach_port_deallocate(mach_task_self(), port);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
- }
-
- /* task_inspect_for_pid loop, check for leaks */
- for (int i = 0; i < 0x1000; i++) {
- kr = task_inspect_for_pid(mach_task_self(), pid, &port);
- test_task_port(port, INSPECT);
- kr = mach_port_deallocate(mach_task_self(), port);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
- }
-
- /* fork-exec a child process */
- test_child_exec();
-
- /* fork, get full/read/inspect/name port for the child then kill it */
- for (int i = 0; i < 10; i++) {
- test_get_child_port(TRUE);
- }
-
- T_LOG("tfp fail: %d, trfp fail: %d, tifp fail: %d, tnfp fail: %d, TOTAL: 10\n",
- g_tfpFail, g_trfpFail, g_tifpFail, g_tnfpFail);
-
-
- /* task thread loop, check for leaks */
- thread_array_t th_list;
- mach_msg_type_number_t th_cnt;
- pthread_t thread;
-
- pthread_mutex_init(&g_lock, NULL);
- pthread_mutex_lock(&g_lock);
-
- for (unsigned i = 0; i < 0x100; i++) {
- pthread_create(&thread, NULL, thread_run, NULL);
- }
-
- for (unsigned i = 0; i < 0x1000; i++) {
- kr = task_threads(mach_task_self(), &th_list, &th_cnt);
- T_QUIET; T_ASSERT_EQ(th_cnt, 0x101, "257 threads");
-
- for (unsigned j = 0; j < th_cnt; j++) {
- kr = mach_port_deallocate(mach_task_self(), th_list[j]);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
- }
- }
- pthread_mutex_unlock(&g_lock);
-
- /* processor_set_tasks_with_flavor */
-
- processor_set_name_array_t psets;
- processor_set_t pset;
- task_array_t tasks;
- mach_msg_type_number_t pcnt, tcnt;
- mach_port_t host = mach_host_self();
-
- kr = host_processor_sets(host, &psets, &pcnt);
- kr = host_processor_set_priv(host, psets[0], &pset);
-
- kr = processor_set_tasks_with_flavor(pset, TASK_FLAVOR_CONTROL, &tasks, &tcnt);
- T_EXPECT_EQ(kr, 0, "processor_set_tasks_with_flavor: TASK_FLAVOR_CONTROL should succeed");
- for (unsigned int i = 0; i < tcnt; i++) {
- test_task_port(tasks[i], FULL);
- kr = mach_port_deallocate(mach_task_self(), tasks[i]);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
- }
-
- kr = processor_set_tasks_with_flavor(pset, TASK_FLAVOR_READ, &tasks, &tcnt);
- T_EXPECT_EQ(kr, 0, "processor_set_tasks_with_flavor: TASK_FLAVOR_READ should succeed");
- for (unsigned int i = 0; i < tcnt; i++) {
- test_task_port(tasks[i], READ);
- kr = mach_port_deallocate(mach_task_self(), tasks[i]);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
- }
-
- kr = processor_set_tasks_with_flavor(pset, TASK_FLAVOR_INSPECT, &tasks, &tcnt);
- T_EXPECT_EQ(kr, 0, "processor_set_tasks_with_flavor: TASK_FLAVOR_INSPECT should succeed");
- for (unsigned int i = 0; i < tcnt; i++) {
- test_task_port(tasks[i], INSPECT);
- kr = mach_port_deallocate(mach_task_self(), tasks[i]);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
- }
-
- kr = processor_set_tasks_with_flavor(pset, TASK_FLAVOR_NAME, &tasks, &tcnt);
- T_EXPECT_EQ(kr, 0, "processor_set_tasks_with_flavor: TASK_FLAVOR_NAME should succeed");
- for (unsigned int i = 0; i < tcnt; i++) {
- test_task_port(tasks[i], NAME);
- kr = mach_port_deallocate(mach_task_self(), tasks[i]);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
- }
-
- // Cleanup
- for (unsigned int i = 0; i < pcnt; i++) {
- kr = mach_port_deallocate(mach_task_self(), psets[i]);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
- }
-
- kr = mach_port_deallocate(mach_task_self(), pset);
- T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_port_deallocate");
-}
--- /dev/null
+#include <darwintest.h>
+#include <darwintest_multiprocess.h>
+#include <launch.h>
+#include <servers/bootstrap.h>
+#include <sys/sysctl.h>
+#include "exc_helpers.h"
+
+T_GLOBAL_META(
+ T_META_NAMESPACE("xnu.ipc"),
+ T_META_RUN_CONCURRENTLY(true));
+
+#pragma mark - helpers
+
+#define SERVICE_NAME "com.apple.xnu.test.mach_port"
+
+struct one_port_msg {
+ mach_msg_header_t header;
+ mach_msg_body_t body;
+ mach_msg_port_descriptor_t port_descriptor;
+ mach_msg_trailer_t trailer; // subtract this when sending
+};
+
+static mach_port_t
+server_checkin(void)
+{
+ mach_port_t mp;
+ kern_return_t kr;
+
+ kr = bootstrap_check_in(bootstrap_port, SERVICE_NAME, &mp);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "bootstrap_check_in");
+ return mp;
+}
+
+static mach_port_t
+server_lookup(void)
+{
+ mach_port_t mp;
+ kern_return_t kr;
+
+ kr = bootstrap_look_up(bootstrap_port, SERVICE_NAME, &mp);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "bootstrap_look_up");
+ return mp;
+}
+
+static mach_port_t
+make_sr_port(void)
+{
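+ /* Construct a receive right with one send right pre-inserted (MPO_INSERT_SEND_RIGHT) */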
+ mach_port_options_t opts = {
+ .flags = MPO_INSERT_SEND_RIGHT,
+ };
+ kern_return_t kr;
+ mach_port_t port;
+
+ kr = mach_port_construct(mach_task_self(), &opts, 0ull, &port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_port_construct");
+ return port;
+}
+
+static void
+destroy_port(mach_port_t port, bool receive, int srights)
+{
+ kern_return_t kr;
+
+ if (srights) {
+ kr = mach_port_mod_refs(mach_task_self(), port,
+ MACH_PORT_RIGHT_SEND, -srights);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "srights -= %d", srights);
+ }
+ if (receive) {
+ kr = mach_port_mod_refs(mach_task_self(), port,
+ MACH_PORT_RIGHT_RECEIVE, -1);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "receive -= 1");
+ }
+}
+
+static void
+send_port(
+ mach_msg_id_t id,
+ mach_port_t dest,
+ mach_port_t right,
+ mach_msg_type_name_t disp)
+{
+ struct one_port_msg msg = {
+ .header = {
+ .msgh_remote_port = dest,
+ .msgh_bits = MACH_MSGH_BITS_SET(MACH_MSG_TYPE_COPY_SEND,
+ 0, MACH_MSG_TYPE_MOVE_SEND, MACH_MSGH_BITS_COMPLEX),
+ .msgh_id = id,
+ .msgh_size = offsetof(struct one_port_msg, trailer),
+ },
+ .body = {
+ .msgh_descriptor_count = 1,
+ },
+ .port_descriptor = {
+ .name = right,
+ .disposition = disp,
+ .type = MACH_MSG_PORT_DESCRIPTOR,
+ },
+ };
+ kern_return_t kr;
+
+ kr = mach_msg(&msg.header, MACH_SEND_MSG | MACH_SEND_TIMEOUT,
+ msg.header.msgh_size, 0, MACH_PORT_NULL, 10000, 0);
+
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "send(%d)", id);
+}
+
+#pragma mark - basic test about right deduplication
+
+static mach_port_t
+receive_port(
+ mach_msg_id_t expected_id,
+ mach_port_t rcv_port,
+ mach_msg_type_name_t expected_disp)
+{
+ struct one_port_msg msg = { };
+ kern_return_t kr;
+
+ T_LOG("waiting for message %d", expected_id);
+ kr = mach_msg(&msg.header, MACH_RCV_MSG, 0,
+ sizeof(msg), rcv_port, 0, 0);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "receive(%d)", expected_id);
+ T_QUIET; T_ASSERT_EQ(msg.header.msgh_id, expected_id, "message id matches");
+ T_QUIET; T_ASSERT_NE(msg.header.msgh_bits & MACH_MSGH_BITS_COMPLEX, 0,
+ "message is complex");
+ T_QUIET; T_ASSERT_EQ(msg.body.msgh_descriptor_count, 1, "message has one right");
+ T_QUIET; T_ASSERT_EQ(msg.port_descriptor.disposition, expected_disp,
+ "port has right disposition");
+ return msg.port_descriptor.name;
+}
+
+T_HELPER_DECL(right_dedup_server, "right_dedup_server")
+{
+ mach_port_t svc_port = server_checkin();
+ mach_port_t ports[3];
+
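+ /*
+ * Each group of three messages below carries rights to one and the same port, so
+ * the received names should all be deduplicated into a single name in this space,
+ * regardless of where the receive right appears in the sequence.
+ */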
+ ports[0] = receive_port(1, svc_port, MACH_MSG_TYPE_MOVE_RECEIVE);
+ ports[1] = receive_port(2, svc_port, MACH_MSG_TYPE_MOVE_SEND);
+ ports[2] = receive_port(3, svc_port, MACH_MSG_TYPE_MOVE_SEND);
+ T_ASSERT_EQ(ports[0], ports[1], "receive, send, send");
+ T_ASSERT_EQ(ports[0], ports[2], "receive, send, send");
+ destroy_port(ports[0], true, 2);
+
+ ports[0] = receive_port(4, svc_port, MACH_MSG_TYPE_MOVE_SEND);
+ ports[1] = receive_port(5, svc_port, MACH_MSG_TYPE_MOVE_RECEIVE);
+ ports[2] = receive_port(6, svc_port, MACH_MSG_TYPE_MOVE_SEND);
+ T_ASSERT_EQ(ports[0], ports[1], "send, receive, send");
+ T_ASSERT_EQ(ports[0], ports[2], "send, receive, send");
+ destroy_port(ports[0], true, 2);
+
+ ports[0] = receive_port(7, svc_port, MACH_MSG_TYPE_MOVE_SEND);
+ ports[1] = receive_port(8, svc_port, MACH_MSG_TYPE_MOVE_SEND);
+ ports[2] = receive_port(9, svc_port, MACH_MSG_TYPE_MOVE_RECEIVE);
+ T_ASSERT_EQ(ports[0], ports[1], "send, send, receive");
+ T_ASSERT_EQ(ports[0], ports[2], "send, send, receive");
+ destroy_port(ports[0], true, 2);
+
+ T_END;
+}
+
+T_HELPER_DECL(right_dedup_client, "right_dedup_client")
+{
+ mach_port_t svc_port = server_lookup();
+ mach_port_t port;
+
+ port = make_sr_port();
+ send_port(1, svc_port, port, MACH_MSG_TYPE_MOVE_RECEIVE);
+ send_port(2, svc_port, port, MACH_MSG_TYPE_COPY_SEND);
+ send_port(3, svc_port, port, MACH_MSG_TYPE_MOVE_SEND);
+
+ port = make_sr_port();
+ send_port(4, svc_port, port, MACH_MSG_TYPE_COPY_SEND);
+ send_port(5, svc_port, port, MACH_MSG_TYPE_MOVE_RECEIVE);
+ send_port(6, svc_port, port, MACH_MSG_TYPE_MOVE_SEND);
+
+ port = make_sr_port();
+ send_port(7, svc_port, port, MACH_MSG_TYPE_COPY_SEND);
+ send_port(8, svc_port, port, MACH_MSG_TYPE_MOVE_SEND);
+ send_port(9, svc_port, port, MACH_MSG_TYPE_MOVE_RECEIVE);
+}
+
+T_DECL(right_dedup, "make sure right deduplication works")
+{
+ dt_helper_t helpers[] = {
+ dt_launchd_helper_domain("com.apple.xnu.test.mach_port.plist",
+ "right_dedup_server", NULL, LAUNCH_SYSTEM_DOMAIN),
+ dt_fork_helper("right_dedup_client"),
+ };
+ dt_run_helpers(helpers, 2, 600);
+}
#include <stdint.h>
#include "ktrace_helpers.h"
+#include "test_utils.h"
T_GLOBAL_META(
T_META_NAMESPACE("xnu.ktrace"),
BSDDBG_CODE(DBG_BSD_KDEBUG_TEST, 19),
};
-static bool
-is_development_kernel(void)
-{
- static dispatch_once_t is_development_once;
- static bool is_development;
-
- dispatch_once(&is_development_once, ^{
- int dev;
- size_t dev_size = sizeof(dev);
-
- T_QUIET;
- T_ASSERT_POSIX_SUCCESS(sysctlbyname("kern.development", &dev,
- &dev_size, NULL, 0), NULL);
- is_development = (dev != 0);
- });
-
- return is_development;
-}
-
static void
expect_event(struct trace_point *tp, const char *name, unsigned int *events,
const uint32_t *event_ids, size_t event_ids_len)
--- /dev/null
+#ifdef T_NAMESPACE
+#undef T_NAMESPACE
+#endif
+
+#include <darwintest.h>
+
+#include <mach/host_priv.h>
+#include <mach/mach.h>
+#include <mach/mach_types.h>
+#include <mach/mach_vm.h>
+#include <mach/processor_set.h>
+#include <mach/task.h>
+#include <sys/sysctl.h>
+#include <mach_debug/ipc_info.h>
+#include <unistd.h>
+
+T_GLOBAL_META(T_META_NAMESPACE("xnu.ipc"),
+ T_META_RUN_CONCURRENTLY(true));
+
+/*
+ * Attempt to inspect kernel_task using a task_inspect_t. Interact with the
+ * kernel in the same way top(1) and lsmp(1) do.
+ */
+
+static int found_kernel_task = 0;
+
+static void
+check_secure_kernel(void)
+{
+ int secure_kern = 0;
+ size_t secure_kern_size = sizeof(secure_kern);
+
+ T_ASSERT_POSIX_SUCCESS(sysctlbyname("kern.secure_kernel", &secure_kern,
+ &secure_kern_size, NULL, 0), NULL);
+
+ if (secure_kern) {
+ T_SKIP("secure kernel: processor_set_tasks will not return kernel_task");
+ }
+}
+
+static void
+attempt_kernel_inspection(task_t task)
+{
+ pid_t pid = (pid_t)-1;
+ mach_msg_type_number_t i, count, thcnt;
+ struct task_basic_info_64 ti;
+ thread_act_array_t threads;
+
+ if (pid_for_task(task, &pid)) {
+ return;
+ }
+
+ T_QUIET; T_LOG("Checking pid %d", pid);
+
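+ /* kernel_task is always pid 0 */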
+ if (pid != 0) {
+ return;
+ }
+
+ T_LOG("found kernel_task, attempting to inspect");
+ found_kernel_task++;
+
+ count = TASK_BASIC_INFO_64_COUNT;
+ T_EXPECT_MACH_SUCCESS(task_info(task, TASK_BASIC_INFO_64, (task_info_t)&ti,
+ &count), "task_info(... TASK_BASIC_INFO_64 ...)");
+
+ T_EXPECT_MACH_SUCCESS(task_threads(task, &threads, &thcnt), "task_threads");
+ T_LOG("Found %d kernel threads.", thcnt);
+ for (i = 0; i < thcnt; i++) {
+ kern_return_t kr;
+ thread_basic_info_data_t basic_info;
+ mach_msg_type_number_t bi_count = THREAD_BASIC_INFO_COUNT;
+
+ kr = thread_info(threads[i], THREAD_BASIC_INFO,
+ (thread_info_t)&basic_info, &bi_count);
+ /*
+ * Ignore threads that have gone away.
+ */
+ if (kr == MACH_SEND_INVALID_DEST) {
+ T_LOG("ignoring thread that has been destroyed");
+ continue;
+ }
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "thread_info(... THREAD_BASIC_INFO ...)");
+
+ /* Now try out READ (skip eval) interfaces on kernel thread */
+ mach_msg_type_number_t msk_count = EXC_TYPES_COUNT;
+ exception_mask_t masks[EXC_TYPES_COUNT];
+ ipc_info_port_t ports_info[EXC_TYPES_COUNT];
+ exception_behavior_t behaviors[EXC_TYPES_COUNT];
+ thread_state_flavor_t flavors[EXC_TYPES_COUNT];
+ kr = thread_get_exception_ports_info(threads[i], EXC_MASK_ALL, masks, &msk_count, ports_info, behaviors, flavors);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "thread_get_exception_ports_info() on kernel thread: 0x%x", kr);
+
+ /* READ (with eval) interfaces should fail */
+ mach_port_t voucher;
+ kr = thread_get_mach_voucher(threads[i], 0, &voucher);
+ T_QUIET; T_EXPECT_EQ(kr, KERN_INVALID_ARGUMENT, "thread_get_mach_voucher() should fail with KERN_INVALID_ARGUMENT");
+
+ (void)mach_port_deallocate(mach_task_self(), threads[i]);
+ }
+ mach_vm_deallocate(mach_task_self(),
+ (mach_vm_address_t)(uintptr_t)threads,
+ thcnt * sizeof(*threads));
+
+ ipc_info_space_basic_t basic_info;
+ T_EXPECT_MACH_SUCCESS(mach_port_space_basic_info(task, &basic_info), "mach_port_space_basic_info");
+
+ ipc_info_space_t info_space;
+ ipc_info_name_array_t table;
+ ipc_info_tree_name_array_t tree;
+ mach_msg_type_number_t tblcnt = 0, treecnt = 0;
+ T_EXPECT_MACH_SUCCESS(mach_port_space_info(task, &info_space, &table,
+ &tblcnt, &tree, &treecnt), "mach_port_space_info");
+ if (tblcnt > 0) {
+ mach_vm_deallocate(mach_task_self(),
+ (mach_vm_address_t)(uintptr_t)table,
+ tblcnt * sizeof(*table));
+ }
+ if (treecnt > 0) {
+ mach_vm_deallocate(mach_task_self(),
+ (mach_vm_address_t)(uintptr_t)tree,
+ treecnt * sizeof(*tree));
+ }
+
+ /* Now try out READ (skip eval) interfaces on kernel task */
+ mach_msg_type_number_t msk_count = EXC_TYPES_COUNT;
+ exception_mask_t masks[EXC_TYPES_COUNT];
+ ipc_info_port_t ports_info[EXC_TYPES_COUNT];
+ exception_behavior_t behaviors[EXC_TYPES_COUNT];
+ thread_state_flavor_t flavors[EXC_TYPES_COUNT];
+ kern_return_t kr = task_get_exception_ports_info(task, EXC_MASK_ALL, masks, &msk_count, ports_info, behaviors, flavors);
+ T_EXPECT_MACH_SUCCESS(kr, "task_get_exception_ports_info() on kernel_task: 0x%x", kr);
+
+ /* READ (with eval) interfaces should fail */
+ vm_offset_t data;
+ mach_msg_type_number_t cnt;
+ mach_vm_address_t addr = 0x10000000; /* can be whatever, the call should fail before getting to VM */
+
+ kr = mach_vm_read(task, (mach_vm_address_t)addr, 8, &data, &cnt);
+ T_EXPECT_EQ(kr, KERN_INVALID_ARGUMENT, "mach_vm_read() should fail with KERN_INVALID_ARGUMENT");
+
+ mach_port_t voucher;
+ kr = task_get_mach_voucher(task, 0, &voucher);
+ T_EXPECT_EQ(kr, KERN_INVALID_TASK, "task_get_mach_voucher() should fail with KERN_INVALID_TASK");
+
+ /* Control interfaces should absolutely fail */
+ kr = task_set_mach_voucher(task, mach_task_self()); /* voucher arg is unused, can be whatever port */
+ T_EXPECT_EQ(kr, KERN_INVALID_TASK, "task_set_mach_voucher() should fail with KERN_INVALID_TASK");
+}
+
+T_DECL(inspect_kernel_task,
+ "ensure that kernel task can be inspected",
+ T_META_CHECK_LEAKS(false),
+ T_META_ASROOT(true))
+{
+ processor_set_name_array_t psets;
+ processor_set_t pset;
+ task_array_t tasks;
+ mach_msg_type_number_t i, j, tcnt, pcnt = 0;
+ mach_port_t self = mach_host_self();
+
+ check_secure_kernel();
+
+ T_ASSERT_MACH_SUCCESS(host_processor_sets(self, &psets, &pcnt),
+ NULL);
+
+ for (i = 0; i < pcnt; i++) {
+ T_ASSERT_MACH_SUCCESS(host_processor_set_priv(self, psets[i], &pset), NULL);
+ T_LOG("Checking pset %d/%d", i, pcnt - 1);
+
+ tcnt = 0;
+ T_LOG("Attempting kernel inspection with control port...");
+ T_ASSERT_MACH_SUCCESS(processor_set_tasks(pset, &tasks, &tcnt), NULL);
+
+ for (j = 0; j < tcnt; j++) {
+ attempt_kernel_inspection(tasks[j]);
+ mach_port_deallocate(self, tasks[j]);
+ }
+
+ /* free tasks array */
+ mach_vm_deallocate(mach_task_self(),
+ (mach_vm_address_t)(uintptr_t)tasks,
+ tcnt * sizeof(*tasks));
+
+ T_LOG("Attempting kernel inspection with read port...");
+ T_ASSERT_MACH_SUCCESS(processor_set_tasks_with_flavor(pset, TASK_FLAVOR_READ, &tasks, &tcnt), NULL);
+
+ for (j = 0; j < tcnt; j++) {
+ attempt_kernel_inspection(tasks[j]);
+ mach_port_deallocate(self, tasks[j]);
+ }
+
+ mach_vm_deallocate(mach_task_self(),
+ (mach_vm_address_t)(uintptr_t)tasks,
+ tcnt * sizeof(*tasks));
+
+ mach_port_deallocate(mach_task_self(), pset);
+ mach_port_deallocate(mach_task_self(), psets[i]);
+ }
+ mach_vm_deallocate(mach_task_self(),
+ (mach_vm_address_t)(uintptr_t)psets,
+ pcnt * sizeof(*psets));
+
+ if (found_kernel_task != 2) {
+ /* One for kernel control port test, one for kernel read port test. */
+ T_FAIL("could not find kernel_task in list of tasks returned");
+ }
+}
if ((filefd = open(test->t_watchfile, O_RDONLY | O_SYMLINK)) == -1) {
T_LOG("open() of watchfile %s failed: %d (%s)\n", test->t_watchfile,
errno, strerror(errno));
+ res = -1;
}
}
if (test->t_file_is_fifo) {
close(writefd);
}
- } else {
- T_LOG("Couldn't open test file %s to monitor: %d (%s)\n", test->t_watchfile);
- res = -1;
}
if (!test->t_is_poll_test) {
close(kqfd);
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>MachServices</key>
+ <dict>
+ <key>com.apple.xnu.test.mach_port</key>
+ <dict>
+ <key>ResetAtClose</key>
+ <true/>
+ </dict>
+ </dict>
+ <key>ThrottleInterval</key>
+ <integer>1</integer>
+ <key>UserName</key>
+ <string>root</string>
+ <key>ProcessType</key>
+ <string>Adaptive</string>
+ <key>EnvironmentVariables</key>
+ <dict>
+ <key>MallocNanoZone</key>
+ <string>1</string>
+ </dict>
+ <key>LaunchOnlyOnce</key>
+ <true/>
+</dict>
+</plist>
--- /dev/null
+This Proof-of-Concept (PoC) is based on code from a security researcher
+(see rdar://70587638) and should not be used for any purpose other than
+this test. In particular, it should not be used in other shipping code or
+as reference material to create shipping code without first checking with
+Apple Legal.

--- /dev/null
+#include <fcntl.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sys/fcntl.h>
+#include <unistd.h>
+#include <darwintest.h>
+#include <darwintest_utils.h>
+
+T_GLOBAL_META(
+ T_META_NAMESPACE("xnu.ipc"),
+ T_META_RUN_CONCURRENTLY(TRUE));
+
+#define TMP_FILE_NAME "lockf_uaf_poc_70587638"
+
+static int fd0, fd1, fd2;
+
+static int other_failure = 0;
+static int other_failure_line = 0;
+
+static pthread_t thr0, thr1, thr2;
+
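+ /*
+ * The helper threads record the first error they hit (value and line) instead of
+ * asserting directly, so the main test thread can report it later via
+ * CHECK_OTHER_FAILURE().
+ */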
+#define RECORD_ERROR(err) do { \
+ if (other_failure_line == 0) { \
+ other_failure = (err); \
+ other_failure_line = __LINE__; \
+ } \
+} while (0);
+#define MYCHECK_ERRNO(res) do { \
+ if ((res) < 0) { \
+ RECORD_ERROR((errno)); \
+ return NULL; \
+ } \
+} while (0)
+#define MYCHECK_POSIX(res) do { \
+ if ((res) != 0) { \
+ RECORD_ERROR((res)); \
+ return NULL; \
+ } \
+} while (0)
+
+#define CHECK_OTHER_FAILURE() do { \
+ int my_other_failure = other_failure; \
+ int my_other_failure_line = other_failure_line; \
+ other_failure_line = 0; \
+ T_QUIET; \
+ T_ASSERT_EQ(my_other_failure_line, 0, \
+ "Other failure %d at line %d", \
+ my_other_failure, my_other_failure_line); \
+} while (0);
+
+static void *
+thr2_func(void *arg)
+{
+ int res;
+
+ /*
+ * Wait for thr1 to block while attempting to acquire lock C. See the comment at the top of
+ * `thr1_func` for why sleep is used.
+ */
+ (void) sleep(1u);
+
+ /*
+ * Acquire another shared lock (lock D) on the file. At this point the file has two locks
+ * acquired on it, locks A and D, both of which are shared. It also has two exclusive locks,
+ * B and C, currently blocked on lock A waiting to be acquired.
+ */
+ res = flock(fd2, LOCK_SH);
+ MYCHECK_ERRNO(res);
+
+ /*
+ * Unlock lock A. This causes the first lock blocked on lock A (lock B) to be unblocked, and
+ * all other locks blocked on A to be moved to blocking on that first blocked lock (lock C now
+ * blocks on lock B). Lock B's thread wakes up and tries to re-acquire the lock on the file;
+ * because lock D is on the same file descriptor and is already acquired on the file, D is
+ * promoted to an exclusive lock and B is freed instead. At this point every lock blocked on
+ * lock B (lock C in this case) holds a reference to a freed allocation.
+ */
+ res = flock(fd0, LOCK_UN);
+ MYCHECK_ERRNO(res);
+
+ return arg;
+}
+
+static void *
+thr1_func(void *arg)
+{
+ int res;
+ /*
+ * Wait for thr0 to block while attempting to acquire lock B. Sleeping isn't great because it
+ * doesn't actually confirm that the thread is blocked, but there is no obvious way to detect a
+ * blocked thread programmatically, and a 1 second sleep has never failed in testing so far, so
+ * that is what is done for now.
+ */
+ (void) sleep(1u);
+
+ // Another thread is required, spawn it now before blocking
+ res = pthread_create(&thr2, 0, thr2_func, 0);
+ MYCHECK_POSIX(res);
+
+ // Block attempting to acquire an exclusive lock - lock C
+ res = flock(fd1, LOCK_EX);
+ MYCHECK_ERRNO(res);
+
+ return arg;
+}
+
+static void *
+thr0_func(void *arg)
+{
+ int res;
+
+ // Acquire a shared lock - lock A
+ res = flock(fd0, LOCK_SH);
+ MYCHECK_ERRNO(res);
+
+ // Another thread is required, spawn it now before blocking
+ res = pthread_create(&thr1, 0, thr1_func, 0);
+ MYCHECK_POSIX(res);
+
+ // Block attempting to acquire an exclusive lock - lock B
+ res = flock(fd2, LOCK_EX);
+ MYCHECK_ERRNO(res);
+
+ return arg;
+}
+
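+ /*
+ * Empty SIGPIPE handler: the signal only needs to interrupt the blocked flock()
+ * sleep later in the test, not terminate the process.
+ */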
+static void
+sigpipe_handler(int sig __unused, siginfo_t *sa __unused, void *ign __unused)
+{
+ return;
+}
+
+T_DECL(lockf_uaf_poc_70587638,
+ "Do a sequence which caused lf_setlock() to free something still in-use.",
+ T_META_ASROOT(true), T_META_CHECK_LEAKS(false))
+{
+ int res;
+ struct sigaction sa;
+
+ T_SETUPBEGIN;
+
+ (void) sigfillset(&sa.sa_mask);
+ sa.sa_sigaction = sigpipe_handler;
+ sa.sa_flags = SA_SIGINFO;
+ T_ASSERT_POSIX_SUCCESS(sigaction(SIGPIPE, &sa, NULL), "sigaction(SIGPIPE)");
+
+ // Setup all the file descriptors needed (fd0's open makes sure the file exists)
+ T_ASSERT_POSIX_SUCCESS(
+ fd0 = open(TMP_FILE_NAME, O_RDONLY | O_CREAT, 0666),
+ "open(\""TMP_FILE_NAME"\", O_RDONLY|O_CREAT, 0666)");
+ T_ASSERT_POSIX_SUCCESS(
+ fd1 = open(TMP_FILE_NAME, O_RDONLY, 0666),
+ "open(\""TMP_FILE_NAME"\", O_RDONLY, 0666)");
+ T_ASSERT_POSIX_SUCCESS(
+ fd2 = open(TMP_FILE_NAME, O_RDONLY, 0666),
+ "open(\""TMP_FILE_NAME"\", O_RDONLY, 0666)");
+ T_SETUPEND;
+
+ /*
+ * Threads are used because acquiring a lock blocks the calling thread whenever a conflicting
+ * lock already exists on the file. Using multiple threads allows several locks to be blocked
+ * waiting to acquire on the same file at once.
+ */
+ res = pthread_create(&thr0, 0, thr0_func, 0);
+ T_ASSERT_POSIX_ZERO(res, "pthread_create thread 0");
+
+ /*
+ * Wait for lock B to be acquired, which under the hood actually results in lock D being
+ * promoted to an exclusive lock and lock B being freed. At this point the bug has been
+ * triggered, leaving lock C with a dangling pointer to lock B.
+ */
+ res = pthread_join(thr0, NULL);
+ T_ASSERT_POSIX_ZERO(res, "pthread_join thread 0");
+
+ CHECK_OTHER_FAILURE();
+
+ // Trigger a signal to wake lock C from sleep causing it to do a UAF access on lock B
+ res = pthread_kill(thr1, SIGPIPE);
+ T_ASSERT_POSIX_ZERO(res, "pthread_kill thread 1");
+
+ CHECK_OTHER_FAILURE();
+
+ /*
+ * The kernel should panic at this point. This join just prevents the
+ * application from exiting before lock C's thread has woken from the
+ * signal. The application exiting isn't a problem, but it would close
+ * all the fds, which would unlock the locks. That shouldn't prevent the
+ * PoC from working, but it's cleaner to wait here for the kernel to
+ * panic rather than exit the process.
+ */
+ res = pthread_join(thr1, NULL);
+ T_ASSERT_POSIX_ZERO(res, "pthread_join thread 1");
+
+ CHECK_OTHER_FAILURE();
+
+ T_PASS("lockf_uaf_poc_70587638");
+}
#include <stdio.h>
#include <signal.h>
+#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/kern_memorystatus.h>
#include <sys/kern_memorystatus_freeze.h>
pid_t pid;
char **launch_tool_args;
char testpath[PATH_MAX];
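+ /* launch_tool_args holds non-const char pointers, so pass a mutable copy of variant */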
+ char *variant_cpy = strdup(variant);
uint32_t testpath_buf_size;
int ret;
launch_tool_args = (char *[]){
testpath,
"-n",
- variant,
+ variant_cpy,
NULL
};
ret = dt_launch_tool(&pid, launch_tool_args, false, NULL, NULL);
/* Set the process's managed bit, so that the kernel treats this process like an app instead of a sysproc. */
ret = memorystatus_control(MEMORYSTATUS_CMD_SET_PROCESS_IS_MANAGED, pid, 1, NULL, 0);
T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "memorystatus_control");
+ free(variant_cpy);
return pid;
}
/* these values will remain fixed during testing */
int active_limit_mb = 15; /* arbitrary */
int inactive_limit_mb = 7; /* arbitrary */
- int demote_value = 1;
+ __block int demote_value = 1;
/* Launch the child process, and elevate its priority */
int requestedpriority;
dispatch_source_t ds_signal, ds_exit;
}
static void
-drop_jetsam_snapshot_ownership(void)
+unset_testing_pid(void)
{
int ret;
- ret = memorystatus_control(MEMORYSTATUS_CMD_SET_JETSAM_SNAPSHOT_OWNERSHIP, 0, MEMORYSTATUS_FLAGS_SNAPSHOT_DROP_OWNERSHIP, NULL, 0);
+ ret = memorystatus_control(MEMORYSTATUS_CMD_SET_TESTING_PID, 0, MEMORYSTATUS_FLAGS_UNSET_TESTING_PID, NULL, 0);
 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "Drop ownership of jetsam snapshot");
}
static void
-take_jetsam_snapshot_ownership(void)
+set_testing_pid(void)
{
int ret;
- ret = memorystatus_control(MEMORYSTATUS_CMD_SET_JETSAM_SNAPSHOT_OWNERSHIP, 0, MEMORYSTATUS_FLAGS_SNAPSHOT_TAKE_OWNERSHIP, NULL, 0);
+ ret = memorystatus_control(MEMORYSTATUS_CMD_SET_TESTING_PID, 0, MEMORYSTATUS_FLAGS_SET_TESTING_PID, NULL, 0);
T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "Take ownership of jetsam snapshot");
- T_ATEND(drop_jetsam_snapshot_ownership);
+ T_ATEND(unset_testing_pid);
}
/*
return NULL;
}
+static dispatch_source_t
+run_block_after_signal(int sig, dispatch_block_t block)
+{
+ dispatch_source_t ds_signal;
+ signal(sig, SIG_IGN);
+ ds_signal = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, (uintptr_t) sig, 0, dispatch_get_main_queue());
+ T_QUIET; T_ASSERT_NOTNULL(ds_signal, "dispatch_source_create");
+ dispatch_source_set_event_handler(ds_signal, block);
+ return ds_signal;
+}
+
/*
* Launches the child & runs the given block after the child signals.
* If exit_with_child is true, the test will exit when the child exits.
{
dispatch_source_t ds_signal, ds_exit;
- /* Run the test block after the child launches & signals it's ready. */
- signal(SIGUSR1, SIG_IGN);
- ds_signal = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR1, 0, dispatch_get_main_queue());
- T_QUIET; T_ASSERT_NOTNULL(ds_signal, "dispatch_source_create");
- dispatch_source_set_event_handler(ds_signal, test_block);
+ ds_signal = run_block_after_signal(SIGUSR1, test_block);
/* Launch the child process. */
child_pid = launch_background_helper(variant);
/* Listen for exit. */
dispatch_activate(ds_exit);
}
dispatch_activate(ds_signal);
- dispatch_main();
}
T_DECL(get_frozen_procs, "List processes in the freezer") {
T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(child_pid, SIGKILL), "Killed child process");
T_END;
});
+ dispatch_main();
}
T_DECL(frozen_to_swap_accounting, "jetsam snapshot has frozen_to_swap accounting") {
T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(child_pid, SIGKILL), "Killed child process");
T_END;
});
+ dispatch_main();
}
T_DECL(freezer_snapshot, "App kills are recorded in the freezer snapshot") {
/* Take ownership of the snapshot to ensure we don't race with another process trying to consume them. */
- take_jetsam_snapshot_ownership();
+ set_testing_pid();
test_after_background_helper_launches(false, "frozen_background", ^{
int ret;
free(snapshot);
T_END;
});
+ dispatch_main();
}
T_DECL(freezer_snapshot_consume, "Freezer snapshot is consumed on read") {
/* Take ownership of the snapshot to ensure we don't race with another process trying to consume them. */
- take_jetsam_snapshot_ownership();
+ set_testing_pid();
test_after_background_helper_launches(false, "frozen_background", ^{
int ret;
free(snapshot);
T_END;
});
+ dispatch_main();
}
T_DECL(freezer_snapshot_frozen_state, "Frozen state is recorded in freezer snapshot") {
skip_if_freezer_is_disabled();
/* Take ownership of the snapshot to ensure we don't race with another process trying to consume them. */
- take_jetsam_snapshot_ownership();
+ set_testing_pid();
test_after_background_helper_launches(false, "frozen_background", ^{
int ret;
free(snapshot);
T_END;
});
+ dispatch_main();
}
T_DECL(freezer_snapshot_thaw_state, "Thaw count is recorded in freezer snapshot") {
skip_if_freezer_is_disabled();
/* Take ownership of the snapshot to ensure we don't race with another process trying to consume them. */
- take_jetsam_snapshot_ownership();
+ set_testing_pid();
test_after_background_helper_launches(false, "frozen_background", ^{
int ret;
/* Set the process to freezable */
kern_ret = memorystatus_control(MEMORYSTATUS_CMD_SET_PROCESS_IS_FREEZABLE, getpid(), 1, NULL, 0);
T_QUIET; T_ASSERT_POSIX_SUCCESS(kern_ret, "set process is freezable");
- /* Signal to our parent that we can be frozen */
- if (kill(getppid(), SIGUSR1) != 0) {
- T_LOG("Unable to signal to parent process!");
- exit(SIGNAL_TO_PARENT_FAILED);
- }
/* We should not be frozen yet. */
is_frozen = memorystatus_control(MEMORYSTATUS_CMD_GET_PROCESS_IS_FROZEN, getpid(), 0, NULL, 0);
exit(FROZEN_BIT_SET);
}
-
- sig_t sig_ret = signal(SIGUSR1, SIG_IGN);
- T_QUIET; T_WITH_ERRNO; T_ASSERT_NE(sig_ret, SIG_ERR, "signal(SIGUSR1, SIG_IGN)");
ds_signal = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR1, 0, dispatch_get_main_queue());
if (ds_signal == NULL) {
exit(DISPATCH_SOURCE_CREATE_FAILED);
});
dispatch_activate(ds_signal);
+ sig_t sig_ret = signal(SIGUSR1, SIG_IGN);
+ T_QUIET; T_WITH_ERRNO; T_ASSERT_NE(sig_ret, SIG_ERR, "signal(SIGUSR1, SIG_IGN)");
+
+ /* Signal to our parent that we can be frozen */
+ if (kill(getppid(), SIGUSR1) != 0) {
+ T_LOG("Unable to signal to parent process!");
+ exit(SIGNAL_TO_PARENT_FAILED);
+ }
+
dispatch_main();
}
kill(child_pid, SIGUSR1);
 /* The child will check its own frozen state & exit. */
});
+ dispatch_main();
+}
+
+static unsigned int freeze_pages_min_old;
+static int throttle_enabled_old;
+static void
+cleanup_memorystatus_freeze_top_process(void)
+{
+ sysctlbyname("kern.memorystatus_freeze_pages_min", NULL, NULL, &freeze_pages_min_old, sizeof(freeze_pages_min_old));
+ sysctlbyname("kern.memorystatus_freeze_throttle_enabled", NULL, NULL, &throttle_enabled_old, sizeof(throttle_enabled_old));
+}
+
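+/* Mirrors the kernel's P_MEMSTAT_FROZEN memorystatus state bit, used to check the snapshot entry's state field */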
+#define P_MEMSTAT_FROZEN 0x00000002
+T_DECL(memorystatus_freeze_top_process, "memorystatus_freeze_top_process chooses the correct process",
+ T_META_ASROOT(true),
+ T_META_REQUIRES_SYSCTL_EQ("kern.development", 1),
+ T_META_REQUIRES_SYSCTL_EQ("vm.freeze_enabled", 1)) {
+ int32_t memorystatus_freeze_band = 0;
+ size_t memorystatus_freeze_band_size = sizeof(memorystatus_freeze_band);
+ size_t freeze_pages_min_size = sizeof(freeze_pages_min_old);
+ unsigned int freeze_pages_min_new = 0;
+ size_t throttle_enabled_old_size = sizeof(throttle_enabled_old);
+ int throttle_enabled_new = 1;
+ __block errno_t ret;
+ __block int maxproc;
+ size_t maxproc_size = sizeof(maxproc);
+
+ ret = sysctlbyname("kern.maxproc", &maxproc, &maxproc_size, NULL, 0);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kern.maxproc");
+ sysctlbyname("kern.memorystatus_freeze_jetsam_band", &memorystatus_freeze_band, &memorystatus_freeze_band_size, NULL, 0);
+
+ /* Set min pages to 0 and disable the budget to ensure we can always freeze the child. */
+ ret = sysctlbyname("kern.memorystatus_freeze_pages_min", &freeze_pages_min_old, &freeze_pages_min_size, &freeze_pages_min_new, sizeof(freeze_pages_min_new));
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "set kern.memorystatus_freeze_pages_min");
+ ret = sysctlbyname("kern.memorystatus_freeze_throttle_enabled", &throttle_enabled_old, &throttle_enabled_old_size, &throttle_enabled_new, sizeof(throttle_enabled_new));
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "set kern.memorystatus_freeze_throttle_enabled");
+ T_ATEND(cleanup_memorystatus_freeze_top_process);
+ /* Take ownership of the freezer probabilities for the duration of the test so that we don't race with dasd. */
+ set_testing_pid();
+ test_after_background_helper_launches(true, "frozen_background", ^{
+ int32_t child_band = JETSAM_PRIORITY_DEFAULT;
+ /* Place the child in the idle band so that it gets elevated like a typical app. */
+ move_to_idle_band(child_pid);
+ ret = pid_suspend(child_pid);
+ T_ASSERT_POSIX_SUCCESS(ret, "child suspended");
+
+ size_t buffer_len = sizeof(memorystatus_properties_entry_v1_t) * (size_t) maxproc;
+ memorystatus_properties_entry_v1_t *properties_list = malloc(buffer_len);
+ T_QUIET; T_ASSERT_NOTNULL(properties_list, "malloc properties array");
+ size_t properties_list_len = 0;
+ /* The child needs to age down into the idle band before it's eligible to be frozen. */
+ T_LOG("Waiting for child to age into the idle band.");
+ while (child_band != JETSAM_PRIORITY_IDLE) {
+ memset(properties_list, 0, buffer_len);
+ properties_list_len = 0;
+ memorystatus_jetsam_snapshot_t *snapshot = get_jetsam_snapshot(MEMORYSTATUS_FLAGS_SNAPSHOT_ON_DEMAND, false);
+
+ bool found = false;
+ for (size_t i = 0; i < snapshot->entry_count; i++) {
+ memorystatus_jetsam_snapshot_entry_t *snapshot_entry = &snapshot->entries[i];
+ if (snapshot_entry->priority <= memorystatus_freeze_band && !snapshot_entry->killed) {
+ pid_t pid = snapshot_entry->pid;
+ memorystatus_properties_entry_v1_t *property_entry = &properties_list[properties_list_len++];
+ property_entry->version = 1;
+ property_entry->pid = pid;
+ if (pid == child_pid) {
+ found = true;
+ property_entry->use_probability = 1;
+ child_band = snapshot_entry->priority;
+ } else {
+ property_entry->use_probability = 0;
+ }
+ strncpy(property_entry->proc_name, snapshot_entry->name, MAXCOMLEN);
+ property_entry->proc_name[MAXCOMLEN] = '\0';
+ }
+ }
+ T_QUIET; T_ASSERT_TRUE(found, "Child is in on demand snapshot");
+ free(snapshot);
+ }
+ ret = memorystatus_control(MEMORYSTATUS_CMD_GRP_SET_PROPERTIES, 0, MEMORYSTATUS_FLAGS_GRP_SET_PROBABILITY, properties_list, sizeof(memorystatus_properties_entry_v1_t) * properties_list_len);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "MEMORYSTATUS_FLAGS_GRP_SET_PROBABILITY");
+ free(properties_list);
+ int val = 1;
+ ret = sysctlbyname("vm.memorystatus_freeze_top_process", NULL, NULL, &val, sizeof(val));
+ T_ASSERT_POSIX_SUCCESS(ret, "freeze_top_process");
+ /* Verify that the process was frozen. */
+ memorystatus_jetsam_snapshot_t *snapshot = get_jetsam_snapshot(MEMORYSTATUS_FLAGS_SNAPSHOT_ON_DEMAND, false);
+ memorystatus_jetsam_snapshot_entry_t *entry = get_jetsam_snapshot_entry(snapshot, child_pid);
+ T_ASSERT_NOTNULL(entry, "child is in snapshot");
+ if (!(entry->state & P_MEMSTAT_FROZEN)) {
+ T_LOG("Not frozen. Skip reason: %d", entry->jse_freeze_skip_reason);
+ }
+ T_ASSERT_TRUE(entry->state & P_MEMSTAT_FROZEN, "child is frozen");
+ free(snapshot);
+ ret = pid_resume(child_pid);
+ T_ASSERT_POSIX_SUCCESS(ret, "child resumed after freeze");
+
+ /* Kill the child */
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(child_pid, SIGKILL), "Killed child process");
+ T_END;
+ });
+ dispatch_main();
+}
+
+static int
+memorystatus_freezer_thaw_percentage(void)
+{
+ int val;
+ size_t size = sizeof(val);
+ int ret = sysctlbyname("kern.memorystatus_freezer_thaw_percentage", &val, &size, NULL, 0);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "failed to query kern.memorystatus_freezer_thaw_percentage");
+ return val;
+}
+
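+/*
+ * Refill the freezer budget to the full daily amount, which effectively starts a
+ * new freezer interval (the thaw percentage resets to 0).
+ */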
+static void
+reset_interval(void)
+{
+ uint32_t freeze_daily_budget_mb = 0;
+ size_t size = sizeof(freeze_daily_budget_mb);
+ int ret;
+ uint64_t new_budget;
+ ret = sysctlbyname("kern.memorystatus_freeze_daily_mb_max", &freeze_daily_budget_mb, &size, NULL, 0);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "failed to query kern.memorystatus_freeze_daily_mb_max");
+ new_budget = (freeze_daily_budget_mb * (1UL << 20) / vm_page_size);
+ ret = sysctlbyname("kern.memorystatus_freeze_budget_pages_remaining", NULL, NULL, &new_budget, sizeof(new_budget));
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "failed to set kern.memorystatus_freeze_budget_pages_remaining");
+}
+
+static pid_t second_child;
+static void
+cleanup_memorystatus_freezer_thaw_percentage(void)
+{
+ kill(second_child, SIGKILL);
+}
+
+T_DECL(memorystatus_freezer_thaw_percentage, "memorystatus_freezer_thaw_percentage updates correctly",
+ T_META_ASROOT(true),
+ T_META_REQUIRES_SYSCTL_EQ("kern.development", 1),
+ T_META_REQUIRES_SYSCTL_EQ("vm.freeze_enabled", 1)) {
+ __block dispatch_source_t first_signal_block;
+ /* Take ownership of the freezer probabilities for the duration of the test so that nothing new gets frozen by dasd. */
+ set_testing_pid();
+ reset_interval();
+
+ /* Spawn one child that will remain frozen throughout the whole test & another that will be thawed. */
+ first_signal_block = run_block_after_signal(SIGUSR1, ^{
+ move_to_idle_band(second_child);
+ __block int ret = pid_suspend(second_child);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "child suspended");
+ freeze_process(second_child);
+ T_QUIET; T_ASSERT_EQ(memorystatus_freezer_thaw_percentage(), 0, "thaw percentage is still 0 after freeze");
+ dispatch_source_cancel(first_signal_block);
+ test_after_background_helper_launches(true, "frozen_background", ^{
+ reset_interval();
+ T_QUIET; T_ASSERT_EQ(memorystatus_freezer_thaw_percentage(), 0, "new interval starts with a thaw percentage of 0");
+ move_to_idle_band(child_pid);
+ ret = pid_suspend(child_pid);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "child suspended");
+ freeze_process(child_pid);
+ ret = pid_resume(child_pid);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "child resumed after freeze");
+ int percentage_after_thaw = memorystatus_freezer_thaw_percentage();
+ T_QUIET; T_ASSERT_GT(percentage_after_thaw, 0, "thaw percentage is higher after thaw");
+
+ ret = pid_suspend(child_pid);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "child suspended");
+ freeze_process(child_pid);
+ ret = pid_resume(child_pid);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "child resumed after freeze");
+ T_QUIET; T_ASSERT_EQ(memorystatus_freezer_thaw_percentage(), percentage_after_thaw, "thaw percentage is unchanged after second thaw");
+
+ ret = pid_suspend(child_pid);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "child suspended");
+ freeze_process(child_pid);
+ reset_interval();
+ T_QUIET; T_ASSERT_EQ(memorystatus_freezer_thaw_percentage(), 0, "new interval starts with a 0 thaw percentage");
+ ret = pid_resume(child_pid);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "child resumed after freeze");
+ T_QUIET; T_ASSERT_GT(memorystatus_freezer_thaw_percentage(), 0, "thaw percentage goes back up in new interval");
+
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(child_pid, SIGKILL), "failed to kill child");
+ T_END;
+ });
+ });
+
+ second_child = launch_background_helper("frozen_background");
+ T_ATEND(cleanup_memorystatus_freezer_thaw_percentage);
+ dispatch_activate(first_signal_block);
+ dispatch_main();
}
pid_t mypid = getpid();
/* these values will remain fixed during testing */
- int active_limit_mb = 15; /* arbitrary */
- int inactive_limit_mb = 10; /* arbitrary */
+ int active_limit_mb = 35; /* arbitrary */
+ int inactive_limit_mb = 25; /* arbitrary */
/* these values may vary during test */
int requestedpriority = 0;
pid_t mypid = getpid();
/* these values will remain fixed during testing */
- int active_limit_mb = 15; /* arbitrary */
- int inactive_limit_mb = 10; /* arbitrary */
+ int active_limit_mb = 35; /* arbitrary */
+ int inactive_limit_mb = 25; /* arbitrary */
/* these values may vary during test */
int requestedpriority = JETSAM_PRIORITY_UI_SUPPORT;
pid_t mypid = getpid();
/* these values will remain fixed during testing */
- int active_limit_mb = 15; /* arbitrary */
- int inactive_limit_mb = 10; /* arbitrary */
+ int active_limit_mb = 35; /* arbitrary */
+ int inactive_limit_mb = 25; /* arbitrary */
int requestedpriority = JETSAM_PRIORITY_AUDIO_AND_ACCESSORY;
T_SETUPBEGIN;
#include <darwintest.h>
#include <darwintest_utils.h>
+#include "test_utils.h"
+
T_GLOBAL_META(
T_META_NAMESPACE("xnu.vm"),
T_META_CHECK_LEAKS(false)
"malloc() failed",
};
-/*
- * Corpse collection only happens in development kernels.
- * So we need this to detect if the test is relevant.
- */
-static boolean_t
-is_development_kernel(void)
-{
- int ret;
- int dev = 0;
- size_t dev_size = sizeof(dev);
-
- ret = sysctlbyname("kern.development", &dev, &dev_size, NULL, 0);
- if (ret != 0) {
- return FALSE;
- }
-
- return dev != 0;
-}
-
/*
* Set/Get the sysctl used to determine if corpse collection occurs.
* This is done by the kernel checking for a specific PID.
#include <TargetConditionals.h>
#include <perfcheck_keys.h>
+#include "benchmark/helpers.h"
+
T_GLOBAL_META(
T_META_NAMESPACE("xnu.vm.perf"),
T_META_CHECK_LEAKS(false),
static void *thread_setup(void *arg);
static void run_test(int fault_type, int mapping_variant, size_t memsize);
static void setup_and_run_test(int test, int threads);
-static int get_ncpu(void);
/* Allocates memory using the default mmap behavior. Each VM region created is capped at 128 MB. */
static void
T_END;
}
-static int
-get_ncpu(void)
-{
- int ncpu;
- size_t length = sizeof(ncpu);
-
- T_QUIET; T_ASSERT_POSIX_SUCCESS(sysctlbyname("hw.ncpu", &ncpu, &length, NULL, 0),
- "failed to query hw.ncpu");
- return ncpu;
-}
-
T_DECL(read_soft_fault,
"Read soft faults (single thread)")
{
TEST_HSP(HOST_SYSPOLICYD_PORT);
TEST_HSP(HOST_FILECOORDINATIOND_PORT);
TEST_HSP(HOST_FAIRPLAYD_PORT);
+ TEST_HSP(HOST_IOCOMPRESSIONSTATS_PORT);
#undef TEST_HSP
- T_EXPECT_EQ(HOST_FAIRPLAYD_PORT, HOST_MAX_SPECIAL_PORT,
+ T_EXPECT_EQ(HOST_IOCOMPRESSIONSTATS_PORT, HOST_MAX_SPECIAL_PORT,
"checked all of the ports");
const char *invalid_hsp =
portdef, #portdef)
TEST_TSP(TASK_KERNEL_PORT);
+ TEST_TSP(TASK_READ_PORT);
+ TEST_TSP(TASK_INSPECT_PORT);
TEST_TSP(TASK_HOST_PORT);
TEST_TSP(TASK_NAME_PORT);
TEST_TSP(TASK_BOOTSTRAP_PORT);
"invalid task special port description should be NULL");
}
+T_DECL(thread_special_port_descriptions,
+ "verify that thread special ports can be described")
+{
+#define TEST_TSP(portdef) \
+ expect_special_port_description(mach_thread_special_port_description, \
+ portdef, #portdef)
+
+ TEST_TSP(THREAD_KERNEL_PORT);
+ TEST_TSP(THREAD_READ_PORT);
+ TEST_TSP(THREAD_INSPECT_PORT);
+
+#undef TEST_TSP
+
+ T_EXPECT_EQ(THREAD_READ_PORT, THREAD_MAX_SPECIAL_PORT,
+ "checked all of the ports");
+
+ const char *invalid_tsp =
+ mach_thread_special_port_description(THREAD_MAX_SPECIAL_PORT + 1);
+ T_EXPECT_NULL(invalid_tsp,
+ "invalid thread special port description should be NULL");
+}
+
static void
expect_special_port_id(int (*fn)(const char *id), int port, const char *portid)
{
portdef, #portdef)
TEST_TSP(TASK_KERNEL_PORT);
+ TEST_TSP(TASK_READ_PORT);
+ TEST_TSP(TASK_INSPECT_PORT);
TEST_TSP(TASK_HOST_PORT);
TEST_TSP(TASK_NAME_PORT);
TEST_TSP(TASK_BOOTSTRAP_PORT);
T_EXPECT_EQ(invalid_tsp, -1,
"invalid task special port IDs should return -1");
}
+
+T_DECL(thread_special_port_mapping,
+ "verify that thread special port names can be mapped to numbers")
+{
+#define TEST_TSP(portdef) \
+ expect_special_port_id(mach_thread_special_port_for_id, \
+ portdef, #portdef)
+
+ TEST_TSP(THREAD_KERNEL_PORT);
+ TEST_TSP(THREAD_READ_PORT);
+ TEST_TSP(THREAD_INSPECT_PORT);
+
+#undef TEST_TSP
+
+ int invalid_tsp = mach_thread_special_port_for_id("BOGUS_SPECIAL_PORT_NAME");
+ T_EXPECT_EQ(invalid_tsp, -1,
+ "invalid thread special port IDs should return -1");
+}
#include <string.h>
#include <errno.h>
+#include "test_utils.h"
+
/*
* Any change to this structure must be reflected in iBoot / MacEFI / PanicDump / XNU Tests and vice versa.
*/
return res;
}
-static boolean_t
-is_development_kernel(void)
-{
- int ret;
- int dev = 0;
- size_t dev_size = sizeof(dev);
-
- ret = sysctlbyname("kern.development", &dev, &dev_size, NULL, 0);
- if (ret != 0) {
- return FALSE;
- }
-
- return dev != 0;
-}
-
/*
* Valid cases:
* 1. Development & Debug iBoot/macEFI provides a preoslog buffer.
T_ASSERT_GT(cpu_checkin_min_interval, 0, "kern.cpu_checkin_interval should be > 0");
- uint64_t* commpage_addr = (uint64_t *)(uintptr_t)_COMM_PAGE_CPU_QUIESCENT_COUNTER;
+ COMM_PAGE_SLOT_TYPE(uint64_t) commpage_addr = COMM_PAGE_SLOT(uint64_t, CPU_QUIESCENT_COUNTER);
- T_LOG("address of _COMM_PAGE_CPU_QUIESCENT_COUNTER is %p", (void*) commpage_addr);
+ T_LOG("address of _COMM_PAGE_CPU_QUIESCENT_COUNTER is %p", commpage_addr);
uint64_t counter = *commpage_addr;
uint64_t last_counter = counter;
--- /dev/null
+#include <darwintest.h>
+
+#include <mach/host_priv.h>
+#include <mach/mach.h>
+#include <mach/mach_types.h>
+#include <mach/mach_vm.h>
+#include <mach_debug/ipc_info.h>
+#include <mach/processor_set.h>
+#include <mach/task.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+#include <unistd.h>
+#include <TargetConditionals.h>
+
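+/* Kernel object types reported by mach_port_kernel_object(); values mirror the kernel's IKOT_* constants */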
+#define IKOT_THREAD_CONTROL 1
+#define IKOT_THREAD_READ 47
+#define IKOT_THREAD_INSPECT 46
+
+#define IKOT_TASK_CONTROL 2
+#define IKOT_TASK_READ 45
+#define IKOT_TASK_INSPECT 44
+#define IKOT_TASK_NAME 20
+
+
+/*
+ * This test verifies various security properties for task and thread
+ * read/inspect interfaces. Specifically, it checks and makes sure:
+ *
+ * 1. Task/thread can't get more privileged ports from less privileged ones through
+ * {task, thread}_get_special_port()
+ * 2. Correct level of thread ports are returned from task_threads() with
+ * a given task port flavor
+ * 3. Correct level of task ports are returned from processor_set_tasks()
+ * 4. MIG intrans conversion and enforcement for task/thread port does not break.
+ * 5. task_{, read, inspect, name}_for_pid() works for self and other process
+ * 6. The new mach_vm_remap_new interface behaves correctly
+ */
+
+T_GLOBAL_META(
+ T_META_NAMESPACE("xnu.ipc"),
+ T_META_RUN_CONCURRENTLY(TRUE));
+
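+/*
+ * Flavors are numbered in order of decreasing privilege (CONTROL < READ < INSPECT
+ * < NAME), so a port whose flavor is numerically <= the required flavor should allow
+ * the call to succeed, and anything weaker should be rejected.
+ */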
+static void
+RESULT_CHECK(
+ kern_return_t kr,
+ unsigned int flavor, /* task_flavor_t or thread_flavor_t */
+ unsigned int required, /* task_flavor_t or thread_flavor_t */
+ char *f_name)
+{
+ if (flavor <= required) {
+ T_EXPECT_EQ(kr, KERN_SUCCESS, "%s should succeed with task/thread flavor %d, kr: 0x%x", f_name, flavor, kr);
+ } else {
+ T_EXPECT_NE(kr, KERN_SUCCESS, "%s should fail with task/thread flavor %d, kr: 0x%x", f_name, flavor, kr);
+ }
+}
+
+static void
+test_task_get_special_port(
+ task_t tport,
+ task_flavor_t flavor)
+{
+ kern_return_t kr;
+ mach_port_t special_port = MACH_PORT_NULL;
+ mach_port_t tfp_port = MACH_PORT_NULL;
+
+ T_LOG("Testing task_get_special_port() with task flavor %d", flavor);
+ /* gettable with at least control port */
+ kr = task_get_special_port(tport, TASK_KERNEL_PORT, &special_port);
+ RESULT_CHECK(kr, flavor, TASK_FLAVOR_CONTROL, "task_get_special_port(TASK_KERNEL_PORT)");
+ mach_port_deallocate(mach_task_self(), special_port);
+ special_port = MACH_PORT_NULL;
+
+ kr = task_get_special_port(tport, TASK_BOOTSTRAP_PORT, &special_port);
+ RESULT_CHECK(kr, flavor, TASK_FLAVOR_CONTROL, "task_get_special_port(TASK_BOOTSTRAP_PORT)");
+ mach_port_deallocate(mach_task_self(), special_port);
+ special_port = MACH_PORT_NULL;
+
+ kr = task_get_special_port(tport, TASK_HOST_PORT, &special_port);
+ RESULT_CHECK(kr, flavor, TASK_FLAVOR_CONTROL, "task_get_special_port(TASK_HOST_PORT)");
+ mach_port_deallocate(mach_task_self(), special_port);
+ special_port = MACH_PORT_NULL;
+
+ /* gettable with at least read port */
+ kr = task_get_special_port(tport, TASK_READ_PORT, &special_port);
+ RESULT_CHECK(kr, flavor, TASK_FLAVOR_READ, "task_get_special_port(TASK_READ_PORT)");
+ if (KERN_SUCCESS == kr) {
+ kr = task_read_for_pid(mach_task_self(), getpid(), &tfp_port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_read_for_pid()");
+ T_QUIET; T_EXPECT_EQ(tfp_port, special_port, "task_read_for_pid() should match TASK_READ_PORT");
+ mach_port_deallocate(mach_task_self(), tfp_port);
+ }
+ mach_port_deallocate(mach_task_self(), special_port);
+ special_port = MACH_PORT_NULL;
+
+ /* gettable with at least inspect port */
+ kr = task_get_special_port(tport, TASK_INSPECT_PORT, &special_port);
+ RESULT_CHECK(kr, flavor, TASK_FLAVOR_INSPECT, "task_get_special_port(TASK_INSPECT_PORT)");
+ if (KERN_SUCCESS == kr) {
+ kr = task_inspect_for_pid(mach_task_self(), getpid(), &tfp_port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_inspect_for_pid()");
+ T_QUIET; T_EXPECT_EQ(tfp_port, special_port, "task_inspect_for_pid() should match TASK_INSPECT_PORT");
+ mach_port_deallocate(mach_task_self(), tfp_port);
+ }
+ mach_port_deallocate(mach_task_self(), special_port);
+ special_port = MACH_PORT_NULL;
+
+ /* gettable with at least name port */
+ kr = task_get_special_port(tport, TASK_NAME_PORT, &special_port);
+ RESULT_CHECK(kr, flavor, TASK_FLAVOR_INSPECT, "task_get_special_port(TASK_NAME_PORT)");
+ if (KERN_SUCCESS == kr) {
+ kr = task_name_for_pid(mach_task_self(), getpid(), &tfp_port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_name_for_pid()");
+ T_QUIET; T_EXPECT_EQ(tfp_port, special_port, "task_name_for_pid() should match TASK_NAME_PORT");
+ mach_port_deallocate(mach_task_self(), tfp_port);
+ }
+ mach_port_deallocate(mach_task_self(), special_port);
+ special_port = MACH_PORT_NULL;
+}
+
+static void
+test_thread_get_special_port(
+ thread_t tport,
+ thread_flavor_t flavor)
+{
+ kern_return_t kr;
+ mach_port_t special_port = MACH_PORT_NULL;
+
+ T_LOG("Testing thread_get_special_port() with thread flavor %d", flavor);
+ /* gettable with at least control port */
+ kr = thread_get_special_port(tport, THREAD_KERNEL_PORT, &special_port);
+ RESULT_CHECK(kr, flavor, THREAD_FLAVOR_CONTROL, "thread_get_special_port(THREAD_KERNEL_PORT)");
+ mach_port_deallocate(mach_task_self(), special_port);
+ special_port = MACH_PORT_NULL;
+
+ /* gettable with at least read port */
+ kr = thread_get_special_port(tport, THREAD_READ_PORT, &special_port);
+ RESULT_CHECK(kr, flavor, THREAD_FLAVOR_READ, "thread_get_special_port(THREAD_READ_PORT)");
+ mach_port_deallocate(mach_task_self(), special_port);
+ special_port = MACH_PORT_NULL;
+
+ /* gettable with at least inspect port */
+ kr = thread_get_special_port(tport, THREAD_INSPECT_PORT, &special_port);
+ RESULT_CHECK(kr, flavor, THREAD_FLAVOR_INSPECT, "thread_get_special_port(THREAD_INSPECT_PORT)");
+ mach_port_deallocate(mach_task_self(), special_port);
+ special_port = MACH_PORT_NULL;
+}
+
+static void
+test_task_threads(
+ task_t tport,
+ task_flavor_t flavor)
+{
+ kern_return_t kr;
+ thread_array_t threadList;
+ mach_msg_type_number_t threadCount = 0;
+
+ unsigned int kotype;
+ unsigned int kaddr;
+
+ T_LOG("Testing task_threads() with task flavor %d", flavor);
+
+ kr = task_threads(tport, &threadList, &threadCount);
+ RESULT_CHECK(kr, flavor, TASK_FLAVOR_INSPECT, "task_threads");
+
+ if (kr) {
+ T_LOG("task_threads failed, skipping test_task_threads()");
+ return;
+ }
+
+ T_QUIET; T_ASSERT_GE(threadCount, 1, "threadCount should be at least 1");
+
+ /*
+ * TASK_FLAVOR_CONTROL -> THREAD_FLAVOR_CONTROL
+ * TASK_FLAVOR_READ -> THREAD_FLAVOR_READ
+ * TASK_FLAVOR_INSPECT -> THREAD_FLAVOR_INSPECT
+ * TASK_FLAVOR_NAME -> KERN_FAILURE
+ */
+ for (size_t i = 0; i < threadCount; i++) {
+ kr = mach_port_kernel_object(mach_task_self(), threadList[i], &kotype, &kaddr);
+ if (kr == KERN_INVALID_RIGHT) {
+ /* thread port is inactive */
+ T_LOG("thread port name 0x%x is inactive", threadList[i]);
+ continue;
+ } else if (kr) {
+ T_FAIL("mach_port_kernel_object() failed with kr: 0x%x", kr);
+ }
+ switch (flavor) {
+ case TASK_FLAVOR_CONTROL:
+ T_QUIET; T_EXPECT_EQ(kotype, IKOT_THREAD_CONTROL, "Task control port should yield thread control port");
+ break;
+ case TASK_FLAVOR_READ:
+ T_QUIET; T_EXPECT_EQ(kotype, IKOT_THREAD_READ, "Task read port should yield thread read port");
+ break;
+ case TASK_FLAVOR_INSPECT:
+ T_QUIET; T_EXPECT_EQ(kotype, IKOT_THREAD_INSPECT, "Task inspect port should yield thread inspect port");
+ break;
+ default:
+ T_FAIL("task_threads() returned thread ports with task name port??");
+ break;
+ }
+ }
+
+ for (size_t i = 0; i < threadCount; i++) {
+ mach_port_deallocate(mach_task_self(), threadList[i]);
+ }
+}
+
+static void
+test_processor_set_tasks(
+ task_flavor_t flavor)
+{
+ kern_return_t kr;
+ processor_set_name_array_t psets;
+ processor_set_t pset_priv;
+ task_array_t taskList;
+ mach_msg_type_number_t pcnt = 0, tcnt = 0;
+ mach_port_t host = mach_host_self();
+
+ unsigned int kotype;
+ unsigned int kaddr;
+
+ T_LOG("Testing processor_set_tasks() with task flavor %d", flavor);
+
+ kr = host_processor_sets(host, &psets, &pcnt);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "host_processor_sets");
+ T_QUIET; T_ASSERT_GE(pcnt, 1, "should have at least 1 processor set");
+
+ kr = host_processor_set_priv(host, psets[0], &pset_priv);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "host_processor_set_priv");
+ for (size_t i = 0; i < pcnt; i++) {
+ mach_port_deallocate(mach_task_self(), psets[i]);
+ }
+ mach_port_deallocate(mach_task_self(), host);
+
+ kr = processor_set_tasks_with_flavor(pset_priv, flavor, &taskList, &tcnt);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "processor_set_tasks_with_flavor");
+ T_QUIET; T_ASSERT_GE(tcnt, 1, "should have at least 1 task");
+ mach_port_deallocate(mach_task_self(), pset_priv);
+
+ for (size_t i = 0; i < tcnt; i++) {
+ kr = mach_port_kernel_object(mach_task_self(), taskList[i], &kotype, &kaddr);
+ if (kr == KERN_INVALID_RIGHT) {
+ /* task port is inactive */
+ T_LOG("task port name 0x%x is inactive", taskList[i]);
+ continue;
+ } else if (kr) {
+ T_FAIL("mach_port_kernel_object() failed with kr: 0x%x", kr);
+ }
+ switch (flavor) {
+ case TASK_FLAVOR_CONTROL:
+ T_QUIET; T_EXPECT_EQ(kotype, IKOT_TASK_CONTROL, "TASK_FLAVOR_CONTROL should yield control ports");
+ break;
+ case TASK_FLAVOR_READ:
+ T_QUIET; T_EXPECT_EQ(kotype, IKOT_TASK_READ, "TASK_FLAVOR_READ should yield read ports");
+ break;
+ case TASK_FLAVOR_INSPECT:
+ T_QUIET; T_EXPECT_EQ(kotype, IKOT_TASK_INSPECT, "TASK_FLAVOR_INSPECT should yield inspect ports");
+ break;
+ case TASK_FLAVOR_NAME:
+ T_QUIET; T_EXPECT_EQ(kotype, IKOT_TASK_NAME, "TASK_FLAVOR_NAME should yield name ports");
+ break;
+ default:
+ T_FAIL("strange flavor");
+ break;
+ }
+ }
+
+ for (size_t i = 0; i < tcnt; i++) {
+ mach_port_deallocate(mach_task_self(), taskList[i]);
+ }
+}
+
+static void
+test_task_port_mig_intrans(
+ task_t tport,
+ task_flavor_t flavor)
+{
+ kern_return_t kr;
+
+ T_LOG("Testing various MIG/manual intrans task interfaces with task flavor %d", flavor);
+
+ {
+ /* 1. Test some control port interfaces */
+ int data = 0x41;
+ int new_value = 0x42;
+ kr = mach_vm_write(tport,
+ (mach_vm_address_t)&data,
+ (vm_offset_t)&new_value,
+ (mach_msg_type_number_t)sizeof(int));
+ RESULT_CHECK(kr, flavor, TASK_FLAVOR_CONTROL, "mach_vm_write");
+
+ /* mach_vm_remap_new with max_protection VM_PROT_WRITE | VM_PROT_READ */
+ int *localAddress = 0;
+ mach_vm_address_t localMachVMAddress = 0;
+ vm_prot_t cur_protection = VM_PROT_WRITE | VM_PROT_READ;
+ vm_prot_t max_protection = VM_PROT_WRITE | VM_PROT_READ;
+ /* rdar://67706101 (mach_vm_remap flag that allows restricting protection of remapped region) */
+ kr = mach_vm_remap_new(mach_task_self(),
+ &localMachVMAddress,
+ sizeof(int),
+ 0,
+ VM_FLAGS_ANYWHERE,
+ tport, /* remote task, use self task port */
+ (mach_vm_address_t)&data,
+ false,
+ &cur_protection,
+ &max_protection,
+ VM_INHERIT_NONE);
+ localAddress = (int *)(uintptr_t)localMachVMAddress;
+
+ RESULT_CHECK(kr, flavor, TASK_FLAVOR_CONTROL, "mach_vm_remap_new - VM_PROT_WRITE");
+ if (KERN_SUCCESS == kr) {
+ T_QUIET; T_EXPECT_EQ(max_protection, VM_PROT_READ | VM_PROT_WRITE, NULL);
+ T_QUIET; T_EXPECT_EQ(cur_protection, VM_PROT_READ | VM_PROT_WRITE, NULL);
+ T_QUIET; T_EXPECT_EQ(*localAddress, data, NULL); /* read */
+ *localAddress = 0; /* write */
+ }
+
+ exception_mask_t masks[EXC_TYPES_COUNT] = {};
+ mach_msg_type_number_t nmasks = 0;
+ exception_port_t ports[EXC_TYPES_COUNT] = {};
+ exception_behavior_t behaviors[EXC_TYPES_COUNT] = {};
+ thread_state_flavor_t flavors[EXC_TYPES_COUNT] = {};
+ kr = task_get_exception_ports(tport, EXC_MASK_ALL,
+ masks, &nmasks, ports, behaviors, flavors);
+ RESULT_CHECK(kr, flavor, TASK_FLAVOR_CONTROL, "task_get_exception_ports");
+ for (size_t i = 0; i < EXC_TYPES_COUNT; i++) {
+ mach_port_deallocate(mach_task_self(), ports[i]);
+ }
+ }
+
+ {
+ /* 2. Test some read port interfaces */
+ vm_offset_t read_value = 0;
+ mach_msg_type_number_t read_cnt = 0;
+ int data = 0x41;
+ kr = mach_vm_read(tport,
+ (mach_vm_address_t)&data,
+ (mach_msg_type_number_t)sizeof(int),
+ &read_value,
+ &read_cnt);
+ RESULT_CHECK(kr, flavor, TASK_FLAVOR_READ, "mach_vm_read");
+
+ /* mach_vm_remap_new with max_protection VM_PROT_READ */
+ int *localAddress = 0;
+ mach_vm_address_t localMachVMAddress = 0;
+ vm_prot_t cur_protection = VM_PROT_READ;
+ vm_prot_t max_protection = VM_PROT_READ;
+ /* rdar://67706101 (mach_vm_remap flag that allows restricting protection of remapped region) */
+ kr = mach_vm_remap_new(mach_task_self(),
+ &localMachVMAddress,
+ sizeof(int),
+ 0,
+ VM_FLAGS_ANYWHERE,
+ tport, /* remote task, use self task port */
+ (mach_vm_address_t)&data,
+ false,
+ &cur_protection,
+ &max_protection,
+ VM_INHERIT_NONE);
+ localAddress = (int *)(uintptr_t)localMachVMAddress;
+
+ RESULT_CHECK(kr, flavor, TASK_FLAVOR_READ, "mach_vm_remap_new - VM_PROT_READ");
+ if (KERN_SUCCESS == kr) {
+ T_QUIET; T_EXPECT_EQ(max_protection, VM_PROT_READ, NULL);
+ T_QUIET; T_EXPECT_EQ(cur_protection, VM_PROT_READ, NULL);
+ T_QUIET; T_EXPECT_EQ(*localAddress, data, NULL); /* read */
+ }
+
+ /* mach_vm_remap_new with copy == TRUE */
+ int data2 = 0x42;
+ localAddress = 0;
+ localMachVMAddress = 0;
+ cur_protection = VM_PROT_WRITE | VM_PROT_READ;
+ max_protection = VM_PROT_WRITE | VM_PROT_READ;
+
+ kr = mach_vm_remap_new(mach_task_self(),
+ &localMachVMAddress,
+ sizeof(int),
+ 0,
+ VM_FLAGS_ANYWHERE,
+ tport, /* remote task, use self task port */
+ (mach_vm_address_t)&data2,
+ true,
+ &cur_protection,
+ &max_protection,
+ VM_INHERIT_NONE);
+ localAddress = (int *)(uintptr_t)localMachVMAddress;
+
+ RESULT_CHECK(kr, flavor, TASK_FLAVOR_READ, "mach_vm_remap_new - copy==TRUE");
+ if (KERN_SUCCESS == kr) {
+ T_QUIET; T_EXPECT_EQ(max_protection, VM_PROT_READ | VM_PROT_WRITE, NULL);
+ T_QUIET; T_EXPECT_EQ(cur_protection, VM_PROT_READ | VM_PROT_WRITE, NULL);
+ /* Following is causing bus error tracked by rdar://71616700 (Unexpected BUS ERROR in mach_vm_remap_new()) */
+ // T_QUIET; T_EXPECT_EQ(*localAddress, data2, NULL); /* read */
+ // *localAddress = 0; /* write */
+ }
+
+ /* task_get_mach_voucher() should require at least a read-flavor port */
+ mach_port_t voucher = MACH_PORT_NULL;
+ kr = task_get_mach_voucher(tport, 0, &voucher);
+ RESULT_CHECK(kr, flavor, TASK_FLAVOR_READ, "task_get_mach_voucher");
+ mach_port_deallocate(mach_task_self(), voucher);
+
+ /* mach_port_space_info() should require at least a read-flavor port */
+ ipc_info_space_t space_info;
+ ipc_info_name_array_t table;
+ mach_msg_type_number_t tableCount;
+ ipc_info_tree_name_array_t tree; /* unused */
+ mach_msg_type_number_t treeCount; /* unused */
+ kr = mach_port_space_info(tport, &space_info, &table, &tableCount, &tree, &treeCount);
+ RESULT_CHECK(kr, flavor, TASK_FLAVOR_READ, "mach_port_space_info");
+ }
+
+ {
+ /* 3. Test some inspect port interfaces */
+ task_exc_guard_behavior_t exc_behavior;
+ kr = task_get_exc_guard_behavior(tport, &exc_behavior);
+ RESULT_CHECK(kr, flavor, TASK_FLAVOR_INSPECT, "task_get_exc_guard_behavior");
+ }
+
+ {
+ /* 4. Test some name port interfaces */
+ struct task_basic_info info;
+ mach_msg_type_number_t size = sizeof(info);
+ kr = task_info(tport,
+ TASK_BASIC_INFO,
+ (task_info_t)&info,
+ &size);
+ RESULT_CHECK(kr, flavor, TASK_FLAVOR_NAME, "task_info");
+ }
+}
+
+static void
+test_thread_port_mig_intrans(
+ thread_t tport,
+ thread_flavor_t flavor)
+{
+ kern_return_t kr;
+
+ T_LOG("Testing various MIG/manual intrans thread interfaces with thread flavor %d", flavor);
+
+ {
+ /* 1. Test some control port interfaces */
+ exception_mask_t masks[EXC_TYPES_COUNT] = {};
+ mach_msg_type_number_t nmasks = 0;
+ exception_port_t ports[EXC_TYPES_COUNT] = {};
+ exception_behavior_t behaviors[EXC_TYPES_COUNT] = {};
+ thread_state_flavor_t flavors[EXC_TYPES_COUNT] = {};
+ kr = thread_get_exception_ports(tport, EXC_MASK_ALL,
+ masks, &nmasks, ports, behaviors, flavors);
+ RESULT_CHECK(kr, flavor, THREAD_FLAVOR_CONTROL, "thread_get_exception_ports");
+ for (size_t i = 0; i < EXC_TYPES_COUNT; i++) {
+ mach_port_deallocate(mach_task_self(), ports[i]);
+ }
+ }
+
+ {
+ /* 2. Test some read port interfaces */
+ mach_voucher_t voucher = MACH_PORT_NULL;
+ kr = thread_get_mach_voucher(tport, 0, &voucher);
+ RESULT_CHECK(kr, flavor, THREAD_FLAVOR_READ, "thread_get_mach_voucher");
+ mach_port_deallocate(mach_task_self(), voucher);
+ }
+
+ {
+ /* 3. Test some inspect port interfaces */
+ processor_set_name_t name = MACH_PORT_NULL;
+ kr = thread_get_assignment(tport, &name);
+ RESULT_CHECK(kr, flavor, THREAD_FLAVOR_INSPECT, "thread_get_assignment");
+ mach_port_deallocate(mach_task_self(), name);
+ }
+}
+
+static void
+test_get_child_task_port(void)
+{
+ pid_t child_pid;
+ kern_return_t kr;
+ mach_port_name_t tr, ti, tp, tn;
+
+ child_pid = fork();
+
+ T_LOG("Testing get child task ports");
+
+ if (child_pid < 0) {
+ T_FAIL("fork failed in test_get_child_port.");
+ }
+
+ if (child_pid == 0) {
+ /* hang the child */
+ while (1) {
+ sleep(10);
+ }
+ }
+
+ kr = task_for_pid(mach_task_self(), child_pid, &tp);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "task_for_pid for child %u", child_pid);
+
+ kr = task_read_for_pid(mach_task_self(), child_pid, &tr);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "task_read_for_pid for child %u", child_pid);
+
+ kr = task_inspect_for_pid(mach_task_self(), child_pid, &ti);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "task_inspect_for_pid for child %u", child_pid);
+
+ kr = task_name_for_pid(mach_task_self(), child_pid, &tn);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "task_name_for_pid for child %u", child_pid);
+
+ mach_port_deallocate(mach_task_self(), tp);
+ mach_port_deallocate(mach_task_self(), tr);
+ mach_port_deallocate(mach_task_self(), ti);
+ mach_port_deallocate(mach_task_self(), tn);
+
+ kill(child_pid, SIGKILL);
+ int status;
+ wait(&status);
+}
+
+T_DECL(read_inspect, "Test critical read and inspect port interfaces")
+{
+ mach_port_t control_port, movable_port, read_port, inspect_port, name_port;
+ mach_port_t th_control_port, th_movable_port, th_read_port, th_inspect_port;
+#define TASK_PORT_COUNT 5
+#define THREAD_PORT_COUNT 4
+ mach_port_t task_ports[TASK_PORT_COUNT];
+ task_flavor_t task_flavors[TASK_PORT_COUNT];
+ mach_port_t thread_ports[THREAD_PORT_COUNT];
+ thread_flavor_t thread_flavors[THREAD_PORT_COUNT];
+ kern_return_t kr;
+
+ /* first, try getting all flavors of task port for self */
+ kr = task_for_pid(mach_task_self(), getpid(), &control_port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_for_pid()");
+ task_ports[0] = control_port;
+ task_flavors[0] = TASK_FLAVOR_CONTROL;
+
+ kr = task_get_special_port(mach_task_self(), TASK_KERNEL_PORT, &movable_port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_get_special_port(..TASK_KERNEL_PORT..)");
+ task_ports[1] = movable_port;
+ task_flavors[1] = TASK_FLAVOR_CONTROL;
+
+ kr = task_read_for_pid(mach_task_self(), getpid(), &read_port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_read_for_pid()");
+ task_ports[2] = read_port;
+ task_flavors[2] = TASK_FLAVOR_READ;
+
+ kr = task_inspect_for_pid(mach_task_self(), getpid(), &inspect_port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_inspect_for_pid()");
+ task_ports[3] = inspect_port;
+ task_flavors[3] = TASK_FLAVOR_INSPECT;
+
+ kr = task_name_for_pid(mach_task_self(), getpid(), &name_port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_name_for_pid()");
+ task_ports[4] = name_port;
+ task_flavors[4] = TASK_FLAVOR_NAME;
+
+
+ for (size_t i = 0; i < TASK_PORT_COUNT; i++) {
+ /*
+ * 1. Make sure we can't get more privileged ports from less privileged
+ * ones through task_get_special_port()
+ */
+ test_task_get_special_port(task_ports[i], task_flavors[i]);
+
+ /*
+ * 2. Make sure the correct flavor of thread ports is returned from task_threads
+ */
+ test_task_threads(task_ports[i], task_flavors[i]);
+
+ /*
+ * 3. Make sure the correct flavor of task ports is returned from processor_set_tasks
+ */
+ if (i >= 1) {
+ test_processor_set_tasks(task_flavors[i]);
+ }
+
+ /*
+ * 4. Make sure our MIG intrans enforcement for tasks does not break.
+ */
+ test_task_port_mig_intrans(task_ports[i], task_flavors[i]);
+ }
+
+
+ for (size_t i = 0; i < TASK_PORT_COUNT; i++) {
+ mach_port_deallocate(mach_task_self(), task_ports[i]);
+ }
+
+ /* 4. Try spawning a child and get its task ports */
+ test_get_child_task_port();
+
+ /* Now, test thread read/inspect ports */
+ th_control_port = mach_thread_self();
+ thread_ports[0] = th_control_port;
+ thread_flavors[0] = THREAD_FLAVOR_CONTROL;
+
+ kr = thread_get_special_port(th_control_port, THREAD_KERNEL_PORT, &th_movable_port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "thread_get_special_port(..THREAD_KERNEL_PORT..)");
+ thread_ports[1] = th_movable_port;
+ thread_flavors[1] = THREAD_FLAVOR_CONTROL;
+
+ kr = thread_get_special_port(th_control_port, THREAD_READ_PORT, &th_read_port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "thread_get_special_port(..THREAD_READ_PORT..)");
+ thread_ports[2] = th_read_port;
+ thread_flavors[2] = THREAD_FLAVOR_READ;
+
+ kr = thread_get_special_port(th_control_port, THREAD_INSPECT_PORT, &th_inspect_port);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "thread_get_special_port(..THREAD_INSPECT_PORT..)");
+ thread_ports[3] = th_inspect_port;
+ thread_flavors[3] = THREAD_FLAVOR_INSPECT;
+
+
+ for (size_t i = 0; i < THREAD_PORT_COUNT; i++) {
+ /*
+ * 1. Make sure we can't get more privileged ports from less privileged
+ * ones through thread_get_special_port()
+ */
+ test_thread_get_special_port(thread_ports[i], thread_flavors[i]);
+
+ /*
+ * 2. Make sure our MIG intrans enforcement for threads does not break.
+ */
+ test_thread_port_mig_intrans(thread_ports[i], thread_flavors[i]);
+ }
+
+ for (size_t i = 0; i < THREAD_PORT_COUNT; i++) {
+ mach_port_deallocate(mach_task_self(), thread_ports[i]);
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/* -*- compile-command: "xcrun --sdk iphoneos.internal make recvmsg_x_test" -*- */
+
+
+#include <sys/errno.h>
+#include <sys/fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+#include <darwintest.h>
+#include <darwintest_utils.h>
+
+#define NMSGS 5
+#define BUFFERLEN 1000
+
+T_GLOBAL_META(T_META_NAMESPACE("xnu.net"));
+
+static void
+sendPackets(int s, struct sockaddr *dst, unsigned int numMsg, size_t bufferLen)
+{
+ ssize_t count = 0;
+ struct msghdr msg = {};
+ struct iovec vec = {};
+ char *bytes = calloc(1, bufferLen);
+ if (bytes == NULL) {
+ err(EX_OSERR, "calloc()");
+ }
+
+ vec.iov_base = bytes;
+ vec.iov_len = bufferLen;
+
+ msg.msg_name = (void *)dst;
+ msg.msg_namelen = dst->sa_len;
+ msg.msg_iov = &vec;
+ msg.msg_iovlen = 1;
+ msg.msg_flags = 0;
+
+ for (unsigned int i = 0; i < numMsg; i++) {
+ ssize_t n;
+ T_QUIET; T_EXPECT_POSIX_SUCCESS(n = sendmsg(s, &msg, 0), "sendmsg()");
+ T_LOG("Sent %ld bytes\n", n);
+ count += 1;
+ }
+
+ // Wait a bit to make sure the packets reach the receiver
+ usleep(100000);
+
+ T_LOG("Sent %ld packet\n", count);
+
+ free(bytes);
+}
+
+static void
+recvPackets_x(int s, unsigned int numMsg, size_t buflen, socklen_t cmsgLen)
+{
+ struct msghdr_x *msgList;
+ struct sockaddr_in *srcAddrs;
+ struct iovec *vec;
+ char *buffers;
+ char *cmsgBuf;
+
+ T_QUIET; T_ASSERT_NOTNULL(msgList = calloc(numMsg, sizeof(struct msghdr_x)), "msgList calloc()");
+ T_QUIET; T_ASSERT_NOTNULL(srcAddrs = calloc(numMsg, sizeof(struct sockaddr_in)), "srcAddrs calloc()");
+ T_QUIET; T_ASSERT_NOTNULL(vec = calloc(numMsg, sizeof(struct iovec)), "vec calloc()");
+ T_QUIET; T_ASSERT_NOTNULL(buffers = calloc(numMsg, buflen), "buffers calloc()");
+ T_QUIET; T_ASSERT_NOTNULL(cmsgBuf = calloc(numMsg, ALIGN(cmsgLen)), "cmsgBuf calloc()");
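+ /* Each per-message control buffer is padded to ALIGN(cmsgLen) so that msg_control stays suitably aligned for cmsghdr access (checked below). */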
+
+ u_int count = 0;
+ while (true) {
+ /*
+ * Wrap around when we've exhausted the list
+ */
+ if ((count % numMsg) == 0) {
+ for (unsigned int i = 0; i < numMsg; i++) {
+ struct msghdr_x *msg = &msgList[i];
+ msg->msg_name = &srcAddrs[i];
+ msg->msg_namelen = sizeof(srcAddrs[i]);
+ vec[i].iov_base = buffers + (i * buflen);
+ vec[i].iov_len = buflen;
+ msg->msg_iov = &vec[i];
+ msg->msg_iovlen = 1;
+ msg->msg_control = cmsgBuf + (i * ALIGN(cmsgLen));
+ msg->msg_controllen = cmsgLen;
+ msg->msg_flags = 0;
+
+ T_QUIET; T_EXPECT_TRUE((uintptr_t)msg->msg_control % sizeof(uint32_t) == 0, NULL);
+ }
+ }
+
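+ /*
+ * recvmsg_x() returns the number of datagrams received (not a byte
+ * count); keep filling the msghdr_x array from where the previous
+ * call left off.
+ */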
+ ssize_t n = recvmsg_x(s, msgList + (count % numMsg), numMsg - (count % numMsg), 0);
+ if (n < 0) {
+ if (errno == EINTR) {
+ T_LOG("recvmsg_x(): %s", strerror(errno));
+ continue;
+ }
+ if (errno == EWOULDBLOCK) {
+ T_LOG("recvmsg_x(): %s", strerror(errno));
+ break;
+ }
+ T_FAIL("recvmsg_x() failed: %s", strerror(errno));
+ }
+ T_LOG("recvmsg_x returned %ld packets\n", n);
+
+ for (unsigned int i = count; i < count + (u_int)n; i++) {
+ struct msghdr_x *msg = &msgList[i % numMsg];
+
+ T_LOG("Received packet #%d %lu bytes with recvmsg_x(), msg_namelen = %u, msg_controllen = %d -> %d, msg_flags = 0x%x\n",
+ i + 1, msg->msg_datalen, msg->msg_namelen, cmsgLen, msg->msg_controllen, msg->msg_flags);
+
+ struct cmsghdr *cmsg;
+
+ for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+ T_QUIET; T_EXPECT_TRUE((uintptr_t)cmsg % sizeof(uint32_t) == 0, NULL);
+
+ T_LOG("level = %d, type = %d, length = %d\n", cmsg->cmsg_level, cmsg->cmsg_type, cmsg->cmsg_len);
+ }
+ }
+
+ count += (u_int)n;
+ }
+
+ free(msgList);
+ free(srcAddrs);
+ free(vec);
+ free(buffers);
+ free(cmsgBuf);
+}
+
+T_DECL(recvmsg_x_test, "exercise recvmsg_x() with various parameters")
+{
+ struct sockaddr_in addr = {
+ .sin_len = sizeof(addr),
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(0x7f000001),
+ .sin_port = 0
+ };
+
+ int recvSocket;
+ T_QUIET; T_EXPECT_POSIX_SUCCESS(recvSocket = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP), "socket()");
+ T_QUIET; T_EXPECT_POSIX_SUCCESS(bind(recvSocket, (const struct sockaddr *)&addr, sizeof(addr)), "bind()");
+
+ socklen_t addrLen = sizeof(addr);
+ T_QUIET; T_EXPECT_POSIX_SUCCESS(getsockname(recvSocket, (struct sockaddr *)&addr, &addrLen), "getsockname()");
+
+ int one = 1;
+ T_QUIET; T_EXPECT_POSIX_SUCCESS(setsockopt(recvSocket, IPPROTO_IP, IP_RECVPKTINFO, (void *)&one, sizeof(one)), "setsockopt(IP_RECVPKTINFO)");
+
+ int flags = fcntl(recvSocket, F_GETFL, 0);
+ T_QUIET; T_EXPECT_POSIX_SUCCESS(fcntl(recvSocket, F_SETFL, flags | O_NONBLOCK), "fcntl()");
+
+ int sendSocket;
+ T_QUIET; T_EXPECT_POSIX_SUCCESS(sendSocket = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP), "sendSocket socket()");
+
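+ /* Exercise every buffer/control-length combination below both with and without SO_DONTTRUNC. */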
+ for (int dontTrunc = 0; dontTrunc <= 1; dontTrunc++) {
+ T_QUIET; T_EXPECT_POSIX_SUCCESS(setsockopt(recvSocket, SOL_SOCKET, SO_DONTTRUNC, (void *)&dontTrunc, sizeof(dontTrunc)), "setsockopt(SO_DONTTRUNC)");
+
+ T_LOG("\n================= recvmsg_x() test =================\n");
+ sendPackets(sendSocket, (struct sockaddr *)&addr, NMSGS, BUFFERLEN);
+ recvPackets_x(recvSocket, NMSGS, BUFFERLEN, 50);
+
+ T_LOG("\n================= recvmsg_x() test =================\n");
+ sendPackets(sendSocket, (struct sockaddr *)&addr, NMSGS, BUFFERLEN);
+ recvPackets_x(recvSocket, NMSGS, BUFFERLEN * 2, 50);
+
+ T_LOG("\n================= recvmsg_x() test =================\n");
+ sendPackets(sendSocket, (struct sockaddr *)&addr, NMSGS, BUFFERLEN);
+ recvPackets_x(recvSocket, NMSGS, BUFFERLEN / 2, 50);
+
+ T_LOG("\n================= recvmsg_x() test =================\n");
+ sendPackets(sendSocket, (struct sockaddr *)&addr, NMSGS, BUFFERLEN);
+ recvPackets_x(recvSocket, NMSGS, BUFFERLEN, 10);
+
+ T_LOG("\n================= recvmsg_x() test =================\n");
+ sendPackets(sendSocket, (struct sockaddr *)&addr, NMSGS, BUFFERLEN);
+ recvPackets_x(recvSocket, NMSGS, BUFFERLEN / 2, 10);
+ }
+
+ close(sendSocket);
+ close(recvSocket);
+
+ T_LOG("\n================= PASS =================\n");
+}
--- /dev/null
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/sysctl.h>
+#include <sys/mman.h>
+#include <TargetConditionals.h>
+
+#include <darwintest.h>
+
+
+/*
+ * macOS only test. Try to map 2 different MAP_JIT regions. 2nd should fail.
+ */
+T_DECL(restrict_jit, "macOS restricted JIT entitlement test")
+{
+#if TARGET_OS_OSX
+ void *addr1;
+ void *addr2;
+ size_t size = 64 * 1024;
+
+
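+ /* With the restricted (single) JIT entitlement, only the first MAP_JIT mapping should be allowed. */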
+ addr1 = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE | MAP_JIT, -1, 0);
+ T_ASSERT_NE_PTR(addr1, MAP_FAILED, "First map MAP_JIT");
+
+ addr2 = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE | MAP_JIT, -1, 0);
+ if (addr2 == MAP_FAILED) {
+ T_PASS("Only one MAP_JIT was allowed");
+ } else {
+ T_FAIL("Second MAP_JIT was allowed");
+ }
+
+#else
+ T_SKIP("Not macOS");
+#endif
+}
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>dynamic-codesigning</key>
+ <true/>
+ <key>com.apple.security.cs.allow-jit</key>
+ <true/>
+ <key>com.apple.security.cs.single-jit</key>
+ <true/>
+</dict>
+</plist>
--- /dev/null
+/*
+ * Copyright (c) 2021 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <darwintest.h>
+
+#define MAX_SOCK 10
+
+T_DECL(scm_rights_leak, "test leak of file pointers by peeking SCM_RIGHTS")
+{
+ int pair[2];
+
+ T_ASSERT_POSIX_SUCCESS(socketpair(AF_UNIX, SOCK_STREAM, 0, pair),
+ NULL);
+
+ struct cmsghdr *cmsg;
+ T_ASSERT_NOTNULL(cmsg = calloc(1, MAX_SOCK * sizeof(int)), "calloc");
+ cmsg->cmsg_len = CMSG_LEN(MAX_SOCK * sizeof(int));
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+
+ int *sock_fds = (int *)(void *)CMSG_DATA(cmsg);
+ for (int i = 0; i < MAX_SOCK; i++) {
+ T_ASSERT_POSIX_SUCCESS(sock_fds[i] = socket(AF_UNIX, SOCK_DGRAM, 0), NULL);
+ }
+ for (int i = 0; i < MAX_SOCK; i++) {
+ fprintf(stderr, "sock_fds[%d] %i\n", i, sock_fds[i]);
+ }
+
+ struct iovec iovec[1];
+ char data = 'x';
+ iovec[0].iov_base = &data;
+ iovec[0].iov_len = 1;
+
+ struct msghdr mh;
+ mh.msg_name = 0;
+ mh.msg_namelen = 0;
+ mh.msg_iov = iovec;
+ mh.msg_iovlen = 1;
+ mh.msg_control = cmsg;
+ mh.msg_controllen = cmsg->cmsg_len;
+ mh.msg_flags = 0;
+
+ ssize_t ssize;
+ ssize = sendmsg(pair[0], &mh, 0);
+ T_ASSERT_EQ(ssize, (ssize_t)1, "sendmsg");
+
+ struct cmsghdr *rcmsg;
+ T_ASSERT_NOTNULL(rcmsg = calloc(2048, 1), "calloc");
+
+ mh.msg_name = 0;
+ mh.msg_namelen = 0;
+ mh.msg_iov = iovec;
+ mh.msg_iovlen = 1;
+ mh.msg_control = rcmsg;
+ mh.msg_controllen = 2048;
+ mh.msg_flags = 0;
+
+ ssize = recvmsg(pair[1], &mh, MSG_PEEK);
+ T_ASSERT_POSIX_SUCCESS(ssize, "recvmsg");
+ uintptr_t *r_ptrs = (uintptr_t *)(void *)CMSG_DATA(rcmsg);
+ socklen_t nptrs = (rcmsg->cmsg_len - CMSG_LEN(0)) / sizeof(uintptr_t);
+ for (socklen_t i = 0; i < nptrs; i++) {
+ T_EXPECT_EQ(r_ptrs[i], (uintptr_t)0, "r_ptrs[%u] 0x%lx\n", i, r_ptrs[i]);
+ }
+
+ ssize = recvmsg(pair[1], &mh, 0);
+ T_ASSERT_POSIX_SUCCESS(ssize, "recvmsg");
+ int *r_fds = (int *)(void *)CMSG_DATA(rcmsg);
+ for (int i = 0; i < MAX_SOCK; i++) {
+ T_EXPECT_NE(r_fds[i], 0, "r_fds[%d] %i\n", i, r_fds[i]);
+ }
+
+ free(cmsg);
+ free(rcmsg);
+ close(pair[0]);
+ close(pair[1]);
+}
--- /dev/null
+#include <darwintest.h>
+#include <sys/socket.h>
+
+T_DECL(socket_raw_uint8_max, "create socket with borderline proto numbers")
+{
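+ /* IP protocol numbers fit in a single byte: 256 must be rejected with EINVAL, while 255 is still accepted. */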
+ int fd = socket(AF_INET, SOCK_RAW, 256);
+
+ T_ASSERT_POSIX_FAILURE(fd, EINVAL, "socket(AF_INET, SOCK_RAW, 256);");
+
+ int fd2 = socket(AF_INET, SOCK_RAW, 255);
+
+ T_ASSERT_POSIX_SUCCESS(fd2, "socket(AF_INET, SOCK_RAW, 255);");
+}
#include <uuid/uuid.h>
#include <servers/bootstrap.h>
#include <pthread/workqueue_private.h>
+#include <dispatch/private.h>
#import <zlib.h>
T_GLOBAL_META(
#define PARSE_STACKSHOT_WAITINFO_CSEG 0x40
#define PARSE_STACKSHOT_WAITINFO_SRP 0x80
#define PARSE_STACKSHOT_TRANSLATED 0x100
+#define PARSE_STACKSHOT_SHAREDCACHE_FLAGS 0x200
/* keys for 'extra' dictionary for parse_stackshot */
static const NSString* zombie_child_pid_key = @"zombie_child_pid"; // -> @(pid), required for PARSE_STACKSHOT_ZOMBIE
static const NSString* postexec_child_unique_pid_key = @"postexec_child_unique_pid"; // -> @(unique_pid), required for PARSE_STACKSHOT_POSTEXEC
static const NSString* cseg_expected_threadid_key = @"cseg_expected_threadid"; // -> @(tid), required for PARSE_STACKSHOT_WAITINFO_CSEG
-static const NSString* srp_expected_pid_key = @"srp_expected_pid"; // -> @(pid), required for PARSE_STACKSHOT_WAITINFO_SRP
+static const NSString* srp_expected_threadid_key = @"srp_expected_threadid"; // -> @(tid), this or ..._pid required for PARSE_STACKSHOT_WAITINFO_SRP
+static const NSString* srp_expected_pid_key = @"srp_expected_pid"; // -> @(pid), this or ..._threadid required for PARSE_STACKSHOT_WAITINFO_SRP
static const NSString* translated_child_pid_key = @"translated_child_pid"; // -> @(pid), required for PARSE_STACKSHOT_TRANSLATED
+static const NSString* sharedcache_child_pid_key = @"sharedcache_child_pid"; // @(pid), required for PARSE_STACKSHOT_SHAREDCACHE_FLAGS
+static const NSString* sharedcache_child_sameaddr_key = @"sharedcache_child_sameaddr"; // @(0 or 1), required for PARSE_STACKSHOT_SHAREDCACHE_FLAGS
#define TEST_STACKSHOT_QUEUE_LABEL "houston.we.had.a.problem"
#define TEST_STACKSHOT_QUEUE_LABEL_LENGTH sizeof(TEST_STACKSHOT_QUEUE_LABEL)
STACKSHOT_SAVE_LOADINFO |
STACKSHOT_SAVE_KEXT_LOADINFO |
STACKSHOT_GET_GLOBAL_MEM_STATS |
- // STACKSHOT_GET_BOOT_PROFILE |
STACKSHOT_SAVE_IMP_DONATION_PIDS |
STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT |
STACKSHOT_THREAD_GROUP |
STACKSHOT_SAVE_JETSAM_COALITIONS |
STACKSHOT_ASID |
- // STACKSHOT_PAGE_TABLES |
0),
};
start_time = clock_gettime_nsec_np(CLOCK_MONOTONIC);
while (clock_gettime_nsec_np(CLOCK_MONOTONIC) - start_time < max_diff_time) {
- take_stackshot(&scenario, false, ^(void *ssbuf, size_t sslen) {
+ take_stackshot(&scenario, false, ^(void * __unused ssbuf,
+ size_t __unused sslen) {
printf(".");
fflush(stdout);
});
dispatch_semaphore_signal(parent_done_sem);
}
+#define CACHEADDR_ENV "STACKSHOT_TEST_DYLDADDR"
+T_HELPER_DECL(spawn_reslide_child, "child process to spawn with alternate slide")
+{
+ size_t shared_cache_len;
+ const void *addr, *prevaddr;
+ uintmax_t v;
+ char *endptr;
+
+ const char *cacheaddr_env = getenv(CACHEADDR_ENV);
+ T_QUIET; T_ASSERT_NOTNULL(cacheaddr_env, "getenv("CACHEADDR_ENV")");
+ errno = 0;
+ endptr = NULL;
+ v = strtoumax(cacheaddr_env, &endptr, 16); /* read hex value */
+ T_WITH_ERRNO; T_QUIET; T_ASSERT_NE(v, 0l, "getenv(%s) = \"%s\" should be a non-zero hex number", CACHEADDR_ENV, cacheaddr_env);
+ T_QUIET; T_ASSERT_EQ(*endptr, 0, "getenv(%s) = \"%s\" endptr \"%s\" should be empty", CACHEADDR_ENV, cacheaddr_env, endptr);
+
+ prevaddr = (const void *)v;
+ addr = _dyld_get_shared_cache_range(&shared_cache_len);
+ T_QUIET; T_ASSERT_NOTNULL(addr, "shared cache address");
+
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(getppid(), (addr == prevaddr) ? SIGUSR2 : SIGUSR1), "signaled parent to take stackshot");
+ for (;;) {
+ (void) pause(); /* parent will kill -9 us */
+ }
+}
+
+T_DECL(shared_cache_flags, "tests stackshot's task_ss_flags for the shared cache")
+{
+ posix_spawnattr_t attr;
+ char *env_addr;
+ char path[PATH_MAX];
+ __block bool child_same_addr = false;
+
+ uint32_t path_size = sizeof(path);
+ T_QUIET; T_ASSERT_POSIX_ZERO(_NSGetExecutablePath(path, &path_size), "_NSGetExecutablePath");
+ char *args[] = { path, "-n", "spawn_reslide_child", NULL };
+ pid_t pid;
+ size_t shared_cache_len;
+ const void *addr;
+
+ dispatch_source_t child_diffsig_src, child_samesig_src;
+ dispatch_semaphore_t child_ready_sem = dispatch_semaphore_create(0);
+ T_QUIET; T_ASSERT_NOTNULL(child_ready_sem, "shared_cache child semaphore");
+
+ dispatch_queue_t signal_processing_q = dispatch_queue_create("signal processing queue", NULL);
+ T_QUIET; T_ASSERT_NOTNULL(signal_processing_q, "signal processing queue");
+
+ signal(SIGUSR1, SIG_IGN);
+ signal(SIGUSR2, SIG_IGN);
+ child_samesig_src = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR1, 0, signal_processing_q);
+ T_QUIET; T_ASSERT_NOTNULL(child_samesig_src, "dispatch_source_create (child_samesig_src)");
+ child_diffsig_src = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR2, 0, signal_processing_q);
+ T_QUIET; T_ASSERT_NOTNULL(child_diffsig_src, "dispatch_source_create (child_diffsig_src)");
+
+ /* the child signals us depending on whether its shared cache address matches ours: SIGUSR2 means the same address, SIGUSR1 means a different one */
+ dispatch_source_set_event_handler(child_samesig_src, ^{ child_same_addr = false; dispatch_semaphore_signal(child_ready_sem); });
+ dispatch_source_set_event_handler(child_diffsig_src, ^{ child_same_addr = true; dispatch_semaphore_signal(child_ready_sem); });
+ dispatch_activate(child_samesig_src);
+ dispatch_activate(child_diffsig_src);
+
+ addr = _dyld_get_shared_cache_range(&shared_cache_len);
+ T_QUIET; T_ASSERT_NOTNULL(addr, "shared cache address");
+
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(asprintf(&env_addr, "%p", addr), "asprintf of env_addr succeeded");
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(setenv(CACHEADDR_ENV, env_addr, true), "setting "CACHEADDR_ENV" to %s", env_addr);
+
+ T_QUIET; T_ASSERT_POSIX_ZERO(posix_spawnattr_init(&attr), "posix_spawnattr_init");
+ T_QUIET; T_ASSERT_POSIX_ZERO(posix_spawnattr_setflags(&attr, _POSIX_SPAWN_RESLIDE), "posix_spawnattr_setflags");
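+ /* _POSIX_SPAWN_RESLIDE asks for the child to be spawned with an alternate shared cache slide, so its mapping may differ from ours. */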
+ int sp_ret = posix_spawn(&pid, path, NULL, &attr, args, environ);
+ T_ASSERT_POSIX_ZERO(sp_ret, "spawned process '%s' with PID %d", args[0], pid);
+
+ dispatch_semaphore_wait(child_ready_sem, DISPATCH_TIME_FOREVER);
+ T_LOG("received signal from child (%s), capturing stackshot", child_same_addr ? "same shared cache addr" : "different shared cache addr");
+
+ struct scenario scenario = {
+ .name = "shared_cache_flags",
+ .flags = (STACKSHOT_SAVE_LOADINFO | STACKSHOT_GET_GLOBAL_MEM_STATS
+ | STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT
+ | STACKSHOT_SAVE_IMP_DONATION_PIDS | STACKSHOT_KCDATA_FORMAT),
+ };
+
+ take_stackshot(&scenario, false, ^( void *ssbuf, size_t sslen) {
+ int status;
+ /* First kill the child so we can reap it */
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(pid, SIGKILL), "killing spawned process");
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(waitpid(pid, &status, 0), "waitpid on spawned child");
+ T_QUIET; T_ASSERT_EQ(!!WIFSIGNALED(status), 1, "waitpid status should be signalled");
+ T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "waitpid status should be SIGKILLed");
+
+ parse_stackshot(PARSE_STACKSHOT_SHAREDCACHE_FLAGS, ssbuf, sslen,
+ @{sharedcache_child_pid_key: @(pid), sharedcache_child_sameaddr_key: @(child_same_addr ? 1 : 0)});
+ });
+}
+
static void *stuck_sysctl_thread(void *arg) {
int val = 1;
dispatch_semaphore_t child_thread_started = *(dispatch_semaphore_t *)arg;
T_DECL(translated, "tests translated bit is set correctly")
{
#if !(TARGET_OS_OSX && TARGET_CPU_ARM64)
- T_SKIP("Not arm mac")
+ T_SKIP("Only valid on Apple silicon Macs")
#endif
// Get path of stackshot_translated_child helper binary
char path[PATH_MAX];
struct kinfo_proc process_info;
size_t bufsize = sizeof(process_info);
T_QUIET; T_ASSERT_POSIX_SUCCESS(sysctl(mib, (unsigned)(sizeof(mib)/sizeof(int)), &process_info, &bufsize, NULL, 0), "get translated child process info");
- T_QUIET; T_ASSERT_GT(bufsize, 0, "process info is not empty");
+ T_QUIET; T_ASSERT_GT(bufsize, (size_t)0, "process info is not empty");
T_QUIET; T_ASSERT_TRUE((process_info.kp_proc.p_flag & P_TRANSLATED), "KERN_PROC_PID reports child is translated");
T_LOG("capturing stackshot");
};
take_stackshot(&scenario, true, ^( void *ssbuf, size_t sslen) {
- // Kill the child
- int status;
- T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(pid, SIGTERM), "kill translated child");
- T_QUIET; T_ASSERT_POSIX_SUCCESS(waitpid(pid, &status, 0), "waitpid on translated child");
-
parse_stackshot(PARSE_STACKSHOT_TRANSLATED, ssbuf, sslen, @{translated_child_pid_key: @(pid)});
});
+
+ // Kill the child
+ int status;
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(pid, SIGTERM), "kill translated child");
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(waitpid(pid, &status, 0), "waitpid on translated child");
+
}
T_DECL(proc_uuid_info, "tests that the main binary UUID for a proc is always populated")
T_DECL(cseg_waitinfo, "test that threads stuck in the compressor report correct waitinfo")
{
- int val = 1;
struct scenario scenario = {
.name = "cseg_waitinfo",
.quiet = false,
dispatch_async(dq, ^{
pthread_threadid_np(NULL, &thread_id);
dispatch_semaphore_signal(child_ok);
+ int val = 1;
T_ASSERT_POSIX_SUCCESS(sysctlbyname("kern.cseg_wedge_thread", NULL, NULL, &val, sizeof(val)), "wedge child thread");
});
T_LOG("taking stackshot");
take_stackshot(&scenario, false, ^(void *ssbuf, size_t sslen) {
+ int val = 1;
T_ASSERT_POSIX_SUCCESS(sysctlbyname("kern.cseg_unwedge_thread", NULL, NULL, &val, sizeof(val)), "unwedge child thread");
parse_stackshot(PARSE_STACKSHOT_WAITINFO_CSEG, ssbuf, sslen, @{cseg_expected_threadid_key: @(thread_id)});
});
T_LOG("client process exiting after sending message to parent (server)");
}
+enum srp_test_type {
+ SRP_TEST_THREAD, /* expect waiter on current thread */
+ SRP_TEST_PID, /* expect waiter on current PID */
+ SRP_TEST_EITHER, /* waiter could be on either */
+};
+
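+/*
+ * Take a THREAD_WAITINFO stackshot and have parse_stackshot() look for the
+ * special-reply-port waiter by thread id, by pid, or by either, depending
+ * on the test type.
+ */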
+static void
+check_srp_test(const char *name, enum srp_test_type ty)
+{
+ struct scenario scenario = {
+ .name = name,
+ .quiet = false,
+ .flags = (STACKSHOT_THREAD_WAITINFO | STACKSHOT_KCDATA_FORMAT),
+ };
+ uint64_t thread_id = 0;
+ pthread_threadid_np(NULL, &thread_id);
+ if (ty == SRP_TEST_THREAD) {
+ take_stackshot(&scenario, false, ^(void *ssbuf, size_t sslen) {
+ parse_stackshot(PARSE_STACKSHOT_WAITINFO_SRP, ssbuf, sslen,
+ @{srp_expected_threadid_key: @(thread_id)});
+ });
+ } else if (ty == SRP_TEST_PID) {
+ take_stackshot(&scenario, false, ^(void *ssbuf, size_t sslen) {
+ parse_stackshot(PARSE_STACKSHOT_WAITINFO_SRP, ssbuf, sslen,
+ @{srp_expected_pid_key: @(getpid())});
+ });
+ } else {
+ take_stackshot(&scenario, false, ^(void *ssbuf, size_t sslen) {
+ parse_stackshot(PARSE_STACKSHOT_WAITINFO_SRP, ssbuf, sslen,
+ @{srp_expected_pid_key: @(getpid()), srp_expected_threadid_key: @(thread_id)});
+ });
+ }
+
+}
+
+
/*
* Tests the stackshot wait info plumbing for synchronous IPC that doesn't use kevent on the server.
*
* to a server that receives the message and copies in the send-once right, but doesn't
* reply to the client. for this case the special reply port is copied out and the kernel
* stashes the info about which task copied out the send once right. (rdar://60440592)
+ * (part 3): tests the same as part 2, but uses kevents, which allow for
+ * priority inheritance
*/
T_DECL(special_reply_port, "test that tasks using special reply ports have correct waitinfo")
{
dispatch_semaphore_t can_continue = dispatch_semaphore_create(0);
dispatch_queue_t dq = dispatch_queue_create("signalqueue", NULL);
+ dispatch_queue_t machdq = dispatch_queue_create("machqueue", NULL);
dispatch_source_t sig_src;
char path[PATH_MAX];
uint32_t path_size = sizeof(path);
pid_t client_pid;
int sp_ret;
kern_return_t kr;
- struct scenario scenario = {
- .name = "srp",
- .quiet = false,
- .flags = (STACKSHOT_THREAD_WAITINFO | STACKSHOT_KCDATA_FORMAT),
- };
mach_port_t port;
/* setup the signal handler in the parent (server) */
dispatch_semaphore_wait(can_continue, DISPATCH_TIME_FOREVER);
T_LOG("Ready to take stackshot, but waiting 1s for the coast to clear");
+ /*
+ * can_continue indicates the client has signaled us, but we want to make
+ * sure they've actually blocked sending their mach message. It's cheesy, but
+ * sleep() works for this.
+ */
sleep(1);
/*
* take the stackshot without calling receive to verify that the stackshot wait
- * info shows our (the server) PID for the scenario where the server has yet to
+ * info shows our (the server) thread for the scenario where the server has yet to
* receive the message.
*/
T_LOG("Taking stackshot for part 1 coverage");
- take_stackshot(&scenario, false, ^(void *ssbuf, size_t sslen) {
- parse_stackshot(PARSE_STACKSHOT_WAITINFO_SRP, ssbuf, sslen,
- @{srp_expected_pid_key: @(getpid())});
- });
+ check_srp_test("srp", SRP_TEST_THREAD);
/*
* receive the message from the client (which should copy the send once right into
* for the scenario where the server has received the message and copied in the send-once right.
*/
T_LOG("Taking stackshot for part 2 coverage");
- take_stackshot(&scenario, false, ^(void *ssbuf, size_t sslen) {
- parse_stackshot(PARSE_STACKSHOT_WAITINFO_SRP, ssbuf, sslen,
- @{srp_expected_pid_key: @(getpid())});
- });
+ check_srp_test("srp", SRP_TEST_PID);
/* cleanup - kill the client */
- T_LOG("killing client");
- kill(client_pid, SIGKILL);
+ T_ASSERT_POSIX_SUCCESS(kill(client_pid, SIGKILL), "killing client");
+ T_ASSERT_POSIX_SUCCESS(waitpid(client_pid, NULL, 0), "waiting for the client to exit");
+
+ // do it again, but using kevents
+ T_LOG("Launching client");
+ sp_ret = posix_spawn(&client_pid, client_args[0], NULL, NULL, client_args, NULL);
+ T_QUIET; T_ASSERT_POSIX_ZERO(sp_ret, "spawned process '%s' with PID %d", client_args[0], client_pid);
+ T_LOG("Spawned client as PID %d", client_pid);
- T_LOG("waiting for the client to exit");
- waitpid(client_pid, NULL, 0);
+ dispatch_semaphore_wait(can_continue, DISPATCH_TIME_FOREVER);
+ T_LOG("Ready to take stackshot, but waiting 1s for the coast to clear");
+
+ /*
+ * can_continue indicates the client has signaled us, but we want to make
+ * sure they've actually blocked sending their mach message. It's cheesy, but
+ * sleep() works for this.
+ */
+ sleep(1);
+
+ dispatch_mach_t dispatch_mach = dispatch_mach_create(SRP_SERVICE_NAME, machdq,
+ ^(dispatch_mach_reason_t reason,
+ dispatch_mach_msg_t message,
+ mach_error_t error __unused) {
+ switch (reason) {
+ case DISPATCH_MACH_MESSAGE_RECEIVED: {
+ size_t size = 0;
+ mach_msg_header_t *msg __unused = dispatch_mach_msg_get_msg(message, &size);
+ T_LOG("server: recieved %ld byte message", size);
+ check_srp_test("turnstile_port_thread", SRP_TEST_THREAD);
+ T_LOG("server: letting client go");
+ // drop the message on the ground, we'll kill the client later
+ dispatch_semaphore_signal(can_continue);
+ break;
+ }
+ default:
+ break;
+ }
+ });
+
+ dispatch_mach_connect(dispatch_mach, port, MACH_PORT_NULL, NULL);
+
+ dispatch_semaphore_wait(can_continue, DISPATCH_TIME_FOREVER);
+
+ /* cleanup - kill the client */
+ T_ASSERT_POSIX_SUCCESS(kill(client_pid, SIGKILL), "killing client");
+ T_ASSERT_POSIX_SUCCESS(waitpid(client_pid, NULL, 0), "waiting for the client to exit");
}
#pragma mark performance tests
dt_stat_t duration = dt_stat_create("nanoseconds per thread", "%s_duration", flagname);
dt_stat_t size = dt_stat_create("bytes per thread", "%s_size", flagname);
- T_LOG("Testing \"%s\" = 0x%x", flagname, flag);
+ T_LOG("Testing \"%s\" = 0x%" PRIx64, flagname, flag);
while (!dt_stat_stable(duration) || !dt_stat_stable(size)) {
take_stackshot(&scenario, false, ^(void *ssbuf, size_t sslen) {
parse_stackshot(uint64_t stackshot_parsing_flags, void *ssbuf, size_t sslen, NSDictionary *extra)
{
bool delta = (stackshot_parsing_flags & PARSE_STACKSHOT_DELTA);
+ bool expect_sharedcache_child = (stackshot_parsing_flags & PARSE_STACKSHOT_SHAREDCACHE_FLAGS);
bool expect_zombie_child = (stackshot_parsing_flags & PARSE_STACKSHOT_ZOMBIE);
bool expect_postexec_child = (stackshot_parsing_flags & PARSE_STACKSHOT_POSTEXEC);
bool expect_cseg_waitinfo = (stackshot_parsing_flags & PARSE_STACKSHOT_WAITINFO_CSEG);
bool found_translated_child = false;
bool found_dispatch_queue_label = false, found_turnstile_lock = false;
bool found_cseg_waitinfo = false, found_srp_waitinfo = false;
- pid_t zombie_child_pid = -1, srp_expected_pid = 0;
+ bool found_sharedcache_child = false, found_sharedcache_badflags = false, found_sharedcache_self = false;
+ uint64_t srp_expected_threadid = 0;
+ pid_t zombie_child_pid = -1, srp_expected_pid = -1, sharedcache_child_pid = -1;
pid_t translated_child_pid = -1;
+ bool sharedcache_child_sameaddr = false;
uint64_t postexec_child_unique_pid = 0, cseg_expected_threadid = 0;
+ uint64_t sharedcache_child_flags = 0, sharedcache_self_flags = 0;
char *inflatedBufferBase = NULL;
if (expect_shared_cache_uuid) {
}
}
+ if (expect_sharedcache_child) {
+ NSNumber* pid_num = extra[sharedcache_child_pid_key];
+ NSNumber* sameaddr_num = extra[sharedcache_child_sameaddr_key];
+ T_QUIET; T_ASSERT_NOTNULL(pid_num, "sharedcache child pid provided");
+ T_QUIET; T_ASSERT_NOTNULL(sameaddr_num, "sharedcache child addrsame provided");
+ sharedcache_child_pid = [pid_num intValue];
+ T_QUIET; T_ASSERT_GT(sharedcache_child_pid, 0, "sharedcache child pid greater than zero");
+ sharedcache_child_sameaddr = [sameaddr_num intValue];
+ T_QUIET; T_ASSERT_GE([sameaddr_num intValue], 0, "sharedcache child sameaddr is boolean (0 or 1)");
+ T_QUIET; T_ASSERT_LE([sameaddr_num intValue], 1, "sharedcache child sameaddr is boolean (0 or 1)");
+ }
if (expect_zombie_child) {
NSNumber* pid_num = extra[zombie_child_pid_key];
T_QUIET; T_ASSERT_NOTNULL(pid_num, "zombie child pid provided");
if (expect_cseg_waitinfo) {
NSNumber* tid_num = extra[cseg_expected_threadid_key];
T_QUIET; T_ASSERT_NOTNULL(tid_num, "cseg's expected thread id provided");
- cseg_expected_threadid = [tid_num intValue];
- T_QUIET; T_ASSERT_GT(cseg_expected_threadid, 0, "cseg_expected_threadid greater than zero");
+ cseg_expected_threadid = tid_num.unsignedLongValue;
+ T_QUIET; T_ASSERT_GT(cseg_expected_threadid, UINT64_C(0), "compressor segment thread is present");
}
if (expect_srp_waitinfo) {
+ NSNumber* threadid_num = extra[srp_expected_threadid_key];
NSNumber* pid_num = extra[srp_expected_pid_key];
- T_QUIET; T_ASSERT_NOTNULL(pid_num, "expected SRP pid provided");
- srp_expected_pid = [pid_num intValue];
- T_QUIET; T_ASSERT_GT(srp_expected_pid , 0, "srp_expected_pid greater than zero");
+ T_QUIET; T_ASSERT_TRUE(threadid_num != nil || pid_num != nil, "expected SRP threadid or pid");
+ if (threadid_num != nil) {
+ srp_expected_threadid = [threadid_num unsignedLongLongValue];
+ T_QUIET; T_ASSERT_GT(srp_expected_threadid, 0ull, "srp_expected_threadid greater than zero");
+ }
+ if (pid_num != nil) {
+ srp_expected_pid = [pid_num intValue];
+ T_QUIET; T_ASSERT_GT(srp_expected_pid, 0, "srp_expected_pid greater than zero");
+ }
+ T_LOG("looking for SRP pid: %d threadid: %llu", srp_expected_pid, srp_expected_threadid);
}
if (expect_translated_child) {
translated_child_pid = [pid_num intValue];
T_QUIET; T_ASSERT_GT(translated_child_pid, 0, "translated child pid greater than zero");
}
-
+
kcdata_iter_t iter = kcdata_iter(ssbuf, sslen);
if (delta) {
T_ASSERT_EQ(kcdata_iter_type(iter), KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT,
uint64_t *data;
char *desc;
for (int i = 0; i < 3; i ++) {
- kcdata_iter_get_data_with_desc(iter, &desc, &data, NULL);
+ kcdata_iter_get_data_with_desc(iter, &desc, (void **)&data, NULL);
if (strcmp(desc, "kcd_c_type") == 0) {
compression_type = *data;
} else if (strcmp(desc, "kcd_c_totalout") == 0){
iter = kcdata_iter_next(iter);
}
- T_ASSERT_EQ(compression_type, 1, "zlib compression is used");
- T_ASSERT_GT(totalout, 0, "successfully gathered how long the compressed buffer is");
- T_ASSERT_GT(totalin, 0, "successfully gathered how long the uncompressed buffer will be at least");
+ T_ASSERT_EQ(compression_type, UINT64_C(1), "zlib compression is used");
+ T_ASSERT_GT(totalout, UINT64_C(0), "successfully gathered how long the compressed buffer is");
+ T_ASSERT_GT(totalin, UINT64_C(0), "successfully gathered how long the uncompressed buffer will be at least");
/* progress to the next kcdata item */
T_ASSERT_EQ(kcdata_iter_type(iter), KCDATA_BUFFER_BEGIN_STACKSHOT, "compressed stackshot found");
- void *bufferBase = kcdata_iter_payload(iter);
+ char *bufferBase = kcdata_iter_payload(iter);
/*
* zlib is used, allocate a buffer based on the metadata, plus
z_stream zs;
memset(&zs, 0, sizeof(zs));
T_QUIET; T_ASSERT_EQ(inflateInit(&zs), Z_OK, "inflateInit OK");
- zs.next_in = bufferBase;
- zs.avail_in = totalout;
- zs.next_out = inflatedBufferBase;
- zs.avail_out = inflatedBufferSize;
+ zs.next_in = (unsigned char *)bufferBase;
+ T_QUIET; T_ASSERT_LE(totalout, (uint64_t)UINT_MAX, "stackshot is not too large");
+ zs.avail_in = (uInt)totalout;
+ zs.next_out = (unsigned char *)inflatedBufferBase;
+ T_QUIET; T_ASSERT_LE(inflatedBufferSize, (size_t)UINT_MAX, "output region is not too large");
+ zs.avail_out = (uInt)inflatedBufferSize;
T_ASSERT_EQ(inflate(&zs, Z_FINISH), Z_STREAM_END, "inflated buffer");
inflateEnd(&zs);
- T_ASSERT_EQ(zs.total_out, totalin, "expected number of bytes inflated");
+ T_ASSERT_EQ((uint64_t)zs.total_out, totalin, "expected number of bytes inflated");
/* copy the data after the compressed area */
- T_QUIET; T_ASSERT_LE(sslen - totalout - (bufferBase - ssbuf),
+ T_QUIET; T_ASSERT_GE((void *)bufferBase, ssbuf,
+ "base of compressed stackshot is after the returned stackshot buffer");
+ size_t header_size = (size_t)(bufferBase - (char *)ssbuf);
+ size_t data_after_compressed_size = sslen - totalout - header_size;
+ T_QUIET; T_ASSERT_LE(data_after_compressed_size,
inflatedBufferSize - zs.total_out,
"footer fits in the buffer");
memcpy(inflatedBufferBase + zs.total_out,
bufferBase + totalout,
- sslen - totalout - (bufferBase - ssbuf));
+ data_after_compressed_size);
iter = kcdata_iter(inflatedBufferBase, inflatedBufferSize);
}
id uuid = ptr[@"imageUUID"];
uint8_t uuid_p[16];
- for (int i = 0; i < 16; i ++)
- uuid_p[i] = (uint8_t) ([[uuid objectAtIndex:i] intValue]);
+ for (unsigned int i = 0; i < 16; i ++) {
+ NSNumber *uuidByte = uuid[i];
+ uuid_p[i] = (uint8_t)uuidByte.charValue;
+ }
check_shared_cache_uuid(uuid_p);
+ uint64_t baseAddress = (uint64_t)((NSNumber *)ptr[@"imageSlidBaseAddress"]).longLongValue;
+ uint64_t firstMapping = (uint64_t)((NSNumber *)ptr[@"sharedCacheSlidFirstMapping"]).longLongValue;
+
+ T_ASSERT_LE(baseAddress, firstMapping,
+ "in per-task shared_cache_dyld_load_info, "
+ "baseAddress <= firstMapping");
+ T_ASSERT_GE(baseAddress + (1ull << 29), firstMapping,
+ "in per-task shared_cache_dyld_load_info, "
+ "baseAddress + 512meg >= firstMapping");
+
+ size_t shared_cache_len;
+ const void *addr = _dyld_get_shared_cache_range(&shared_cache_len);
+ T_ASSERT_EQ((uint64_t)addr, firstMapping,
+ "SlidFirstMapping should match shared_cache_range");
+
/*
* check_shared_cache_uuid() will assert on failure, so if
* we get here, then we have found the shared cache UUID
* and it's correct
*/
- found_shared_cache_uuid = true;
+ found_shared_cache_uuid = true;
+ }
+ }
+ if (expect_sharedcache_child) {
+ uint64_t task_flags = [task_snapshot[@"ts_ss_flags"] unsignedLongLongValue];
+ uint64_t sharedregion_flags = (task_flags & (kTaskSharedRegionNone | kTaskSharedRegionSystem | kTaskSharedRegionOther));
+ id sharedregion_info = container[@"task_snapshots"][@"shared_cache_dyld_load_info"];
+ if (!found_sharedcache_badflags) {
+ T_QUIET; T_ASSERT_NE(sharedregion_flags, 0ll, "one of the kTaskSharedRegion flags should be set on all tasks");
+ bool multiple = (sharedregion_flags & (sharedregion_flags - 1)) != 0;
+ T_QUIET; T_ASSERT_FALSE(multiple, "only one kTaskSharedRegion flag should be set on each task");
+ found_sharedcache_badflags = (sharedregion_flags == 0 || multiple);
+ }
+ if (pid == 0) {
+ T_ASSERT_EQ(sharedregion_flags, (uint64_t)kTaskSharedRegionNone, "Kernel proc (pid 0) should have no shared region");
+ } else if (pid == sharedcache_child_pid) {
+ found_sharedcache_child = true;
+ sharedcache_child_flags = sharedregion_flags;
+ } else if (pid == getpid()) {
+ found_sharedcache_self = true;
+ sharedcache_self_flags = sharedregion_flags;
+ }
+ if (sharedregion_flags == kTaskSharedRegionOther && !(task_flags & kTaskSharedRegionInfoUnavailable)) {
+ T_QUIET; T_ASSERT_NOTNULL(sharedregion_info, "kTaskSharedRegionOther should have a shared_cache_dyld_load_info struct");
+ } else {
+ T_QUIET; T_ASSERT_NULL(sharedregion_info, "expect no shared_cache_dyld_load_info struct");
}
}
-
-
if (expect_zombie_child && (pid == zombie_child_pid)) {
found_zombie_child = true;
found_translated_child = true;
uint64_t task_flags = [task_snapshot[@"ts_ss_flags"] unsignedLongLongValue];
- T_ASSERT_EQ((task_flags & kTaskIsTranslated), kTaskIsTranslated, "child marked as translated");
+ T_EXPECT_BITS_SET(task_flags, kTaskIsTranslated, "child marked as translated");
continue;
}
NSArray *winfos = container[@"task_snapshots"][@"thread_waitinfo"];
for (id i in winfos) {
- if ([i[@"wait_type"] intValue] == kThreadWaitCompressor && [i[@"owner"] intValue] == cseg_expected_threadid) {
+ NSNumber *waitType = i[@"wait_type"];
+ NSNumber *owner = i[@"owner"];
+ if (waitType.intValue == kThreadWaitCompressor &&
+ owner.unsignedLongValue == cseg_expected_threadid) {
found_cseg_waitinfo = true;
break;
}
if (expect_srp_waitinfo) {
NSArray *tinfos = container[@"task_snapshots"][@"thread_turnstileinfo"];
NSArray *winfos = container[@"task_snapshots"][@"thread_waitinfo"];
-
for (id i in tinfos) {
if (!found_srp_waitinfo) {
- if ([i[@"turnstile_context"] intValue] == srp_expected_pid &&
- ([i[@"turnstile_flags"] intValue] & STACKSHOT_TURNSTILE_STATUS_BLOCKED_ON_TASK)) {
-
- /* we found something that is blocking the correct pid */
+ bool found_thread = false;
+ bool found_pid = false;
+ if (([i[@"turnstile_flags"] intValue] & STACKSHOT_TURNSTILE_STATUS_THREAD) &&
+ [i[@"turnstile_context"] unsignedLongLongValue] == srp_expected_threadid &&
+ srp_expected_threadid != 0) {
+ found_thread = true;
+ }
+ if (([i[@"turnstile_flags"] intValue] & STACKSHOT_TURNSTILE_STATUS_BLOCKED_ON_TASK) &&
+ [i[@"turnstile_context"] intValue] == srp_expected_pid &&
+ srp_expected_pid != -1) {
+ found_pid = true;
+ }
+ if (found_pid || found_thread) {
+ T_LOG("found SRP %s %lld waiter: %d", (found_thread ? "thread" : "pid"),
+ [i[@"turnstile_context"] unsignedLongLongValue], [i[@"waiter"] intValue]);
+ /* we found something that is blocking the correct threadid */
for (id j in winfos) {
if ([j[@"waiter"] intValue] == [i[@"waiter"] intValue] &&
- [j[@"wait_type"] intValue] == kThreadWaitPortReceive) {
+ [j[@"wait_type"] intValue] == kThreadWaitPortReceive) {
found_srp_waitinfo = true;
break;
}
"current process name matches in stackshot");
uint64_t task_flags = [task_snapshot[@"ts_ss_flags"] unsignedLongLongValue];
- T_ASSERT_NE((task_flags & kTerminatedSnapshot), kTerminatedSnapshot, "current process not marked as terminated");
- T_ASSERT_NE((task_flags & kTaskIsTranslated), kTaskIsTranslated, "current process not marked as translated");
+ T_ASSERT_BITS_NOTSET(task_flags, kTerminatedSnapshot, "current process not marked as terminated");
+ T_ASSERT_BITS_NOTSET(task_flags, kTaskIsTranslated, "current process not marked as translated");
T_QUIET;
T_EXPECT_LE(pid, [task_snapshot[@"ts_unique_pid"] intValue],
bool found_main_thread = false;
uint64_t main_thread_id = -1ULL;
+ bool found_null_kernel_frame = false;
for (id thread_key in container[@"task_snapshots"][@"thread_snapshots"]) {
NSMutableDictionary *thread = container[@"task_snapshots"][@"thread_snapshots"][thread_key];
NSDictionary *thread_snap = thread[@"thread_snapshot"];
[cpu_times[@"user_time"] intValue],
"runnable time of current thread is valid");
}
+ if (!found_null_kernel_frame) {
+ for (NSNumber *frame in thread[@"kernel_frames"]) {
+ if (frame.unsignedLongValue == 0) {
+ found_null_kernel_frame = true;
+ break;
+ }
+ }
+ }
}
T_EXPECT_TRUE(found_main_thread, "found main thread for current task in stackshot");
+ T_EXPECT_FALSE(found_null_kernel_frame, "should not see any NULL kernel frames");
if (expect_turnstile_lock && !found_turnstile_lock) {
NSArray *tsinfos = container[@"task_snapshots"][@"thread_turnstileinfo"];
break;
}
case STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO: {
- struct dyld_uuid_info_64_v2 *payload = kcdata_iter_payload(iter);
- T_ASSERT_EQ(kcdata_iter_size(iter), sizeof(*payload), "valid dyld_uuid_info_64_v2 struct");
+ struct dyld_shared_cache_loadinfo *payload = kcdata_iter_payload(iter);
+ T_ASSERT_EQ((size_t)kcdata_iter_size(iter), sizeof(*payload), "valid dyld_shared_cache_loadinfo struct");
+
+ check_shared_cache_uuid(payload->sharedCacheUUID);
+
+ T_ASSERT_LE(payload->sharedCacheUnreliableSlidBaseAddress,
+ payload->sharedCacheSlidFirstMapping,
+ "SlidBaseAddress <= SlidFirstMapping");
+ T_ASSERT_GE(payload->sharedCacheUnreliableSlidBaseAddress + (1ull << 29),
+ payload->sharedCacheSlidFirstMapping,
+ "SlidFirstMapping should be within 512megs of SlidBaseAddress");
- check_shared_cache_uuid(payload->imageUUID);
+ size_t shared_cache_len;
+ const void *addr = _dyld_get_shared_cache_range(&shared_cache_len);
+ T_ASSERT_EQ((uint64_t)addr, payload->sharedCacheSlidFirstMapping,
+ "SlidFirstMapping should match shared_cache_range");
/*
* check_shared_cache_uuid() asserts on failure, so we must have
}
}
+ if (expect_sharedcache_child) {
+ T_QUIET; T_ASSERT_TRUE(found_sharedcache_child, "found sharedcache child in kcdata");
+ T_QUIET; T_ASSERT_TRUE(found_sharedcache_self, "found self in kcdata");
+ if (found_sharedcache_child && found_sharedcache_self) {
+ T_QUIET; T_ASSERT_NE(sharedcache_child_flags, (uint64_t)kTaskSharedRegionNone, "sharedcache child should have shared region");
+ T_QUIET; T_ASSERT_NE(sharedcache_self_flags, (uint64_t)kTaskSharedRegionNone, "sharedcache: self should have shared region");
+ if (sharedcache_self_flags == kTaskSharedRegionSystem && !sharedcache_child_sameaddr) {
+ /* If we're in the system shared region, and the child has a different address, child must have an Other shared region */
+ T_ASSERT_EQ(sharedcache_child_flags, (uint64_t)kTaskSharedRegionOther,
+ "sharedcache child should have Other shared region");
+ }
+ }
+ }
if (expect_zombie_child) {
T_QUIET; T_ASSERT_TRUE(found_zombie_child, "found zombie child in kcdata");
}
T_EXPECT_EQ(out_buffer->entries, 1ULL, "should have 1 vm object\n");
T_EXPECT_NE(out_buffer->data[0].object_id, 0ULL, "vm_object_id should not be 0\n");
- /* get the list for the current process */
+ /* get the list for the current process with an overly large size */
+ out_size = SIZE_MAX;
+ memset(out_buffer, 0, output_size);
+ ret = sysctlbyname(g_sysctl_name, out_buffer, &out_size, &task_name, sizeof(task_name));
+
+ T_QUIET;
+ T_EXPECT_EQ(ret, 0, "sysctlbyname failed\n");
+ T_EXPECT_EQ(out_size, 2 * sizeof(vm_object_query_data_t) + sizeof(int64_t), "sysctl return size is incorrect\n");
+ T_EXPECT_EQ(out_buffer->entries, 2ULL, "should have 2 vm objects\n");
+ T_EXPECT_NE(out_buffer->data[0].object_id, 0ULL, "vm_object_id should not be 0\n");
+
+ /* get the list for the current process with the correct output size */
out_size = output_size;
memset(out_buffer, 0, output_size);
ret = sysctlbyname(g_sysctl_name, out_buffer, &out_size, &task_name, sizeof(task_name));
{
char buffer[64] = "";
size_t buffer_size = sizeof(buffer);
+ int v;
+ size_t v_size;
int ret = sysctlbyname("hw.target", buffer,
&buffer_size, NULL, 0);
T_ASSERT_POSIX_SUCCESS(ret, "machdep.cpu.brand_string sysctl");
T_LOG("machdep.cpu.brand_string = %s", buffer);
+
+ v = 0;
+ v_size = sizeof(v);
+ ret = sysctlbyname("hw.cpu64bit_capable", &v, &v_size, NULL, 0);
+ T_ASSERT_POSIX_SUCCESS(ret, "hw.cpu64bit_capable");
+
+#if __arm__
+ T_EXPECT_EQ(v, 0, "cpu is not 64 bit capable");
+#else
+ T_EXPECT_EQ(v, 1, "cpu is 64 bit capable");
+#endif
}
<true/>
<key>task_for_pid-allow</key>
<true/>
+
+ <key>com.apple.system-task-ports.control</key>
+ <!-- Supersedes the two above in AzulE+, cross-platform -->
+ <true/>
+
+ <key>com.apple.security.get-movable-control-port</key>
+ <!-- Allows for task_get_special_port(..TASK_KERNEL_PORT..) -->
+ <true/>
</dict>
</plist>
--- /dev/null
+#include <darwintest.h>
+#include <darwintest_utils.h>
+#include <errno.h>
+#include <mach/mach.h>
+#include <mach/mach_types.h>
+#include <mach/task.h>
+#include <mach/mach_error.h>
+#include <mach/task_special_ports.h>
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+T_DECL(task_ident, "test task identity token")
+{
+ kern_return_t kr;
+ task_id_token_t token;
+ mach_port_t port1, port2;
+
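+ /*
+ * An identity token should resolve back to the same task port of each
+ * flavor that we can already obtain directly; compare them below.
+ */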
+ kr = task_create_identity_token(mach_task_self(), &token);
+ T_ASSERT_MACH_SUCCESS(kr, "task_create_identity_token()");
+
+ port1 = mach_task_self();
+ kr = task_identity_token_get_task_port(token, TASK_FLAVOR_CONTROL, &port2); /* Immovable control port for self */
+ T_ASSERT_MACH_SUCCESS(kr, "task_identity_token_get_task_port() - CONTROL");
+ T_EXPECT_EQ(port1, port2, "Control port does not match!");
+
+ mach_port_deallocate(mach_task_self(), port2);
+
+ kr = task_get_special_port(mach_task_self(), TASK_READ_PORT, &port1);
+ T_ASSERT_MACH_SUCCESS(kr, "task_get_special_port() - READ");
+ kr = task_identity_token_get_task_port(token, TASK_FLAVOR_READ, &port2);
+ T_ASSERT_MACH_SUCCESS(kr, "task_identity_token_get_task_port() - read");
+ T_EXPECT_EQ(port1, port2, "Read port does not match!");
+
+ mach_port_deallocate(mach_task_self(), port1);
+ mach_port_deallocate(mach_task_self(), port2);
+
+ kr = task_get_special_port(mach_task_self(), TASK_INSPECT_PORT, &port1);
+ T_ASSERT_MACH_SUCCESS(kr, "task_get_special_port() - INSPECT");
+ kr = task_identity_token_get_task_port(token, TASK_FLAVOR_INSPECT, &port2);
+ T_ASSERT_MACH_SUCCESS(kr, "task_identity_token_get_task_port() - inspect");
+ T_EXPECT_EQ(port1, port2, "Inspect port does not match!");
+
+ mach_port_deallocate(mach_task_self(), port1);
+ mach_port_deallocate(mach_task_self(), port2);
+
+ kr = task_get_special_port(mach_task_self(), TASK_NAME_PORT, &port1);
+ T_ASSERT_MACH_SUCCESS(kr, "task_get_special_port() - NAME");
+ kr = task_identity_token_get_task_port(token, TASK_FLAVOR_NAME, &port2);
+ T_ASSERT_MACH_SUCCESS(kr, "task_identity_token_get_task_port() - name");
+ T_EXPECT_EQ(port1, port2, "Name port does not match!");
+
+ mach_port_deallocate(mach_task_self(), port1);
+ mach_port_deallocate(mach_task_self(), port2);
+
+ kr = task_identity_token_get_task_port(mach_thread_self(), TASK_FLAVOR_NAME, &port2);
+ T_EXPECT_NE(kr, KERN_SUCCESS, "task_identity_token_get_task_port() should fail on non-token port");
+
+ mach_port_deallocate(mach_task_self(), token);
+}
#include <sys/sysctl.h>
#include <unistd.h>
+#include "test_utils.h"
+
T_GLOBAL_META(T_META_RUN_CONCURRENTLY(true));
/* *************************************************************************************
void test_task_basic_info_64(void);
void task_basic_info_32_debug(void);
void task_basic2_info_32_warmup(void);
-static int is_development_kernel(void);
void test_task_basic_info(enum info_kind kind);
uint64_t info_get(enum info_kind kind, enum info_get get, void * data);
__builtin_unreachable();
}
-
-/*
- * Determines whether we're running on a development kernel
- */
-static int
-is_development_kernel(void)
-{
-#define NOTSET -1
-
- static int is_dev = NOTSET;
-
- if (is_dev == NOTSET) {
- int dev;
- size_t dev_size = sizeof(dev);
-
- T_QUIET;
- T_ASSERT_POSIX_SUCCESS(sysctlbyname("kern.development", &dev, &dev_size, NULL, 0), NULL);
- is_dev = (dev != 0);
-
- return is_dev;
- } else {
- return is_dev;
- }
-#undef NOTSET
-}
+++ /dev/null
-#ifdef T_NAMESPACE
-#undef T_NAMESPACE
-#endif
-
-#include <darwintest.h>
-
-#include <mach/host_priv.h>
-#include <mach/mach.h>
-#include <mach/mach_types.h>
-#include <mach/mach_vm.h>
-#include <mach/processor_set.h>
-#include <mach/task.h>
-#include <sys/sysctl.h>
-#include <unistd.h>
-
-T_GLOBAL_META(T_META_NAMESPACE("xnu.ipc"),
- T_META_RUN_CONCURRENTLY(true));
-
-/*
- * Attempt to inspect kernel_task using a task_inspect_t. Interact with the
- * kernel in the same way top(1) and lsmp(1) do.
- */
-
-static void
-check_secure_kernel(void)
-{
- int secure_kern = 0;
- size_t secure_kern_size = sizeof(secure_kern);
-
- T_ASSERT_POSIX_SUCCESS(sysctlbyname("kern.secure_kernel", &secure_kern,
- &secure_kern_size, NULL, 0), NULL);
-
- if (secure_kern) {
- T_SKIP("secure kernel: processor_set_tasks will not return kernel_task");
- }
-}
-
-static void
-attempt_kernel_inspection(task_t task)
-{
- pid_t pid = (pid_t)-1;
- mach_msg_type_number_t i, count, thcnt;
- struct task_basic_info_64 ti;
- thread_act_array_t threads;
-
- T_QUIET;
- T_EXPECT_MACH_SUCCESS(pid_for_task(task, &pid), NULL);
- T_LOG("Checking pid %d", pid);
-
- if (pid != 0) {
- return;
- }
-
- T_LOG("found kernel_task, attempting to inspect");
-
- count = TASK_BASIC_INFO_64_COUNT;
- T_EXPECT_MACH_SUCCESS(task_info(task, TASK_BASIC_INFO_64, (task_info_t)&ti,
- &count), "task_info(... TASK_BASIC_INFO_64 ...)");
-
- T_EXPECT_MACH_SUCCESS(task_threads(task, &threads, &thcnt), "task_threads");
- T_LOG("Found %d kernel threads.", thcnt);
- for (i = 0; i < thcnt; i++) {
- kern_return_t kr;
- thread_basic_info_data_t basic_info;
- mach_msg_type_number_t bi_count = THREAD_BASIC_INFO_COUNT;
-
- kr = thread_info(threads[i], THREAD_BASIC_INFO,
- (thread_info_t)&basic_info, &bi_count);
- /*
- * Ignore threads that have gone away.
- */
- if (kr == MACH_SEND_INVALID_DEST) {
- T_LOG("ignoring thread that has been destroyed");
- continue;
- }
- T_EXPECT_MACH_SUCCESS(kr, "thread_info(... THREAD_BASIC_INFO ...)");
- (void)mach_port_deallocate(mach_task_self(), threads[i]);
- }
- mach_vm_deallocate(mach_task_self(),
- (mach_vm_address_t)(uintptr_t)threads,
- thcnt * sizeof(*threads));
-
- ipc_info_space_basic_t basic_info;
- T_EXPECT_MACH_SUCCESS(mach_port_space_basic_info(task, &basic_info), "mach_port_space_basic_info");
-
- ipc_info_space_t info_space;
- ipc_info_name_array_t table;
- ipc_info_tree_name_array_t tree;
- mach_msg_type_number_t tblcnt = 0, treecnt = 0;
- T_EXPECT_MACH_SUCCESS(mach_port_space_info(task, &info_space, &table,
- &tblcnt, &tree, &treecnt), "mach_port_space_info");
- if (tblcnt > 0) {
- mach_vm_deallocate(mach_task_self(),
- (mach_vm_address_t)(uintptr_t)table,
- tblcnt * sizeof(*table));
- }
- if (treecnt > 0) {
- mach_vm_deallocate(mach_task_self(),
- (mach_vm_address_t)(uintptr_t)tree,
- treecnt * sizeof(*tree));
- }
-
- T_END;
-}
-
-T_DECL(inspect_kernel_task,
- "ensure that kernel task can be inspected",
- T_META_CHECK_LEAKS(false),
- T_META_ASROOT(true))
-{
- processor_set_name_array_t psets;
- processor_set_t pset;
- task_array_t tasks;
- mach_msg_type_number_t i, j, tcnt, pcnt = 0;
- mach_port_t self = mach_host_self();
-
- check_secure_kernel();
-
- T_ASSERT_MACH_SUCCESS(host_processor_sets(self, &psets, &pcnt),
- NULL);
-
- for (i = 0; i < pcnt; i++) {
- T_ASSERT_MACH_SUCCESS(host_processor_set_priv(self, psets[i], &pset), NULL);
- T_LOG("Checking pset %d/%d", i, pcnt - 1);
-
- tcnt = 0;
- T_ASSERT_MACH_SUCCESS(processor_set_tasks(pset, &tasks, &tcnt), NULL);
-
- for (j = 0; j < tcnt; j++) {
- attempt_kernel_inspection(tasks[j]);
- mach_port_deallocate(self, tasks[j]);
- }
-
- /* free tasks array */
- mach_vm_deallocate(mach_task_self(),
- (mach_vm_address_t)(uintptr_t)tasks,
- tcnt * sizeof(*tasks));
- mach_port_deallocate(mach_task_self(), pset);
- mach_port_deallocate(mach_task_self(), psets[i]);
- }
- mach_vm_deallocate(mach_task_self(),
- (mach_vm_address_t)(uintptr_t)psets,
- pcnt * sizeof(*psets));
-
- T_FAIL("could not find kernel_task in list of tasks returned");
-}
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
- <key>com.apple.system-task-ports</key>
- <true/>
- <key>task_for_pid-allow</key>
- <true/>
-</dict>
-</plist>
--- /dev/null
+#include <darwintest.h>
+#include <mach/mach.h>
+#include <mach/task.h>
+#include <mach/mach_init.h>
+
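+/*
+ * mach_task_is_self() should report true for the current task's control,
+ * read, inspect and name ports, and false for an unrelated receive right.
+ */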
+T_DECL(mach_task_is_self,
+ "test task port comparison check")
+{
+ mach_port_t self_insp, self_read, self_name, port;
+
+ T_ASSERT_MACH_SUCCESS(task_get_special_port(mach_task_self(), TASK_READ_PORT, &self_read), "task_get_special_port failed");
+ T_ASSERT_MACH_SUCCESS(task_get_special_port(mach_task_self(), TASK_INSPECT_PORT, &self_insp), "task_get_special_port failed");
+ T_ASSERT_MACH_SUCCESS(task_get_special_port(mach_task_self(), TASK_NAME_PORT, &self_name), "task_get_special_port failed");
+
+ T_ASSERT_MACH_SUCCESS(mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port), "mach_port_allocate failed");
+
+ T_EXPECT_NE(self_read, self_insp, "read and inspect port should be different");
+ T_EXPECT_NE(self_read, mach_task_self(), "read and control port should be different");
+
+ T_EXPECT_EQ(1, mach_task_is_self(mach_task_self()), "control port should point to self");
+ T_EXPECT_EQ(1, mach_task_is_self(self_read), "read port should point to self");
+ T_EXPECT_EQ(1, mach_task_is_self(self_insp), "inspect port should point to self");
+ T_EXPECT_EQ(1, mach_task_is_self(self_name), "name port should point to self");
+ T_EXPECT_NE(1, mach_task_is_self(port), "_port_ should not point to self");
+}
+++ /dev/null
-#include <darwintest.h>
-#include <CoreFoundation/CoreFoundation.h>
-#include <IOKit/kext/KextManager.h>
-#include <mach/mach_time.h>
-#include <sys/sysctl.h>
-#include <copyfile.h>
-#include <removefile.h>
-
-T_GLOBAL_META(T_META_NAMESPACE("xnu.iokit"),
- T_META_RUN_CONCURRENTLY(true));
-
-#define DEXT_NAME "com.apple.test_intentionally_crashing_driver_56101852.dext"
-#define DEXT_PATH "/Library/DriverExtensions/" DEXT_NAME
-#define SYSCTL_NAME "kern.driverkit_checkin_timed_out"
-#define MAX_TIMEOUT_SECONDS 120
-
-static int
-copyfileCallback(int what __unused, int stage, copyfile_state_t state __unused, const char *src __unused, const char *dst, void *ctx __unused)
-{
- if (stage == COPYFILE_FINISH) {
- T_QUIET; T_ASSERT_POSIX_SUCCESS(chown(dst, 0, 0), "chown %s to root / wheel", dst);
- }
- return COPYFILE_CONTINUE;
-}
-
-static void
-cleanup(void)
-{
- removefile_state_t state = removefile_state_alloc();
- removefile(DEXT_PATH, state, REMOVEFILE_RECURSIVE);
- removefile_state_free(state);
-}
-
-T_DECL(test_dext_launch_56101852,
- "Test launching a crashing dext",
- T_META_ASROOT(true), T_META_IGNORECRASHES("*test_intentionally_crashing_driver_56101852*"))
-{
- T_SKIP("skipping test_dext_launch_56101852 due to 62657199");
-
- CFStringRef path = NULL;
- CFURLRef url = NULL;
- uint64_t startTime = mach_absolute_time();
- uint64_t endTime = 0;
- size_t endTimeSize = sizeof(uint64_t);
- uint64_t elapsedTimeAbs = 0;
- uint64_t elapsedTimeNs = 0;
- mach_timebase_info_data_t timebaseInfo;
- copyfile_state_t copyfileState;
-
- copyfileState = copyfile_state_alloc();
- copyfile_state_set(copyfileState, COPYFILE_STATE_STATUS_CB, (void *)&copyfileCallback);
- T_ASSERT_POSIX_SUCCESS(copyfile(DEXT_NAME, DEXT_PATH, copyfileState, COPYFILE_RECURSIVE | COPYFILE_ALL), "copied dext " DEXT_NAME " to " DEXT_PATH);
- T_ATEND(cleanup);
-
- /* set up timebaseInfo */
- T_ASSERT_MACH_SUCCESS(mach_timebase_info(&timebaseInfo), "set up mach_timebase_info");
-
- /* Set the initial value of kern.driverkit_checkin_timed_out to startTime */
- T_ASSERT_POSIX_SUCCESS(sysctlbyname(SYSCTL_NAME, NULL, NULL, &startTime, sizeof(startTime)), "set sysctl " SYSCTL_NAME " to %llu", startTime);
-
-
- /* Convert DEXT_PATH to a CFURL */
- path = CFSTR(DEXT_PATH);
- url = CFURLCreateWithFileSystemPath(kCFAllocatorDefault, path, kCFURLPOSIXPathStyle, true);
- T_ASSERT_NOTNULL(url, "created CFURL from CFString");
-
- /* Ask kextd to load the dext */
- T_ASSERT_EQ(KextManagerLoadKextWithURL(url, NULL), kOSReturnSuccess, "Loaded dext %s with kextd", DEXT_PATH);
- T_LOG("Will sleep for up to %d seconds", MAX_TIMEOUT_SECONDS);
-
- /* Wait for up to 120 seconds. Each loop iteration sleeps for 1 second and checks
- * the value of the sysctl to check if it has changed. If the value changed, then
- * the dext loaded earlier has crashed. If 120 seconds elapses and the value does
- * not change, then the dext did not crash.
- */
- for (int i = 0; i < MAX_TIMEOUT_SECONDS; i++) {
- sleep(1);
- T_ASSERT_POSIX_SUCCESS(sysctlbyname(SYSCTL_NAME, &endTime, &endTimeSize, NULL, 0), "using " SYSCTL_NAME " to check if dext has crashed");
- if (endTime != startTime) {
- T_LOG("Detected dext crash");
- break;
- }
- T_LOG(" Slept for %d seconds", i + 1);
- }
-
- T_LOG("startTime = %llu, endTime = %llu", startTime, endTime);
-
- T_ASSERT_GT(endTime, startTime, "dext has crashed");
-
- /* Check how much time has elapsed and see if it is less than 120 seconds. If it
- * is 120 seconds or greater, then the dext did not check in to the kernel but we
- * were not able to stop waiting for the dext to check in after it crashed.
- */
- elapsedTimeAbs = endTime - startTime;
- elapsedTimeNs = elapsedTimeAbs * timebaseInfo.numer / timebaseInfo.denom;
- T_LOG("elapsedTimeAbs = %llu, elapsedTimeNs = %llu", elapsedTimeAbs, elapsedTimeNs);
- T_ASSERT_LT(elapsedTimeNs / NSEC_PER_SEC, (uint64_t)MAX_TIMEOUT_SECONDS, "elapsed time is less than %d seconds", MAX_TIMEOUT_SECONDS);
-
- copyfile_state_free(copyfileState);
- CFRelease(url);
-}
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
- <key>com.apple.private.security.storage.SystemExtensionManagement</key>
- <true/>
-</dict>
-</plist>
--- /dev/null
+#include <sys/kern_sysctl.h>
+#include <sys/sysctl.h>
+#include <dispatch/dispatch.h>
+#include <darwintest.h>
+
+#include "test_utils.h"
+
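+/*
+ * Cache the result of the kern.development sysctl so repeated callers do
+ * not re-query it; dispatch_once keeps the lookup thread-safe.
+ */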
+bool
+is_development_kernel()
+{
+ static dispatch_once_t is_development_once;
+ static bool is_development;
+
+ dispatch_once(&is_development_once, ^{
+ int dev;
+ size_t dev_size = sizeof(dev);
+
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(sysctlbyname("kern.development", &dev,
+ &dev_size, NULL, 0), NULL);
+ is_development = (dev != 0);
+ });
+
+ return is_development;
+}
--- /dev/null
+#ifndef XNU_DARWINTEST_UTILS_H
+#define XNU_DARWINTEST_UTILS_H
+
+#include <stdbool.h>
+
+/* Misc. utility functions for writing darwintests. */
+bool is_development_kernel(void);
+#endif /* !defined(XNU_DARWINTEST_UTILS_H) */
--- /dev/null
+#include <unistd.h>
+#include <stdio.h>
+
+#include <darwintest.h>
+#include <darwintest_utils.h>
+
+T_GLOBAL_META(T_META_RUN_CONCURRENTLY(false));
+
+/*
+ * There is no system(3) on watchOS, so provide our own.
+ * Returns -1 if the helper fails to launch,
+ * 0 if the process exits normally, and
+ * +N if the process exits due to signal N.
+ */
+static int
+my_system(const char *command)
+{
+ pid_t pid;
+ int status = 0;
+ int signal = 0;
+ int err;
+ const char *argv[] = {
+ "/bin/sh",
+ "-c",
+ command,
+ NULL
+ };
+
+ if (dt_launch_tool(&pid, (char **)(void *)argv, FALSE, NULL, NULL)) {
+ return -1;
+ }
+
+ err = dt_waitpid(pid, &status, &signal, 30);
+ if (err) {
+ return 0;
+ }
+
+ return signal;
+}
+
+
+/*
+ * The tests are run in the following order:
+ *
+ * - call foo
+ * - corrupt foo, then call foo
+ * - call foo
+ *
+ * - call atan
+ * - corrupt atan, then call atan
+ * - call atan
+ *
+ * The first and last of each should exit normally. The middle one should exit with SIGILL.
+ *
+ * atan() was picked as a shared region function that isn't likely used by any normal daemons.
+ */
+T_DECL(text_corruption_recovery, "test detection/recovery of text corruption",
+ T_META_IGNORECRASHES(".*text_corruption_helper.*"),
+ T_META_ASROOT(true))
+{
+ int ret;
+
+ ret = my_system("./text_corruption_helper foo");
+ T_QUIET; T_ASSERT_EQ(ret, 0, "First call of foo");
+
+ ret = my_system("./text_corruption_helper Xfoo");
+ T_QUIET; T_ASSERT_EQ(ret, SIGILL, "Call of corrupted foo");
+
+ ret = my_system("./text_corruption_helper foo");
+ T_QUIET; T_ASSERT_EQ(ret, 0, "Fixed call of foo");
+
+ ret = my_system("./text_corruption_helper atan");
+ T_QUIET; T_ASSERT_EQ(ret, 0, "First call of atan");
+
+ ret = my_system("./text_corruption_helper Xatan");
+ T_QUIET; T_ASSERT_EQ(ret, SIGILL, "Call of corrupted atan");
+
+ ret = my_system("./text_corruption_helper atan");
+ T_QUIET; T_ASSERT_EQ(ret, 0, "Fixed call of atan");
+}
--- /dev/null
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/sysctl.h>
+#include <ptrauth.h>
+#include <math.h>
+#include <string.h>
+
+__attribute__((noinline))
+static void
+foo(void)
+{
+ printf("In foo()\n");
+ fflush(stdout);
+}
+
+/*
+ * volatile to stop the compiler from optimizing away calls to atan()
+ */
+volatile double zero = 0.0;
+
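+/*
+ * Helper for the text_corruption_recovery darwintest. An "X"-prefixed
+ * argument first warms up the named function, then asks the kernel (via
+ * the vm.corrupt_text_addr sysctl) to corrupt its text page before
+ * calling it again; the driving test expects that second call to die
+ * with SIGILL.
+ */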
+int
+main(int argc, char **argv)
+{
+ void *addr;
+ size_t s = sizeof(addr);
+ int err;
+ int a;
+
+ /*
+ * needs to run as root for sysctl.
+ */
+ if (geteuid() != 0) {
+ printf("Test not running as root\n");
+ exit(-1);
+ }
+
+ if (strcmp(argv[argc - 1], "foo") == 0) {
+ foo();
+ } else if (strcmp(argv[argc - 1], "Xfoo") == 0) {
+ printf("Warm up call to foo()\n");
+ foo();
+ addr = ptrauth_strip(&foo, ptrauth_key_function_pointer);
+ err = sysctlbyname("vm.corrupt_text_addr", NULL, NULL, &addr, s);
+ foo();
+ } else if (strcmp(argv[argc - 1], "atan") == 0) {
+ printf("atan(0) is %g\n", atan(zero));
+ } else if (strcmp(argv[argc - 1], "Xatan") == 0) {
+ printf("Warmup call to atan(0) is %g\n", atan(zero));
+ addr = ptrauth_strip(&atan, ptrauth_key_function_pointer);
+ err = sysctlbyname("vm.corrupt_text_addr", NULL, NULL, &addr, s);
+ printf("atan(0) is %g\n", atan(zero));
+ } else {
+ exit(-1);
+ }
+}
--- /dev/null
+#include <darwintest.h>
+#include <pthread.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include <mach/mach_init.h>
+#include <mach/mach_port.h>
+#include <mach/mk_timer.h>
+#include <mach/task.h>
+
+#define die(w) errx(1, (w))
+#define edie(w) err(1, (w))
+#define expect(e) if (-1 == (e)) edie(#e)
+
+static void *
+racer(void *data)
+{
+ for (;;) {
+ mk_timer_destroy(*(mach_port_t *)data);
+ }
+
+ return NULL;
+}
+
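+/*
+ * Race mk_timer_destroy() on several helper threads against
+ * create/arm/destroy on the main thread. Surviving the loop without a
+ * kernel panic is the pass condition (rdar://71455282).
+ */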
+T_DECL(thread_call_race_71455282,
+ "rdar://71455282",
+ T_META_IGNORECRASHES(".*thread_call_race_71455282.*"))
+{
+ mach_port_t timer = MACH_PORT_NULL;
+ pthread_t t;
+ size_t n;
+
+ /* we will violate mach rules so ignore crashes here */
+ T_ASSERT_MACH_SUCCESS(task_set_exc_guard_behavior(mach_task_self(), 0),
+ "task_set_exc_guard_behavior");
+
+ for (n = 0; n < 4; ++n) {
+ T_ASSERT_POSIX_SUCCESS(pthread_create(&t, NULL, racer, &timer),
+ "pthread_create");
+ }
+
+ T_LOG("racing");
+ for (size_t i = 0; i < 1000; i++) {
+ timer = mk_timer_create();
+ mk_timer_arm(timer, 1);
+ mk_timer_destroy(timer);
+ timer = MACH_PORT_NULL;
+ }
+
+ T_PASS("didn't panic");
+ T_END;
+}
--- /dev/null
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+#include <darwintest.h>
+
+#include "drop_priv.h"
+#include "test_utils.h"
+
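+/*
+ * This file is intended to be built both with and without ENTITLED
+ * defined, so the same source covers the entitled and unentitled cases
+ * under distinct test names.
+ */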
+#if ENTITLED
+#define SET_TREATMENT_ID set_treatment_id_entitled
+#define SET_TREATMENT_ID_DESCR "Can set treatment id with entitlement"
+#else /* ENTITLED */
+#define SET_TREATMENT_ID set_treatment_id_unentitled
+#define SET_TREATMENT_ID_DESCR "Can't set treatment id without entitlement"
+#endif /* ENTITLED */
+
+T_DECL(SET_TREATMENT_ID, "Verifies that EXPERIMENT sysctls can only be set with the entitlement", T_META_ASROOT(false))
+{
+#define TEST_STR "testing"
+#define IDENTIFIER_LENGTH 36
+
+ int ret;
+ errno_t err;
+ char val[IDENTIFIER_LENGTH + 1] = {0};
+ size_t len = sizeof(val);
+ char new_val[IDENTIFIER_LENGTH + 1] = {0};
+
+ if (!is_development_kernel()) {
+ T_SKIP("skipping test on release kernel");
+ }
+
+ strlcpy(new_val, TEST_STR, sizeof(new_val));
+ drop_priv();
+
+ ret = sysctlbyname("kern.trial_treatment_id", val, &len, new_val, strlen(new_val));
+ err = errno;
+#if ENTITLED
+ len = sizeof(val);
+ memset(new_val, 0, sizeof(new_val));
+ T_ASSERT_POSIX_SUCCESS(ret, "set kern.trial_treatment_id");
+ /* Cleanup. Set it back to the empty string. */
+ ret = sysctlbyname("kern.trial_treatment_id", val, &len, new_val, 1);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "reset kern.trial_treatment_id");
+#else
+ T_ASSERT_POSIX_FAILURE(ret, EPERM, "set kern.trial_treatment_id");
+#endif /* ENTITLED */
+}
+
+#if ENTITLED
+/* Check min and max value limits on numeric factors */
+T_DECL(experiment_factor_numeric_limits,
+ "Can only set factors within the legal range.",
+ T_META_ASROOT(false))
+{
+#define kMinVal 5 /* The min value allowed for the testing factor. */
+#define kMaxVal 10 /* The max value allowed for the testing factor. */
+ errno_t err;
+ int ret;
+ unsigned int current_val;
+ size_t len = sizeof(current_val);
+ unsigned int new_val;
+
+ drop_priv();
+ new_val = kMinVal - 1;
+ ret = sysctlbyname("kern.testing_experiment_factor", ¤t_val, &len, &new_val, sizeof(new_val));
+ err = errno;
+ T_ASSERT_POSIX_FAILURE(ret, EINVAL, "set kern.testing_experiment_factor below range.");
+
+ new_val = kMaxVal + 1;
+ ret = sysctlbyname("kern.testing_experiment_factor", ¤t_val, &len, &new_val, sizeof(new_val));
+ err = errno;
+ T_ASSERT_POSIX_FAILURE(ret, EINVAL, "set kern.testing_experiment_factor above range.");
+
+ new_val = kMaxVal;
+ ret = sysctlbyname("kern.testing_experiment_factor", ¤t_val, &len, &new_val, sizeof(new_val));
+ T_ASSERT_POSIX_SUCCESS(ret, "set kern.testing_experiment_factor at top of range.");
+
+ new_val = kMinVal;
+ ret = sysctlbyname("kern.testing_experiment_factor", ¤t_val, &len, &new_val, sizeof(new_val));
+ T_ASSERT_POSIX_SUCCESS(ret, "set kern.testing_experiment_factor at bottom of range.");
+}
+#endif /* ENTITLED */
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>com.apple.private.write-kr-experiment-factors</key>
+ <true/>
+</dict>
+</plist>
#include <pthread.h>
#include <stdatomic.h>
-#include "vm/perf_helpers.h"
+#include "benchmark/helpers.h"
#if (TARGET_OS_OSX || TARGET_OS_SIMULATOR)
/*
bool verbose;
} test_args_t;
-/* Get a (wall-time) timestamp in nanoseconds */
-static uint64_t get_timestamp_ns(void);
-/* Get the number of cpus on this device. */
-static unsigned int get_ncpu(void);
/*
* Fault in the pages in the given buffer.
*/
#else
static const size_t memory_per_core = 25 * (1UL << 20);
#endif /* (TARGET_OS_OSX || TARGET_OS_SIMULATOR) */
- const size_t kMemSize = memory_per_core * get_ncpu();
+ const size_t kMemSize = memory_per_core * (size_t) get_ncpu();
test_globals_t *globals = allocate_test_globals();
/* Total wall-time spent faulting in pages. */
uint64_t wall_time_elapsed_ns = 0;
setup_memory(globals, variant);
benchmark_log(verbose, "Initialized data structures for iteration. Waking workers.\n");
/* Grab a timestamp, tick the current iteration, and wake up the worker threads */
- start_time = get_timestamp_ns();
+ start_time = current_timestamp_ns();
globals->tg_current_iteration++;
ret = pthread_mutex_unlock(&globals->tg_lock);
assert(ret == 0);
while (globals->tg_iterations_completed != globals->tg_current_iteration) {
ret = pthread_cond_wait(&globals->tg_cv, &globals->tg_lock);
}
- end_time = get_timestamp_ns();
+ end_time = current_timestamp_ns();
ret = pthread_mutex_unlock(&globals->tg_lock);
unmap_fault_buffers(globals);
assert(ret == 0);
fprintf(stderr, " %s Share vm objects across faulting threads.\n", kShareObjectsArgument);
}
-static uint64_t
-get_timestamp_ns()
-{
- return clock_gettime_nsec_np(kWallTimeClock);
-}
-
-static unsigned int
-get_ncpu(void)
-{
- int ncpu;
- size_t sysctl_size = sizeof(ncpu);
- int ret = sysctlbyname("hw.ncpu", &ncpu, &sysctl_size, NULL, 0);
- assert(ret == 0);
- return (unsigned int) ncpu;
-}
-
static void
parse_arguments(int argc, char** argv, test_args_t *args)
{
--- /dev/null
+#include <darwintest.h>
+#include <mach/vm_page_size.h>
+
+T_GLOBAL_META(
+ T_META_NAMESPACE("vm_page_size_overrides")
+ );
+
+static void
+verify_page_size(
+ int expected_shift,
+ int page_shift,
+ vm_size_t page_size,
+ vm_size_t page_mask)
+{
+ T_ASSERT_EQ(page_shift, expected_shift, "page_shift");
+ T_ASSERT_EQ(page_size, 1UL << expected_shift, "page_size");
+ T_ASSERT_EQ(page_mask, page_size - 1, "page_mask");
+}
+
+
+T_DECL(kernel_4k,
+ "Can override vm_kernel_page_size",
+ T_META_ENVVAR("VM_KERNEL_PAGE_SIZE_4K=1"),
+ T_META_ENVVAR("MallocGuardEdges=0"),
+ T_META_ENVVAR("MallocDoNotProtectPrelude=1"),
+ T_META_ENVVAR("MallocDoNotProtectPostlude=1"))
+{
+ verify_page_size(12, vm_kernel_page_shift, vm_kernel_page_size, vm_kernel_page_mask);
+}
+
+T_DECL(invalid,
+ "Invalid overrides",
+ T_META_ENVVAR("VM_KERNEL_PAGE_SIZE_4K=2"),
+ T_META_ENVVAR("VM_KERNEL_PAGE_SIZE=4K"),
+ T_META_ENVVAR("VM_KERNEL_PAGE_SIZE="))
+{
+ /*
+ * This test just verifies that libkernel_init doesn't
+ * crash when handling invalid overrides.
+ * So if we got here, we can pass the test.
+ */
+ T_PASS("Test process spawned");
+}
+++ /dev/null
-#include <assert.h>
-#include <errno.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <sys/mman.h>
-
-#include "vm/perf_helpers.h"
-
-#define K_CTIME_BUFFER_LEN 26
-void
-benchmark_log(bool verbose, const char *restrict fmt, ...)
-{
- time_t now;
- char time_buffer[K_CTIME_BUFFER_LEN];
- struct tm local_time;
- va_list args;
- if (verbose) {
- strncpy(time_buffer, "UNKNOWN", K_CTIME_BUFFER_LEN);
-
- now = time(NULL);
- if (now != -1) {
- struct tm* ret = localtime_r(&now, &local_time);
- if (ret == &local_time) {
- snprintf(time_buffer, K_CTIME_BUFFER_LEN,
- "%.2d/%.2d/%.2d %.2d:%.2d:%.2d",
- local_time.tm_mon + 1, local_time.tm_mday,
- local_time.tm_year + 1900,
- local_time.tm_hour, local_time.tm_min,
- local_time.tm_sec);
- }
- }
-
- printf("%s: ", time_buffer);
- va_start(args, fmt);
- vprintf(fmt, args);
- fflush(stdout);
- }
-}
-
-uint64_t
-timespec_difference_us(const struct timespec* a, const struct timespec* b)
-{
- assert(a->tv_sec >= b->tv_sec || a->tv_nsec >= b->tv_nsec);
- long seconds_elapsed = a->tv_sec - b->tv_sec;
- uint64_t nsec_elapsed;
- if (b->tv_nsec > a->tv_nsec) {
- seconds_elapsed--;
- nsec_elapsed = kNumNanosecondsInSecond - (uint64_t) (b->tv_nsec - a->tv_nsec);
- } else {
- nsec_elapsed = (uint64_t) (a->tv_nsec - b->tv_nsec);
- }
- return (uint64_t) seconds_elapsed * kNumMicrosecondsInSecond + nsec_elapsed / kNumNanosecondsInMicrosecond;
-}
-
-unsigned char *
-mmap_buffer(size_t memsize)
-{
- int fd = -1;
- unsigned char* addr = (unsigned char *)mmap(NULL, memsize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,
- fd, 0);
- if ((void*) addr == MAP_FAILED) {
- fprintf(stderr, "Unable to mmap a memory object: %s\n", strerror(errno));
- exit(2);
- }
- return addr;
-}
+++ /dev/null
-#ifndef VM_PERF_HELPERS_H
-#define VM_PERF_HELPERS_H
-
-/*
- * Utility functions and constants used by the VM perf tests.
- */
-#include <inttypes.h>
-#include <time.h>
-#include <stdbool.h>
-
-/*
- * mmap an anonymous chunk of memory.
- */
-unsigned char *mmap_buffer(size_t size);
-/*
- * Returns a - b in microseconds.
- * NB: a must be >= b
- */
-uint64_t timespec_difference_us(const struct timespec* a, const struct timespec* b);
-/*
- * Print the message to stdout along with the current time.
- * Also flushes stdout so that the log can help detect hangs. Don't call
- * this function from within the measured portion of the benchmark as it will
- * pollute your measurement.
- *
- * NB: Will only log if verbose == true.
- */
-void benchmark_log(bool verbose, const char *restrict fmt, ...) __attribute__((format(printf, 2, 3)));
-
-static const uint64_t kNumMicrosecondsInSecond = 1000UL * 1000;
-static const uint64_t kNumNanosecondsInMicrosecond = 1000UL;
-static const uint64_t kNumNanosecondsInSecond = kNumNanosecondsInMicrosecond * kNumMicrosecondsInSecond;
-
-#endif /* !defined(VM_PERF_HELPERS_H) */
#include <sys/mman.h>
#include <sys/sysctl.h>
-#include "vm/perf_helpers.h"
+#include "benchmark/helpers.h"
typedef enum test_variant {
VARIANT_MADVISE_FREE
--- /dev/null
+#include <errno.h>
+#include <sys/sysctl.h>
+#include <time.h>
+
+#include <darwintest.h>
+
+/*
+ * Try a physical offset from the start of DRAM of:
+ * watchOS 512 MB
+ * macOS 3 GB
+ * iOS, etc. 750 MB
+ */
+#if TARGET_OS_WATCH
+#define USEBOOTARG "bad_ram_pages=536870912 bad_static_mfree=1"
+#elif TARGET_OS_OSX
+#define USEBOOTARG "bad_ram_pages=3221225472 bad_static_mfree=1"
+#else
+#define USEBOOTARG "bad_ram_pages=786432000 bad_static_mfree=1"
+#endif
+
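+/*
+ * The bad_ram_pages boot-arg (set via T_META_BOOTARGS_SET below) points
+ * the kernel at a physical offset from the start of DRAM to treat as bad
+ * so that the page gets retired during boot; the test then only needs to
+ * verify that the retired-page count is non-zero.
+ */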
+T_DECL(retired_pages_test,
+ "Test retiring pages at boot",
+ T_META_NAMESPACE("xnu.vm"),
+ T_META_BOOTARGS_SET(USEBOOTARG),
+ T_META_ASROOT(true),
+ T_META_CHECK_LEAKS(false))
+{
+ int err;
+ unsigned int count = 0;
+ size_t s = sizeof(count);
+
+#if !defined(__arm64__) || TARGET_OS_BRIDGE
+ T_SKIP("No page retirement on x86, arm32 or bridgeOS kernels");
+#endif
+ /*
+ * Get the number of pages retired from the kernel
+ */
+ err = sysctlbyname("vm.retired_pages_count", &count, &s, NULL, 0);
+
+ /* If the sysctl isn't supported, test succeeds */
+ if (err == -1 && errno == ENOENT) {
+ T_SKIP("sysctl vm.retired_pages_count not found, skipping test");
+ }
+ T_ASSERT_POSIX_SUCCESS(err, "sysctl vm.retired_pages_count");
+
+ T_ASSERT_GT_INT(count, 0, "Expect retired pages");
+}
+#include <TargetConditionals.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
fprintf(stdout, "%s: WARNING: unsigned code was executed\n",
cmdname);
-#if CONFIG_EMBEDDED
+#if !TARGET_OS_OSX
/* fail: unsigned code was executed */
fprintf(stdout, "%s: FAIL\n", cmdname);
exit(1);
-#else /* CONFIG_EMBEDDED */
+#else /* !TARGET_OS_OSX */
/* no fail: unsigned code is only prohibited on embedded platforms */
fprintf(stdout, "%s: SUCCESS\n", cmdname);
exit(0);
-#endif /* CONFIG_EMBEDDED */
+#endif /* !TARGET_OS_OSX */
}
*/
#include <darwintest.h>
+#include <dlfcn.h>
#include <errno.h>
#include <ptrauth.h>
#include <stdio.h>
}
#if defined(__x86_64__) || defined(__i386__)
- if (*((uint64_t *)_COMM_PAGE_CPU_CAPABILITIES64) & kIsTranslated) {
+ if (COMM_PAGE_READ(uint64_t, CPU_CAPABILITIES64) & kIsTranslated) {
T_LOG("Skipping madvise reusable tests because we're running under translation.");
goto done;
}
T_META_ALL_VALID_ARCHS(true))
{
#if defined(__x86_64__) || defined(__i386__)
- if (*((uint64_t *)_COMM_PAGE_CPU_CAPABILITIES64) & kIsTranslated) {
+ if (COMM_PAGE_READ(uint64_t, CPU_CAPABILITIES64) & kIsTranslated) {
T_SKIP("madvise reusable is not supported under Rosetta translation. Skipping.)");
}
#endif /* defined(__x86_64__) || defined(__i386__) */
T_ASSERT_MACH_SUCCESS(kr, "vm_map()");
}
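+/* Human-readable renderings of vm_prot_t and share-mode values, for logging. */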
+static const char *prot_str[] = { "---", "r--", "-w-", "rw-", "--x", "r-x", "-wx", "rwx" };
+static const char *share_mode_str[] = { "---", "COW", "PRIVATE", "EMPTY", "SHARED", "TRUESHARED", "PRIVATE_ALIASED", "SHARED_ALIASED", "LARGE_PAGE" };
+
+T_DECL(shared_region_share_writable, "sharing a writable mapping of the shared region should not give write access to the shared region - rdar://problem/74469953",
+ T_META_ALL_VALID_ARCHS(true))
+{
+ int ret;
+ uint64_t sr_start;
+ kern_return_t kr;
+ mach_vm_address_t address, tmp_address, remap_address;
+ mach_vm_size_t size, tmp_size, remap_size;
+ uint32_t depth;
+ mach_msg_type_number_t count;
+ vm_region_submap_info_data_64_t info;
+ vm_prot_t cur_prot, max_prot;
+ uint32_t before, after, remap;
+ mach_port_t mem_entry;
+
+ ret = __shared_region_check_np(&sr_start);
+ if (ret != 0) {
+ int saved_errno;
+ saved_errno = errno;
+
+ T_ASSERT_EQ(saved_errno, ENOMEM, "__shared_region_check_np() %d (%s)",
+ saved_errno, strerror(saved_errno));
+ T_END;
+ }
+ T_LOG("SHARED_REGION_BASE 0x%llx", SHARED_REGION_BASE);
+ T_LOG("SHARED_REGION_SIZE 0x%llx", SHARED_REGION_SIZE);
+ T_LOG("shared region starts at 0x%llx", sr_start);
+ T_QUIET; T_ASSERT_GE(sr_start, SHARED_REGION_BASE,
+ "shared region starts below BASE");
+ T_QUIET; T_ASSERT_LT(sr_start, SHARED_REGION_BASE + SHARED_REGION_SIZE,
+ "shared region starts above BASE+SIZE");
+
+ /*
+ * Step 1 - check that one cannot get write access to a read-only
+ * mapping in the shared region.
+ */
+ size = 0;
+ for (address = SHARED_REGION_BASE;
+ address < SHARED_REGION_BASE + SHARED_REGION_SIZE;
+ address += size) {
+ size = 0;
+ depth = 99;
+ count = VM_REGION_SUBMAP_INFO_COUNT_64;
+ kr = mach_vm_region_recurse(mach_task_self(),
+ &address,
+ &size,
+ &depth,
+ (vm_region_recurse_info_t)&info,
+ &count);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "vm_region_recurse()");
+ if (kr == KERN_INVALID_ADDRESS) {
+ T_SKIP("could not find read-only nested mapping");
+ T_END;
+ }
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_region_recurse()");
+ T_LOG("0x%llx - 0x%llx depth:%d %s/%s %s 0x%x",
+ address, address + size, depth,
+ prot_str[info.protection],
+ prot_str[info.max_protection],
+ share_mode_str[info.share_mode],
+ info.object_id);
+ if (depth > 0 &&
+ (info.protection == VM_PROT_READ) &&
+ (info.max_protection == VM_PROT_READ)) {
+ /* nested and read-only: bingo! */
+ break;
+ }
+ }
+ if (address >= SHARED_REGION_BASE + SHARED_REGION_SIZE) {
+ T_SKIP("could not find read-only nested mapping");
+ T_END;
+ }
+
+ /* test vm_remap() of RO */
+ before = *(uint32_t *)(uintptr_t)address;
+ remap_address = 0;
+ remap_size = size;
+ kr = mach_vm_remap(mach_task_self(),
+ &remap_address,
+ remap_size,
+ 0,
+ VM_FLAGS_ANYWHERE | VM_FLAGS_RETURN_DATA_ADDR,
+ mach_task_self(),
+ address,
+ FALSE,
+ &cur_prot,
+ &max_prot,
+ VM_INHERIT_DEFAULT);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_remap()");
+// T_QUIET; T_ASSERT_EQ(cur_prot, VM_PROT_READ, "cur_prot is read-only");
+// T_QUIET; T_ASSERT_EQ(max_prot, VM_PROT_READ, "max_prot is read-only");
+ /* check that region is still nested */
+ tmp_address = address;
+ tmp_size = 0;
+ depth = 99;
+ count = VM_REGION_SUBMAP_INFO_COUNT_64;
+ kr = mach_vm_region_recurse(mach_task_self(),
+ &tmp_address,
+ &tmp_size,
+ &depth,
+ (vm_region_recurse_info_t)&info,
+ &count);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_region_recurse()");
+ T_LOG("0x%llx - 0x%llx depth:%d %s/%s %s 0x%x",
+ tmp_address, tmp_address + tmp_size, depth,
+ prot_str[info.protection],
+ prot_str[info.max_protection],
+ share_mode_str[info.share_mode],
+ info.object_id);
+ T_QUIET; T_ASSERT_EQ(tmp_address, address, "address hasn't changed");
+// T_QUIET; T_ASSERT_EQ(tmp_size, size, "size hasn't changed");
+ T_QUIET; T_ASSERT_GT(depth, 0, "still nested");
+ T_QUIET; T_ASSERT_EQ(info.protection, VM_PROT_READ, "cur_prot still read-only");
+// T_QUIET; T_ASSERT_EQ(info.max_protection, VM_PROT_READ, "max_prot still read-only");
+ /* check that new mapping is read-only */
+ tmp_address = remap_address;
+ tmp_size = 0;
+ depth = 99;
+ count = VM_REGION_SUBMAP_INFO_COUNT_64;
+ kr = mach_vm_region_recurse(mach_task_self(),
+ &tmp_address,
+ &tmp_size,
+ &depth,
+ (vm_region_recurse_info_t)&info,
+ &count);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_region_recurse()");
+ T_LOG("0x%llx - 0x%llx depth:%d %s/%s %s 0x%x",
+ tmp_address, tmp_address + tmp_size, depth,
+ prot_str[info.protection],
+ prot_str[info.max_protection],
+ share_mode_str[info.share_mode],
+ info.object_id);
+ T_QUIET; T_ASSERT_EQ(tmp_address, remap_address, "address hasn't changed");
+// T_QUIET; T_ASSERT_EQ(tmp_size, size, "size hasn't changed");
+ T_QUIET; T_ASSERT_EQ(info.protection, VM_PROT_READ, "new cur_prot read-only");
+// T_QUIET; T_ASSERT_EQ(info.max_protection, VM_PROT_READ, "new max_prot read-only");
+ remap = *(uint32_t *)(uintptr_t)remap_address;
+ T_QUIET; T_ASSERT_EQ(remap, before, "remap matches original");
+// this would crash if actually read-only:
+// *(uint32_t *)(uintptr_t)remap_address = before + 1;
+ after = *(uint32_t *)(uintptr_t)address;
+ T_LOG("vm_remap(): 0x%llx 0x%x -> 0x%x", address, before, after);
+// *(uint32_t *)(uintptr_t)remap_address = before;
+ if (before != after) {
+ T_FAIL("vm_remap() bypassed copy-on-write");
+ } else {
+ T_PASS("vm_remap() did not bypass copy-on-write");
+ }
+ /* cleanup */
+ kr = mach_vm_deallocate(mach_task_self(), remap_address, remap_size);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "vm_deallocate()");
+ T_PASS("vm_remap() read-only");
+
+#if defined(VM_MEMORY_ROSETTA)
+ if (dlsym(RTLD_DEFAULT, "mach_vm_remap_new") == NULL) {
+ T_PASS("vm_remap_new() is not present");
+ goto skip_vm_remap_new_ro;
+ }
+ /* test vm_remap_new() of RO */
+ before = *(uint32_t *)(uintptr_t)address;
+ remap_address = 0;
+ remap_size = size;
+ cur_prot = VM_PROT_READ | VM_PROT_WRITE;
+ max_prot = VM_PROT_READ | VM_PROT_WRITE;
+ kr = mach_vm_remap_new(mach_task_self(),
+ &remap_address,
+ remap_size,
+ 0,
+ VM_FLAGS_ANYWHERE,
+ mach_task_self(),
+ address,
+ FALSE,
+ &cur_prot,
+ &max_prot,
+ VM_INHERIT_DEFAULT);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "vm_remap_new()");
+ if (kr == KERN_PROTECTION_FAILURE) {
+ /* wrong but not a security issue... */
+ goto skip_vm_remap_new_ro;
+ }
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_remap_new()");
+ remap = *(uint32_t *)(uintptr_t)remap_address;
+ T_QUIET; T_ASSERT_EQ(remap, before, "remap matches original");
+ *(uint32_t *)(uintptr_t)remap_address = before + 1;
+ after = *(uint32_t *)(uintptr_t)address;
+ T_LOG("vm_remap_new(): 0x%llx 0x%x -> 0x%x", address, before, after);
+ *(uint32_t *)(uintptr_t)remap_address = before;
+ if (before != after) {
+ T_FAIL("vm_remap_new() bypassed copy-on-write");
+ } else {
+ T_PASS("vm_remap_new() did not bypass copy-on-write");
+ }
+ /* check that region is still nested */
+ tmp_address = address;
+ tmp_size = 0;
+ depth = 99;
+ count = VM_REGION_SUBMAP_INFO_COUNT_64;
+ kr = mach_vm_region_recurse(mach_task_self(),
+ &tmp_address,
+ &tmp_size,
+ &depth,
+ (vm_region_recurse_info_t)&info,
+ &count);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_region_recurse()");
+ T_LOG("0x%llx - 0x%llx depth:%d %s/%s %s 0x%x",
+ tmp_address, tmp_address + tmp_size, depth,
+ prot_str[info.protection],
+ prot_str[info.max_protection],
+ share_mode_str[info.share_mode],
+ info.object_id);
+ T_QUIET; T_ASSERT_EQ(tmp_address, address, "address hasn't changed");
+// T_QUIET; T_ASSERT_EQ(tmp_size, size, "size hasn't changed");
+ T_QUIET; T_ASSERT_GT(depth, 0, "still nested");
+ T_QUIET; T_ASSERT_EQ(info.protection, VM_PROT_READ, "cur_prot still read-only");
+ T_QUIET; T_ASSERT_EQ(info.max_protection, VM_PROT_READ, "max_prot still read-only");
+ T_PASS("vm_remap_new() read-only");
+skip_vm_remap_new_ro:
+#else /* defined(VM_MEMORY_ROSETTA) */
+ /* pre-BigSur SDK: no vm_remap_new() */
+ T_LOG("No vm_remap_new() to test");
+#endif /* defined(VM_MEMORY_ROSETTA) */
+
+ /* test mach_make_memory_entry_64(VM_SHARE) of RO */
+ before = *(uint32_t *)(uintptr_t)address;
+ remap_size = size;
+ mem_entry = MACH_PORT_NULL;
+ kr = mach_make_memory_entry_64(mach_task_self(),
+ &remap_size,
+ address,
+ MAP_MEM_VM_SHARE | VM_PROT_READ | VM_PROT_WRITE,
+ &mem_entry,
+ MACH_PORT_NULL);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_make_memory_entry_64(VM_SHARE)");
+ if (kr == KERN_PROTECTION_FAILURE) {
+ /* wrong but not a security issue... */
+ goto skip_mem_entry_vm_share_ro;
+ }
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_make_memory_entry_64(VM_SHARE)");
+ remap_address = 0;
+ kr = mach_vm_map(mach_task_self(),
+ &remap_address,
+ remap_size,
+ 0, /* mask */
+ VM_FLAGS_ANYWHERE,
+ mem_entry,
+ 0, /* offset */
+ FALSE, /* copy */
+ VM_PROT_READ | VM_PROT_WRITE,
+ VM_PROT_READ | VM_PROT_WRITE,
+ VM_INHERIT_DEFAULT);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_map()");
+ remap = *(uint32_t *)(uintptr_t)remap_address;
+ T_QUIET; T_ASSERT_EQ(remap, before, "remap matches original");
+ *(uint32_t *)(uintptr_t)remap_address = before + 1;
+ after = *(uint32_t *)(uintptr_t)address;
+ T_LOG("mem_entry(VM_SHARE): 0x%llx 0x%x -> 0x%x", address, before, after);
+ *(uint32_t *)(uintptr_t)remap_address = before;
+ if (before != after) {
+ T_FAIL("mem_entry(VM_SHARE) bypassed copy-on-write");
+ } else {
+ T_PASS("mem_entry(VM_SHARE) did not bypass copy-on-write");
+ }
+ /* check that region is still nested */
+ tmp_address = address;
+ tmp_size = 0;
+ depth = 99;
+ count = VM_REGION_SUBMAP_INFO_COUNT_64;
+ kr = mach_vm_region_recurse(mach_task_self(),
+ &tmp_address,
+ &tmp_size,
+ &depth,
+ (vm_region_recurse_info_t)&info,
+ &count);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_region_recurse()");
+ T_LOG("0x%llx - 0x%llx depth:%d %s/%s %s 0x%x",
+ tmp_address, tmp_address + tmp_size, depth,
+ prot_str[info.protection],
+ prot_str[info.max_protection],
+ share_mode_str[info.share_mode],
+ info.object_id);
+ T_QUIET; T_ASSERT_EQ(tmp_address, address, "address hasn't changed");
+// T_QUIET; T_ASSERT_EQ(tmp_size, size, "size hasn't changed");
+ T_QUIET; T_ASSERT_GT(depth, 0, "still nested");
+ T_QUIET; T_ASSERT_EQ(info.protection, VM_PROT_READ, "cur_prot still read-only");
+ T_QUIET; T_ASSERT_EQ(info.max_protection, VM_PROT_READ, "max_prot still read-only");
+ /* check that new mapping is a copy */
+ tmp_address = remap_address;
+ tmp_size = 0;
+ depth = 99;
+ count = VM_REGION_SUBMAP_INFO_COUNT_64;
+ kr = mach_vm_region_recurse(mach_task_self(),
+ &tmp_address,
+ &tmp_size,
+ &depth,
+ (vm_region_recurse_info_t)&info,
+ &count);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_region_recurse()");
+ T_LOG("0x%llx - 0x%llx depth:%d %s/%s %s 0x%x",
+ tmp_address, tmp_address + tmp_size, depth,
+ prot_str[info.protection],
+ prot_str[info.max_protection],
+ share_mode_str[info.share_mode],
+ info.object_id);
+ T_QUIET; T_ASSERT_EQ(tmp_address, remap_address, "address hasn't changed");
+// T_QUIET; T_ASSERT_EQ(tmp_size, size, "size hasn't changed");
+ T_QUIET; T_ASSERT_EQ(depth, 0, "new mapping is unnested");
+// T_QUIET; T_ASSERT_EQ(info.protection, VM_PROT_READ, "new cur_prot read-only");
+// T_QUIET; T_ASSERT_EQ(info.max_protection, VM_PROT_READ, "new max_prot read-only");
+ /* cleanup */
+ kr = mach_vm_deallocate(mach_task_self(), remap_address, remap_size);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "vm_deallocate()");
+ T_PASS("mem_entry(VM_SHARE) read-only");
+skip_mem_entry_vm_share_ro:
+
+ /* test mach_make_memory_entry_64() of RO */
+ before = *(uint32_t *)(uintptr_t)address;
+ remap_size = size;
+ mem_entry = MACH_PORT_NULL;
+ kr = mach_make_memory_entry_64(mach_task_self(),
+ &remap_size,
+ address,
+ VM_PROT_READ | VM_PROT_WRITE,
+ &mem_entry,
+ MACH_PORT_NULL);
+ T_QUIET; T_ASSERT_EQ(kr, KERN_PROTECTION_FAILURE, "mach_make_memory_entry_64()");
+ /* check that region is still nested */
+ tmp_address = address;
+ tmp_size = 0;
+ depth = 99;
+ count = VM_REGION_SUBMAP_INFO_COUNT_64;
+ kr = mach_vm_region_recurse(mach_task_self(),
+ &tmp_address,
+ &tmp_size,
+ &depth,
+ (vm_region_recurse_info_t)&info,
+ &count);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_region_recurse()");
+ T_LOG("0x%llx - 0x%llx depth:%d %s/%s %s 0x%x",
+ tmp_address, tmp_address + tmp_size, depth,
+ prot_str[info.protection],
+ prot_str[info.max_protection],
+ share_mode_str[info.share_mode],
+ info.object_id);
+ T_QUIET; T_ASSERT_EQ(tmp_address, address, "address hasn't changed");
+// T_QUIET; T_ASSERT_EQ(tmp_size, size, "size hasn't changed");
+// T_QUIET; T_ASSERT_GT(depth, 0, "still nested");
+ T_QUIET; T_ASSERT_EQ(info.protection, VM_PROT_READ, "cur_prot still read-only");
+ if (depth > 0) {
+ T_QUIET; T_ASSERT_EQ(info.max_protection, VM_PROT_READ, "max_prot still read-only");
+ }
+ T_PASS("mem_entry() read-only");
+
+
+ /*
+ * Step 2 - check that one cannot share write access with a writable
+ * mapping in the shared region.
+ */
+ size = 0;
+ for (address = SHARED_REGION_BASE;
+ address < SHARED_REGION_BASE + SHARED_REGION_SIZE;
+ address += size) {
+ size = 0;
+ depth = 99;
+ count = VM_REGION_SUBMAP_INFO_COUNT_64;
+ kr = mach_vm_region_recurse(mach_task_self(),
+ &address,
+ &size,
+ &depth,
+ (vm_region_recurse_info_t)&info,
+ &count);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "vm_region_recurse()");
+ if (kr == KERN_INVALID_ADDRESS) {
+ T_SKIP("could not find writable nested mapping");
+ T_END;
+ }
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_region_recurse()");
+ T_LOG("0x%llx - 0x%llx depth:%d %s/%s %s 0x%x",
+ address, address + size, depth,
+ prot_str[info.protection],
+ prot_str[info.max_protection],
+ share_mode_str[info.share_mode],
+ info.object_id);
+ if (depth > 0 && (info.protection & VM_PROT_WRITE)) {
+ /* nested and writable: bingo! */
+ break;
+ }
+ }
+ if (address >= SHARED_REGION_BASE + SHARED_REGION_SIZE) {
+ T_SKIP("could not find writable nested mapping");
+ T_END;
+ }
+
+ /* test vm_remap() of RW */
+ before = *(uint32_t *)(uintptr_t)address;
+ remap_address = 0;
+ remap_size = size;
+ kr = mach_vm_remap(mach_task_self(),
+ &remap_address,
+ remap_size,
+ 0,
+ VM_FLAGS_ANYWHERE | VM_FLAGS_RETURN_DATA_ADDR,
+ mach_task_self(),
+ address,
+ FALSE,
+ &cur_prot,
+ &max_prot,
+ VM_INHERIT_DEFAULT);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_remap()");
+ if (!(cur_prot & VM_PROT_WRITE)) {
+ T_LOG("vm_remap(): 0x%llx not writable %s/%s",
+ remap_address, prot_str[cur_prot], prot_str[max_prot]);
+ T_ASSERT_FAIL("vm_remap() remapping not writable");
+ }
+ remap = *(uint32_t *)(uintptr_t)remap_address;
+ T_QUIET; T_ASSERT_EQ(remap, before, "remap matches original");
+ *(uint32_t *)(uintptr_t)remap_address = before + 1;
+ after = *(uint32_t *)(uintptr_t)address;
+ T_LOG("vm_remap(): 0x%llx 0x%x -> 0x%x", address, before, after);
+ *(uint32_t *)(uintptr_t)remap_address = before;
+ if (before != after) {
+ T_FAIL("vm_remap() bypassed copy-on-write");
+ } else {
+ T_PASS("vm_remap() did not bypass copy-on-write");
+ }
+ /* check that region is still nested */
+ tmp_address = address;
+ tmp_size = 0;
+ depth = 99;
+ count = VM_REGION_SUBMAP_INFO_COUNT_64;
+ kr = mach_vm_region_recurse(mach_task_self(),
+ &tmp_address,
+ &tmp_size,
+ &depth,
+ (vm_region_recurse_info_t)&info,
+ &count);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_region_recurse()");
+ T_LOG("0x%llx - 0x%llx depth:%d %s/%s %s 0x%x",
+ tmp_address, tmp_address + tmp_size, depth,
+ prot_str[info.protection],
+ prot_str[info.max_protection],
+ share_mode_str[info.share_mode],
+ info.object_id);
+ T_QUIET; T_ASSERT_EQ(tmp_address, address, "address hasn't changed");
+// T_QUIET; T_ASSERT_EQ(tmp_size, size, "size hasn't changed");
+ T_QUIET; T_ASSERT_GT(depth, 0, "still nested");
+ T_QUIET; T_ASSERT_EQ(info.protection, VM_PROT_DEFAULT, "cur_prot still writable");
+ T_QUIET; T_ASSERT_EQ((info.max_protection & VM_PROT_WRITE), VM_PROT_WRITE, "max_prot still writable");
+ /* cleanup */
+ kr = mach_vm_deallocate(mach_task_self(), remap_address, remap_size);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "vm_deallocate()");
+
+#if defined(VM_MEMORY_ROSETTA)
+ if (dlsym(RTLD_DEFAULT, "mach_vm_remap_new") == NULL) {
+ T_PASS("vm_remap_new() is not present");
+ goto skip_vm_remap_new_rw;
+ }
+ /* test vm_remap_new() of RW */
+ before = *(uint32_t *)(uintptr_t)address;
+ remap_address = 0;
+ remap_size = size;
+ cur_prot = VM_PROT_READ | VM_PROT_WRITE;
+ max_prot = VM_PROT_READ | VM_PROT_WRITE;
+ kr = mach_vm_remap_new(mach_task_self(),
+ &remap_address,
+ remap_size,
+ 0,
+ VM_FLAGS_ANYWHERE,
+ mach_task_self(),
+ address,
+ FALSE,
+ &cur_prot,
+ &max_prot,
+ VM_INHERIT_DEFAULT);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "vm_remap_new()");
+ if (kr == KERN_PROTECTION_FAILURE) {
+ /* wrong but not a security issue... */
+ goto skip_vm_remap_new_rw;
+ }
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_remap_new()");
+ if (!(cur_prot & VM_PROT_WRITE)) {
+ T_LOG("vm_remap_new(): 0x%llx not writable %s/%s",
+ remap_address, prot_str[cur_prot], prot_str[max_prot]);
+ T_ASSERT_FAIL("vm_remap_new() remapping not writable");
+ }
+ remap = *(uint32_t *)(uintptr_t)remap_address;
+ T_QUIET; T_ASSERT_EQ(remap, before, "remap matches original");
+ *(uint32_t *)(uintptr_t)remap_address = before + 1;
+ after = *(uint32_t *)(uintptr_t)address;
+ T_LOG("vm_remap_new(): 0x%llx 0x%x -> 0x%x", address, before, after);
+ *(uint32_t *)(uintptr_t)remap_address = before;
+ if (before != after) {
+ T_FAIL("vm_remap_new() bypassed copy-on-write");
+ } else {
+ T_PASS("vm_remap_new() did not bypass copy-on-write");
+ }
+ /* check that region is still nested */
+ tmp_address = address;
+ tmp_size = 0;
+ depth = 99;
+ count = VM_REGION_SUBMAP_INFO_COUNT_64;
+ kr = mach_vm_region_recurse(mach_task_self(),
+ &tmp_address,
+ &tmp_size,
+ &depth,
+ (vm_region_recurse_info_t)&info,
+ &count);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_region_recurse()");
+ T_LOG("0x%llx - 0x%llx depth:%d %s/%s %s 0x%x",
+ tmp_address, tmp_address + tmp_size, depth,
+ prot_str[info.protection],
+ prot_str[info.max_protection],
+ share_mode_str[info.share_mode],
+ info.object_id);
+ T_QUIET; T_ASSERT_EQ(tmp_address, address, "address hasn't changed");
+// T_QUIET; T_ASSERT_EQ(tmp_size, size, "size hasn't changed");
+ T_QUIET; T_ASSERT_GT(depth, 0, "still nested");
+ T_QUIET; T_ASSERT_EQ(info.protection, VM_PROT_DEFAULT, "cur_prot still writable");
+ T_QUIET; T_ASSERT_EQ((info.max_protection & VM_PROT_WRITE), VM_PROT_WRITE, "max_prot still writable");
+ /* cleanup */
+ kr = mach_vm_deallocate(mach_task_self(), remap_address, remap_size);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "vm_deallocate()");
+skip_vm_remap_new_rw:
+#else /* defined(VM_MEMORY_ROSETTA) */
+ /* pre-BigSur SDK: no vm_remap_new() */
+ T_LOG("No vm_remap_new() to test");
+#endif /* defined(VM_MEMORY_ROSETTA) */
+
+ /* test mach_make_memory_entry_64(VM_SHARE) of RW */
+ before = *(uint32_t *)(uintptr_t)address;
+ remap_size = size;
+ mem_entry = MACH_PORT_NULL;
+ kr = mach_make_memory_entry_64(mach_task_self(),
+ &remap_size,
+ address,
+ MAP_MEM_VM_SHARE | VM_PROT_READ | VM_PROT_WRITE,
+ &mem_entry,
+ MACH_PORT_NULL);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "mach_make_memory_entry_64(VM_SHARE)");
+ if (kr == KERN_PROTECTION_FAILURE) {
+ /* wrong but not a security issue... */
+ goto skip_mem_entry_vm_share_rw;
+ }
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_make_memory_entry_64(VM_SHARE)");
+ T_QUIET; T_ASSERT_EQ(remap_size, size, "mem_entry(VM_SHARE) should cover whole mapping");
+// T_LOG("AFTER MAKE_MEM_ENTRY(VM_SHARE) 0x%llx...", address); fflush(stdout); fflush(stderr); getchar();
+ remap_address = 0;
+ kr = mach_vm_map(mach_task_self(),
+ &remap_address,
+ remap_size,
+ 0, /* mask */
+ VM_FLAGS_ANYWHERE,
+ mem_entry,
+ 0, /* offset */
+ FALSE, /* copy */
+ VM_PROT_READ | VM_PROT_WRITE,
+ VM_PROT_READ | VM_PROT_WRITE,
+ VM_INHERIT_DEFAULT);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_map()");
+ remap = *(uint32_t *)(uintptr_t)remap_address;
+ T_QUIET; T_ASSERT_EQ(remap, before, "remap matches original");
+// T_LOG("AFTER VM_MAP 0x%llx...", remap_address); fflush(stdout); fflush(stderr); getchar();
+ *(uint32_t *)(uintptr_t)remap_address = before + 1;
+// T_LOG("AFTER WRITE 0x%llx...", remap_address); fflush(stdout); fflush(stderr); getchar();
+ after = *(uint32_t *)(uintptr_t)address;
+ T_LOG("mem_entry(VM_SHARE): 0x%llx 0x%x -> 0x%x", address, before, after);
+ *(uint32_t *)(uintptr_t)remap_address = before;
+ if (before != after) {
+ T_FAIL("mem_entry(VM_SHARE) bypassed copy-on-write");
+ } else {
+ T_PASS("mem_entry(VM_SHARE) did not bypass copy-on-write");
+ }
+ /* check that region is still nested */
+ tmp_address = address;
+ tmp_size = 0;
+ depth = 99;
+ count = VM_REGION_SUBMAP_INFO_COUNT_64;
+ kr = mach_vm_region_recurse(mach_task_self(),
+ &tmp_address,
+ &tmp_size,
+ &depth,
+ (vm_region_recurse_info_t)&info,
+ &count);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_region_recurse()");
+ T_LOG("0x%llx - 0x%llx depth:%d %s/%s %s 0x%x",
+ tmp_address, tmp_address + tmp_size, depth,
+ prot_str[info.protection],
+ prot_str[info.max_protection],
+ share_mode_str[info.share_mode],
+ info.object_id);
+ T_QUIET; T_ASSERT_EQ(tmp_address, address, "address hasn't changed");
+// T_QUIET; T_ASSERT_EQ(tmp_size, size, "size hasn't changed");
+ T_QUIET; T_ASSERT_GT(depth, 0, "still nested");
+ T_QUIET; T_ASSERT_EQ(info.protection, VM_PROT_DEFAULT, "cur_prot still writable");
+ T_QUIET; T_ASSERT_EQ((info.max_protection & VM_PROT_WRITE), VM_PROT_WRITE, "max_prot still writable");
+ /* cleanup */
+ kr = mach_vm_deallocate(mach_task_self(), remap_address, remap_size);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "vm_deallocate()");
+ mach_port_deallocate(mach_task_self(), mem_entry);
+skip_mem_entry_vm_share_rw:
+
+ /* test mach_make_memory_entry_64() of RW */
+ before = *(uint32_t *)(uintptr_t)address;
+ remap_size = size;
+ mem_entry = MACH_PORT_NULL;
+ kr = mach_make_memory_entry_64(mach_task_self(),
+ &remap_size,
+ address,
+ VM_PROT_READ | VM_PROT_WRITE,
+ &mem_entry,
+ MACH_PORT_NULL);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_make_memory_entry_64()");
+ remap_address = 0;
+ kr = mach_vm_map(mach_task_self(),
+ &remap_address,
+ remap_size,
+ 0, /* mask */
+ VM_FLAGS_ANYWHERE,
+ mem_entry,
+ 0, /* offset */
+ FALSE, /* copy */
+ VM_PROT_READ | VM_PROT_WRITE,
+ VM_PROT_READ | VM_PROT_WRITE,
+ VM_INHERIT_DEFAULT);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_map()");
+ remap = *(uint32_t *)(uintptr_t)remap_address;
+ T_QUIET; T_ASSERT_EQ(remap, before, "remap matches original");
+ *(uint32_t *)(uintptr_t)remap_address = before + 1;
+ after = *(uint32_t *)(uintptr_t)address;
+ T_LOG("mem_entry(): 0x%llx 0x%x -> 0x%x", address, before, after);
+ *(uint32_t *)(uintptr_t)remap_address = before;
+ /* check that region is no longer nested */
+ tmp_address = address;
+ tmp_size = 0;
+ depth = 99;
+ count = VM_REGION_SUBMAP_INFO_COUNT_64;
+ kr = mach_vm_region_recurse(mach_task_self(),
+ &tmp_address,
+ &tmp_size,
+ &depth,
+ (vm_region_recurse_info_t)&info,
+ &count);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "vm_region_recurse()");
+ T_LOG("0x%llx - 0x%llx depth:%d %s/%s %s 0x%x",
+ tmp_address, tmp_address + tmp_size, depth,
+ prot_str[info.protection],
+ prot_str[info.max_protection],
+ share_mode_str[info.share_mode],
+ info.object_id);
+ if (before != after) {
+ if (depth == 0) {
+ T_PASS("mem_entry() honored copy-on-write");
+ } else {
+ T_FAIL("mem_entry() did not trigger copy-on_write");
+ }
+ } else {
+ T_FAIL("mem_entry() did not honor copy-on-write");
+ }
+ T_QUIET; T_ASSERT_EQ(tmp_address, address, "address hasn't changed");
+// T_QUIET; T_ASSERT_EQ(tmp_size, size, "size hasn't changed");
+ T_QUIET; T_ASSERT_EQ(depth, 0, "no longer nested");
+ T_QUIET; T_ASSERT_EQ(info.protection, VM_PROT_DEFAULT, "cur_prot still writable");
+ T_QUIET; T_ASSERT_EQ((info.max_protection & VM_PROT_WRITE), VM_PROT_WRITE, "max_prot still writable");
+ /* cleanup */
+ kr = mach_vm_deallocate(mach_task_self(), remap_address, remap_size);
+ T_QUIET; T_EXPECT_MACH_SUCCESS(kr, "vm_deallocate()");
+ mach_port_deallocate(mach_task_self(), mem_entry);
+}
+
T_DECL(copyoverwrite_submap_protection, "test copywrite vm region submap \
protection", T_META_ALL_VALID_ARCHS(true))
{
the shared region triggers code-signing violations",
T_META_ALL_VALID_ARCHS(true))
{
- char *addr;
+ uint32_t *addr, before, after;
int retval;
int saved_errno;
kern_return_t kr;
vm_address_t map_addr, remap_addr;
vm_prot_t curprot, maxprot;
- addr = (char *)&printf;
+ addr = (uint32_t *)&printf;
#if __has_feature(ptrauth_calls)
map_addr = (vm_address_t)(uintptr_t)ptrauth_strip(addr, ptrauth_key_function_pointer);
#else /* __has_feature(ptrauth_calls) */
VM_INHERIT_DEFAULT);
T_ASSERT_EQ(kr, KERN_SUCCESS, "vm_remap error 0x%x (%s)",
kr, mach_error_string(kr));
+ before = *addr;
retval = mlock(addr, 4096);
+ after = *addr;
if (retval != 0) {
saved_errno = errno;
T_ASSERT_EQ(saved_errno, EACCES, "wire shared text error %d (%s), expected: %d",
saved_errno, strerror(saved_errno), EACCES);
+ } else if (after != before) {
+ T_ASSERT_FAIL("shared text changed by wiring at %p 0x%x -> 0x%x", addr, before, after);
} else {
T_PASS("wire shared text");
}
- addr = (char *) &fprintf;
+ addr = (uint32_t *) &fprintf;
+ before = *addr;
retval = mlock(addr, 4096);
+ after = *addr;
if (retval != 0) {
saved_errno = errno;
T_ASSERT_EQ(saved_errno, EACCES, "wire shared text error %d (%s), expected: %d",
saved_errno, strerror(saved_errno), EACCES);
+ } else if (after != before) {
+ T_ASSERT_FAIL("shared text changed by wiring at %p 0x%x -> 0x%x", addr, before, after);
} else {
T_PASS("wire shared text");
}
- addr = (char *) &testmain_wire_text;
+ addr = (uint32_t *) &testmain_wire_text;
+ before = *addr;
retval = mlock(addr, 4096);
+ after = *addr;
if (retval != 0) {
saved_errno = errno;
T_ASSERT_EQ(saved_errno, EACCES, "wire text error return error %d (%s)",
saved_errno, strerror(saved_errno));
+ } else if (after != before) {
+ T_ASSERT_FAIL("text changed by wiring at %p 0x%x -> 0x%x", addr, before, after);
} else {
T_PASS("wire text");
}
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
- <key>com.apple.rootless.datavault.controller.internal</key>
- <true/>
-</dict>
-</plist>
+++ /dev/null
-#include <darwintest.h>
-
-#include <fcntl.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/sysctl.h>
-#include <sys/disk.h>
-#include <sys/ioctl.h>
-#include <sys/mount.h>
-
-#if !(TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR)
-#include <sys/csr.h>
-#endif
-
-T_GLOBAL_META(
- T_META_NAMESPACE("xnu.quicktest"),
- T_META_CHECK_LEAKS(false),
- T_META_RUN_CONCURRENTLY(true)
- );
-
-
-/* **************************************************************************************************************
- * Test ioctl system calls.
- * **************************************************************************************************************
- */
-T_DECL(ioctl, "Sanity check of ioctl by exercising DKIOCGETBLOCKCOUNT and DKIOCGETBLOCKSIZE",
- T_META_ASROOT(true))
-{
- int my_err;
- int my_fd = -1;
- struct statfs * my_infop;
- char * my_ptr;
- int my_blksize;
- long long my_block_count;
- char my_name[MAXPATHLEN];
-
-#if !(TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR)
- /*
- * this test won't be able to open the root disk device unless CSR is
- * disabled or in AppleInternal mode
- */
- if (csr_check( CSR_ALLOW_UNRESTRICTED_FS ) &&
- csr_check( CSR_ALLOW_APPLE_INTERNAL )) {
- T_SKIP("System Integrity Protection is enabled");
- }
-#endif
-
- T_SETUPBEGIN;
-
- T_WITH_ERRNO;
- T_ASSERT_GT(getmntinfo( &my_infop, MNT_NOWAIT ), 0, "getmntinfo");
-
- /* make this a raw device */
- strlcpy( &my_name[0], &my_infop->f_mntfromname[0], sizeof(my_name));
- if ((my_ptr = strrchr( &my_name[0], '/' )) != 0) {
- if (my_ptr[1] != 'r') {
- my_ptr[strlen( my_ptr )] = 0x00;
- memmove( &my_ptr[2], &my_ptr[1], (strlen( &my_ptr[1] ) + 1));
- my_ptr[1] = 'r';
- }
- }
-
- T_ASSERT_POSIX_SUCCESS(my_fd = open( &my_name[0], O_RDONLY ), "open");
-
- T_SETUPEND;
-
- /* obtain the size of the media (in blocks) */
- T_EXPECT_POSIX_SUCCESS(my_err = ioctl( my_fd, DKIOCGETBLOCKCOUNT, &my_block_count ),
- "ioctl DKIOCGETBLOCKCOUNT");
-
- /* obtain the block size of the media */
- T_EXPECT_POSIX_SUCCESS(my_err = ioctl( my_fd, DKIOCGETBLOCKSIZE, &my_blksize ),
- "ioctl DKIOCGETBLOCKSIZE");
-
- T_LOG( "my_block_count %qd my_blksize %d \n", my_block_count, my_blksize );
-
- if (my_err != -1) {
- /* make sure the returned data looks somewhat valid */
- T_EXPECT_GE(my_blksize, 0, NULL);
- T_EXPECT_LE(my_blksize, 1024 * 1000, NULL);
- }
-
- close( my_fd );
-}
--- /dev/null
+#include <darwintest.h>
+#include <darwintest_utils.h>
+
+#include <mach/mach.h>
+#include <sys/mman.h>
+
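+/*
+ * Pull the kernel's zalloc.c straight into this userspace test with
+ * ZALLOC_TEST defined, stubbing out panic() and the kernel-only
+ * attributes so the zba_* buddy-allocator internals can be exercised
+ * directly.
+ */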
+#undef __abortlike
+#define __abortlike
+#define panic(fmt, ...) ({ T_FAIL(fmt, __VA_ARGS__); abort(); })
+
+#define __security_const_late
+#define ZALLOC_TEST 1
+#include "../osfmk/kern/zalloc.c"
+
+#define ZBA_TEST_SIZE (1ul << 20)
+
+static void
+zba_populate_any(vm_address_t addr, vm_size_t size)
+{
+ int rc = mprotect((void *)addr, size, PROT_READ | PROT_WRITE);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(rc, "mprotect");
+}
+
+static void
+zba_populate_nope(vm_address_t addr, vm_size_t size)
+{
+#pragma unused(addr, size)
+ T_FAIL("Trying to extend the storage");
+ T_END;
+}
+
+static void
+zba_test_allow_extension(void)
+{
+ zba_test_info.zbats_populate = zba_populate_any;
+}
+
+static void
+zba_test_disallow_extension(void)
+{
+ zba_test_info.zbats_populate = zba_populate_nope;
+}
+
+static void
+zba_test_setup(void)
+{
+ kern_return_t kr;
+ int rc;
+
+ kr = vm_allocate(mach_task_self(), &zba_test_info.zbats_base,
+ ZBA_TEST_SIZE + ZBA_CHUNK_SIZE, VM_FLAGS_ANYWHERE);
+ T_ASSERT_MACH_SUCCESS(kr, "vm_allocate()");
+
+ zba_test_info.zbats_base = roundup(zba_test_info.zbats_base,
+ ZBA_CHUNK_SIZE);
+
+ rc = mprotect(zba_base_header(), ZBA_TEST_SIZE, PROT_NONE);
+ T_ASSERT_POSIX_SUCCESS(rc, "mprotect");
+
+ T_LOG("SETUP allocator with base at %p", zba_base_header());
+
+ zba_test_allow_extension();
+ zba_populate(0);
+ zba_init_chunk(0);
+}
+
+T_DECL(zone_buddy_allocator_encodings, "test the buddy allocator formulas")
+{
+ uint8_t bits[sizeof(zba_base_header()->zbah_bits)] = { };
+
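+ /*
+ * Walk every (position, order) pair in a chunk: zba_node() must be
+ * reversible via zba_chain_for_node(), and every non-leaf node must
+ * land on a distinct, previously unseen bit in the header bitmap.
+ */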
+ for (uint32_t o = ZBA_MAX_ORDER + 1; o-- > 0;) {
+ for (vm_address_t pos = 0; pos < ZBA_CHUNK_SIZE; pos += ZBA_GRANULE << o) {
+ struct zone_bits_chain *zbc;
+ size_t node = zba_node(pos, o);
+
+ zbc = zba_chain_for_node(NULL, node, o);
+ T_QUIET; T_ASSERT_EQ(pos, (vm_offset_t)zbc,
+ "zba_node / zba_chain_for_node is reversible (pos: %lx, node %zd)",
+ pos, node);
+
+
+ if (o == 0) {
+ // leaf nodes aren't represented in the bitmap
+ continue;
+ }
+ T_QUIET; T_ASSERT_LT(node, 8 * sizeof(bits), "fits in bitfield: %zd", pos);
+ T_QUIET; T_ASSERT_EQ(0, bits[node / 8] & (1 << (node % 8)), "never seen");
+ bits[node / 8] ^= 1 << (node % 8);
+ }
+ }
+
+ T_PASS("zba_node, zba_chain_for_node look sane");
+}
+
+T_DECL(zone_buddy_allocator, "test the zone bits setup")
+{
+ vm_address_t base, pos;
+
+ zba_test_setup();
+
+ zba_test_disallow_extension();
+
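+ /*
+ * With extension disallowed, exhausting the first chunk granule by
+ * granule and then freeing everything must be satisfied entirely from
+ * the initial chunk; only after extension is re-allowed should the
+ * allocator grow into a second chunk.
+ */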
+ base = (vm_address_t)zba_slot_base();
+ for (pos = zba_chunk_header_size(0); pos < ZBA_CHUNK_SIZE; pos += ZBA_GRANULE) {
+ T_QUIET; T_ASSERT_EQ(base + pos, zba_alloc(0), "alloc");
+ *(uint64_t *)(base + pos) = ~0ull;
+ }
+ for (pos = zba_chunk_header_size(0); pos < ZBA_CHUNK_SIZE; pos += ZBA_GRANULE) {
+ zba_free(base + pos, 0);
+ }
+
+ for (pos = zba_chunk_header_size(0); pos < ZBA_CHUNK_SIZE; pos += ZBA_GRANULE) {
+ T_QUIET; T_ASSERT_EQ(base + pos, zba_alloc(0), "alloc");
+ *(uint64_t *)(base + pos) = ~0ull;
+ }
+ zba_test_allow_extension();
+
+ base += ZBA_CHUNK_SIZE;
+ for (pos = zba_chunk_header_size(1); pos < ZBA_CHUNK_SIZE; pos += ZBA_GRANULE) {
+ T_QUIET; T_ASSERT_EQ(base + pos, zba_alloc(0), "alloc");
+ *(uint64_t *)(base + pos) = ~0ull;
+ }
+
+ for (pos = zba_chunk_header_size(1); pos < ZBA_CHUNK_SIZE; pos += ZBA_GRANULE) {
+ zba_free(base + pos, 0);
+ }
+ base -= ZBA_CHUNK_SIZE;
+ for (pos = zba_chunk_header_size(0); pos < ZBA_CHUNK_SIZE; pos += ZBA_GRANULE) {
+ zba_free(base + pos, 0);
+ }
+}
pgtrace.py \
xnutriage.py \
zonetriage.py \
- sysreg.py
+ sysreg.py \
+ counter.py
ifneq ($(PLATFORM),MacOSX)
LLDBMACROS_PYTHON_FILES+= \
self._thread_groups = []
self._allproc = []
self._terminated_tasks_list = []
+ self._terminated_threads_list = []
self._zones_list = []
self._zombproc_list = []
self._kernel_types_cache = {} #this will cache the Type objects as and when requested.
caching.SaveDynamicCacheData("kern._terminated_tasks_list", self._terminated_tasks_list)
return self._terminated_tasks_list
+ if name == 'terminated_threads' :
+ self._terminated_threads_list = caching.GetDynamicCacheData("kern._terminated_threads_list", [])
+ if len(self._terminated_threads_list) > 0 : return self._terminated_threads_list
+ thread_queue_head = self.GetGlobalVariable('terminated_threads')
+ thread_type = LazyTarget.GetTarget().FindFirstType('thread')
+ thread_ptr_type = thread_type.GetPointerType()
+ for trd in IterateQueue(thread_queue_head, thread_ptr_type, 'threads'):
+ self._terminated_threads_list.append(trd)
+ caching.SaveDynamicCacheData("kern._terminated_threads_list", self._terminated_threads_list)
+ return self._terminated_threads_list
+
if name == 'procs' :
self._allproc = caching.GetDynamicCacheData("kern._allproc", [])
if len(self._allproc) > 0 : return self._allproc
tabs_search_rex = re.compile("^\s*\t+",re.MULTILINE|re.DOTALL)
+def find_non_ascii(s):
+ for c in s:
+ if ord(c) >= 0x80: return True
+ return False
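+# Illustrative examples (not from the original source):
+#   find_non_ascii("caf\xc3\xa9") -> True  (0xc3 >= 0x80)
+#   find_non_ascii("cafe")        -> False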
+
if __name__ == "__main__":
if len(sys.argv) < 2:
print >>sys.stderr, "Error: Unknown arguments"
fh = open(fname)
strdata = fh.readlines()
lineno = 0
- tab_check_status = True
+ syntax_fail = False
for linedata in strdata:
lineno += 1
if len(tabs_search_rex.findall(linedata)) > 0 :
print >>sys.stderr, "Error: Found a TAB character at %s:%d" % (fname, lineno)
- tab_check_status = False
- if tab_check_status == False:
+ syntax_fail = True
+ if find_non_ascii(linedata):
+ print >>sys.stderr, "Error: Found a non ascii character at %s:%d" % (fname, lineno)
+ syntax_fail = True
+ if syntax_fail:
print >>sys.stderr, "Error: Syntax check failed. Please fix the errors and try again."
sys.exit(1)
#now check for error in compilation
--- /dev/null
+from memory import IterateZPerCPU
+from xnu import *
+
+@lldb_type_summary(['scalable_counter_t'])
+@header("Counter Value\n-------------")
+def GetSimpleCounter(counter):
+ """ Prints out the value of a percpu counter
+ params: counter: value - value object representing counter
+ returns: str - The value of the counter as a string.
+ """
+ val = 0
+ for v in IterateZPerCPU(counter, "uint64_t *"):
+ val += dereference(v)
+ return str(val)
+
+@lldb_command('showcounter')
+def ShowSimpleCounter(cmd_args=None):
+ """ Show the value of a percpu counter.
+ Usage: showcounter <address of counter>
+ """
+ if not cmd_args:
+ raise ArgumentError("Please specify the address of the counter you want to read.")
+ print GetSimpleCounter(kern.GetValueFromAddress(cmd_args[0], "scalable_counter_t"))
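+
+# Example usage from an lldb session (the counter address below is purely
+# illustrative, not a real symbol):
+#   (lldb) showcounter 0xfffffe8000f1c080
+#   42
+# The printed value is the sum of every per-CPU slot of the scalable counter.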
func(t, space, ctx, taskports_idx, 0, t.itk_debug_control, 17)
if unsigned(t.itk_task_access) > 0:
func(t, space, ctx, taskports_idx, 0, t.itk_task_access, 17)
- if unsigned(t.itk_self[1]) > 0: ## task read port
- func(t, space, ctx, taskports_idx, 0, t.itk_self[1], 17)
- if unsigned(t.itk_self[2]) > 0: ## task inspect port
- func(t, space, ctx, taskports_idx, 0, t.itk_self[2], 17)
+ if unsigned(t.itk_task_ports[1]) > 0: ## task read port
+ func(t, space, ctx, taskports_idx, 0, t.itk_task_ports[1], 17)
+ if unsigned(t.itk_task_ports[2]) > 0: ## task inspect port
+ func(t, space, ctx, taskports_idx, 0, t.itk_task_ports[2], 17)
## Task name port (not a send right, just a naked ref); TASK_FLAVOR_NAME = 3
- if unsigned(t.itk_self[3]) > 0:
- func(t, space, ctx, taskports_idx, 0, t.itk_self[3], 0)
+ if unsigned(t.itk_task_ports[3]) > 0:
+ func(t, space, ctx, taskports_idx, 0, t.itk_task_ports[3], 0)
## task resume port is a receive right to resume the task
if unsigned(t.itk_resume) > 0:
leftrz = 16
else:
alloc_type = "zone"
- leftrz = unsigned(zone.kasan_redzone)
+ leftrz = unsigned(zone.z_kasan_redzone)
else:
alloc_type = "kalloc"
if asz - usz >= 2*pgsz:
import base64
import zlib
+# can be removed once we move to Python3.1+
+from future.utils.surrogateescape import register_surrogateescape
+register_surrogateescape()
+
class Globals(object):
pass
G = Globals()
def enum(**args):
return type('enum', (), args)
+#
+# Decode bytes as UTF-8, using surrogateescape if there are invalid UTF-8
+# sequences; see PEP-383
+#
+def BytesToString(b):
+ return b.decode('utf-8', errors="surrogateescape")
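+
+# A minimal sketch of the surrogateescape behavior relied on above (assuming
+# Python 3 semantics, or the future backport registered at import time):
+# undecodable bytes survive a decode/encode round trip as lone surrogates.
+#   raw = b'name\xff\x00'
+#   s = raw.decode('utf-8', errors='surrogateescape')       # '\xff' -> u'\udcff'
+#   assert s.encode('utf-8', errors='surrogateescape') == raw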
+
KCSUBTYPE_TYPE = enum(KC_ST_CHAR=1, KC_ST_INT8=2, KC_ST_UINT8=3, KC_ST_INT16=4, KC_ST_UINT16=5, KC_ST_INT32=6, KC_ST_UINT32=7, KC_ST_INT64=8, KC_ST_UINT64=9)
@staticmethod
def FromBinaryTypeData(byte_data):
(st_flag, st_type, st_offset, st_size, st_name) = struct.unpack_from('=BBHI32s', byte_data)
- st_name = st_name.rstrip('\x00')
+ st_name = BytesToString(st_name.rstrip('\0'))
return KCSubTypeElement(st_name, st_type, st_size, st_offset, st_flag)
@staticmethod
return self.totalsize
def GetValueAsString(self, base_data, array_pos=0):
- return str(self.GetValue(base_data, array_pos))
+ v = self.GetValue(base_data, array_pos)
+ if isinstance(v, bytes):
+ return BytesToString(v)
+ return str(v)
def GetValue(self, base_data, array_pos=0):
return struct.unpack_from(self.unpack_fmt, base_data[self.offset + (array_pos * self.size):])[0]
elif self.i_type == GetTypeForName('KCDATA_TYPE_UINT32_DESC'):
self.is_naked_type = True
u_d = struct.unpack_from('32sI', self.i_data)
- self.i_name = u_d[0].strip(chr(0))
+ self.i_name = BytesToString(u_d[0].rstrip('\0'))
self.obj = u_d[1]
logging.info("0x%08x: %s%s" % (self.offset, INDENT(), self.i_name))
elif self.i_type == GetTypeForName('KCDATA_TYPE_UINT64_DESC'):
self.is_naked_type = True
u_d = struct.unpack_from('32sQ', self.i_data)
- self.i_name = u_d[0].strip(chr(0))
+ self.i_name = BytesToString(u_d[0].rstrip('\0'))
self.obj = u_d[1]
logging.info("0x%08x: %s%s" % (self.offset, INDENT(), self.i_name))
KCSubTypeElement('imageLoadAddress', KCSUBTYPE_TYPE.KC_ST_UINT64, 8, 0, 0),
KCSubTypeElement('imageUUID', KCSUBTYPE_TYPE.KC_ST_UINT8, KCSubTypeElement.GetSizeForArray(16, 1), 8, 1),
KCSubTypeElement('imageSlidBaseAddress', KCSUBTYPE_TYPE.KC_ST_UINT64, 8, 24, 0),
+ KCSubTypeElement('sharedCacheSlidFirstMapping', KCSUBTYPE_TYPE.KC_ST_UINT64, 8, 32, 0),
),
'shared_cache_dyld_load_info',
legacy_size = 0x18
(
KCSubTypeElement.FromBasicCtype('stackshot_duration', KCSUBTYPE_TYPE.KC_ST_UINT64, 0),
KCSubTypeElement.FromBasicCtype('stackshot_duration_outer', KCSUBTYPE_TYPE.KC_ST_UINT64, 8),
+ KCSubTypeElement.FromBasicCtype('stackshot_duration_prior', KCSUBTYPE_TYPE.KC_ST_UINT64, 16),
), 'stackshot_duration', merge=True
)
return (exit_code, output_str)
-parser = argparse.ArgumentParser(description="Decode a kcdata binary file.")
-parser.add_argument("-l", "--listtypes", action="store_true", required=False, default=False,
- help="List all known types",
- dest="list_known_types")
-
-parser.add_argument("-s", "--stackshot", required=False, default=False,
- help="Generate a stackshot report file",
- dest="stackshot_file")
-
-parser.add_argument("--multiple", help="look for multiple stackshots in a single file", action='store_true')
-
-parser.add_argument("-p", "--plist", required=False, default=False,
- help="output as plist", action="store_true")
-
-parser.add_argument("-S", "--sdk", required=False, default="", help="sdk property passed to xcrun command to find the required tools. Default is empty string.", dest="sdk")
-parser.add_argument("--pretty", default=False, action='store_true', help="make the output a little more human readable")
-parser.add_argument("--incomplete", action='store_true', help="accept incomplete data")
-parser.add_argument("kcdata_file", type=argparse.FileType('r'), help="Path to a kcdata binary file.")
-
-class VerboseAction(argparse.Action):
- def __call__(self, parser, namespace, values, option_string=None):
- logging.basicConfig(level=logging.INFO, stream=sys.stderr, format='%(message)s')
-parser.add_argument('-v', "--verbose", action=VerboseAction, nargs=0)
-
@contextlib.contextmanager
def data_from_stream(stream):
try:
value = '%02X%02X%02X%02X-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X' % tuple(value)
elif 'address' in key.lower() and isinstance(value, (int, long)):
value = '0x%X' % value
- elif key == 'lr':
+ elif key == 'lr' or key == 'sharedCacheSlidFirstMapping':
value = '0x%X' % value
elif key == 'thread_waitinfo':
value = map(formatWaitInfo, value)
if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description="Decode a kcdata binary file.")
+ parser.add_argument("-l", "--listtypes", action="store_true", required=False, default=False,
+ help="List all known types",
+ dest="list_known_types")
+
+ parser.add_argument("-s", "--stackshot", required=False, default=False,
+ help="Generate a stackshot report file",
+ dest="stackshot_file")
+
+ parser.add_argument("--multiple", help="look for multiple stackshots in a single file", action='store_true')
+
+ parser.add_argument("-p", "--plist", required=False, default=False,
+ help="output as plist", action="store_true")
+
+ parser.add_argument("-S", "--sdk", required=False, default="", help="sdk property passed to xcrun command to find the required tools. Default is empty string.", dest="sdk")
+ parser.add_argument("--pretty", default=False, action='store_true', help="make the output a little more human readable")
+ parser.add_argument("--incomplete", action='store_true', help="accept incomplete data")
+ parser.add_argument("kcdata_file", type=argparse.FileType('r'), help="Path to a kcdata binary file.")
+
+ class VerboseAction(argparse.Action):
+ def __call__(self, parser, namespace, values, option_string=None):
+ logging.basicConfig(level=logging.INFO, stream=sys.stderr, format='%(message)s')
+ parser.add_argument('-v', "--verbose", action=VerboseAction, nargs=0)
+
args = parser.parse_args()
if args.multiple and args.stackshot_file:
from utils import *
from core.lazytarget import *
from misc import *
+from kcdata import kcdata_item_iterator, KCObject, GetTypeForName, KCCompressedBufferObject
from collections import namedtuple
+import heapq
# From the defines in bsd/sys/kdebug.h:
print GetKperfStatus()
-class KDCPU(object):
- def __init__(self, store, curidx):
- self.store = store
- self.curidx = curidx
- self.oldest_time = None
+class KDEvent(object):
+ """
+ Wrapper around kevent pointer that handles sorting logic.
+ """
+ def __init__(self, timestamp, kevent):
+ self.kevent = kevent
+ self.timestamp = timestamp
+ def get_kevent(self):
+ return self.kevent
-def IterateKdebugEvents():
+ def __eq__(self, other):
+ return self.timestamp == other.timestamp
+
+ def __lt__(self, other):
+ return self.timestamp < other.timestamp
+
+ def __gt__(self, other):
+ return self.timestamp > other.timestamp
+
+
+class KDCPU(object):
"""
- Yield events from the in-memory kdebug trace buffers.
+ Represents all events from a single CPU.
"""
- ctrl = kern.globals.kd_ctrl_page
+ def __init__(self, cpuid):
+ self.cpuid = cpuid
+ self.iter_store = None
+
+ kdstoreinfo = kern.globals.kdbip[cpuid]
+ self.kdstorep = kdstoreinfo.kd_list_head
+
+ if self.kdstorep.raw == xnudefines.KDS_PTR_NULL:
+ # Returns an empty iterator. It will immediately stop at the
+ # first call to __next__().
+ return
- def get_kdstore(kdstorep):
+ self.iter_store = self.get_kdstore(self.kdstorep)
+
+ # XXX Doesn't have the same logic to avoid un-mergeable events
+ # (respecting barrier_min and bufindx) as the C code.
+
+ self.iter_idx = self.iter_store.kds_readlast
+
+ def get_kdstore(self, kdstorep):
"""
See POINTER_FROM_KDSPTR.
"""
buf = kern.globals.kd_bufs[kdstorep.buffer_index]
return addressof(buf.kdsb_addr[kdstorep.offset])
- def get_kdbuf_timestamp(kdbuf):
- time_cpu = kdbuf.timestamp
- return unsigned(time_cpu)
+ # Event iterator implementation; __next__() returns KDEvent instances.
- if (ctrl.kdebug_flags & xnudefines.KDBG_BFINIT) == 0:
- return
+ def __iter__(self):
+ return self
- barrier_min = ctrl.oldest_time
+ def __next__(self):
+ # This CPU is out of events
+ if self.iter_store is None:
+ raise StopIteration
- if (ctrl.kdebug_flags & xnudefines.KDBG_WRAPPED) != 0:
- # TODO Yield a wrap event with the barrier_min timestamp.
- pass
+ if self.iter_idx == self.iter_store.kds_bufindx:
+ self.iter_store = None
+ raise StopIteration
- # Set up CPU state for merging events.
- ncpus = ctrl.kdebug_cpus
- cpus = []
- for cpu in range(ncpus):
- kdstoreinfo = kern.globals.kdbip[cpu]
- storep = kdstoreinfo.kd_list_head
- store = None
- curidx = 0
- if storep.raw != xnudefines.KDS_PTR_NULL:
- store = get_kdstore(storep)
- curidx = store.kds_readlast
- # XXX Doesn't have the same logic to avoid un-mergeable events
- # (respecting barrier_min and bufindx) as the C code.
+ keventp = addressof(self.iter_store.kds_records[self.iter_idx])
+ timestamp = unsigned(keventp.timestamp)
- cpus.append(KDCPU(store, curidx))
+ # check for writer overrun
+ if timestamp < self.iter_store.kds_timestamp:
+ raise StopIteration
- while True:
- earliest_time = 0xffffffffffffffff
- min_cpu = None
- for cpu in cpus:
- if not cpu.store:
- continue
+ # Advance iterator
+ self.iter_idx += 1
- # Check for overrunning the writer, which also indicates the CPU is
- # out of events.
- if cpu.oldest_time:
- timestamp = cpu.oldest_time
+ if self.iter_idx == xnudefines.EVENTS_PER_STORAGE_UNIT:
+ snext = self.iter_store.kds_next
+ if snext.raw == xnudefines.KDS_PTR_NULL:
+ # Terminate iteration in next loop. Current element is the
+ # last one in this CPU buffer.
+ self.iter_store = None
else:
- timestamp = get_kdbuf_timestamp(
- addressof(cpu.store.kds_records[cpu.curidx]))
- cpu.oldest_time = timestamp
+ self.iter_store = self.get_kdstore(snext)
+ self.iter_idx = self.iter_store.kds_readlast
- if timestamp < cpu.store.kds_timestamp:
- cpu.store = None
- continue
+ return KDEvent(timestamp, keventp)
- if timestamp < earliest_time:
- earliest_time = timestamp
- min_cpu = cpu
+ # Python 2 compatibility
+ def next(self):
+ return self.__next__()
- # Out of events.
- if not min_cpu:
- return
- yield min_cpu.store.kds_records[min_cpu.curidx]
- min_cpu.oldest_time = None
+def IterateKdebugEvents():
+ """
+ Yield events from the in-memory kdebug trace buffers.
+ """
+ ctrl = kern.globals.kd_ctrl_page
- min_cpu.curidx += 1
- if min_cpu.curidx == xnudefines.EVENTS_PER_STORAGE_UNIT:
- next = min_cpu.store.kds_next
- if next.raw == xnudefines.KDS_PTR_NULL:
- min_cpu.store = None
- min_cpu.curidx = None
- else:
- min_cpu.store = get_kdstore(next)
- min_cpu.curidx = min_cpu.store.kds_readlast
+ if (ctrl.kdebug_flags & xnudefines.KDBG_BFINIT) == 0:
+ return
+
+ barrier_min = ctrl.oldest_time
- # This CPU is out of events.
- if min_cpu.curidx == min_cpu.store.kds_bufindx:
- min_cpu.store = None
- continue
+ if (ctrl.kdebug_flags & xnudefines.KDBG_WRAPPED) != 0:
+ # TODO Yield a wrap event with the barrier_min timestamp.
+ pass
+
+ # Merge sort all events from all CPUs.
+ cpus = [KDCPU(cpuid) for cpuid in range(ctrl.kdebug_cpus)]
+
+ for event in heapq.merge(*cpus):
+ yield event.get_kevent()
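+
+# A minimal sketch of the merge step above (illustrative values, not kernel
+# data): heapq.merge lazily interleaves the already-sorted per-CPU streams,
+# relying on KDEvent.__lt__ to order events by timestamp.
+#   cpu0 = [KDEvent(1, 'a'), KDEvent(5, 'c')]
+#   cpu1 = [KDEvent(3, 'b'), KDEvent(7, 'd')]
+#   [e.get_kevent() for e in heapq.merge(cpu0, cpu1)]  # -> ['a', 'b', 'c', 'd']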
def GetKdebugEvent(event):
continue
event = process.ReadMemory(
- unsigned(addressof(event)), event_size, error)
+ unsigned(event), event_size, error)
file_offset += event_size
f.write(event)
written_nevents += 1
kcdata_length = unsigned(kcdata.kcd_length)
if kcdata_addr != 0 and kcdata_length != 0:
print('writing stackshot')
- f.write(struct.pack(CHUNKHDR_PACK, SSHOT_TAG, 1, 0, kcdata_length))
- file_offset += 16
if verbose:
- print('stackshot is {} bytes long'.format(kcdata_length))
print('stackshot starts at offset {}'.format(file_offset))
+ print('stackshot is {} bytes long'.format(kcdata_length))
ssdata = process.ReadMemory(kcdata_addr, kcdata_length, error)
+ magic = struct.unpack('I', ssdata[:4])
+ if magic[0] == GetTypeForName('KCDATA_BUFFER_BEGIN_COMPRESSED'):
+ if verbose:
+ print('found compressed stackshot')
+ iterator = kcdata_item_iterator(ssdata)
+ for item in iterator:
+ kcdata_buffer = KCObject.FromKCItem(item)
+ if isinstance(kcdata_buffer, KCCompressedBufferObject):
+ kcdata_buffer.ReadItems(iterator)
+ decompressed = kcdata_buffer.Decompress(ssdata)
+ ssdata = decompressed
+ kcdata_length = len(ssdata)
+ if verbose:
+ print(
+ 'decompressed stackshot is {} bytes long'.
+ format(kcdata_length))
+
+ f.write(struct.pack(CHUNKHDR_PACK, SSHOT_TAG, 1, 0, kcdata_length))
+ file_offset += 16
+
f.write(ssdata)
file_offset += kcdata_length
if verbose:
(mcs.mbcl_total - total - mcs.mbcl_infree),
mcs.mbcl_fail_cnt, mbuf.mtbl_cache.mc_waiter_cnt,
mcs.mbcl_notified, mcs.mbcl_purge_cnt,
- mbuf.mtbl_maxlimit
- )
+ mbuf.mtbl_maxlimit)
# EndMacro: mbuf_stat
# Macro: mbuf_walkpkt
addr >>= 64 - bits - shift
return kern.GetValueFromAddress(addr, type_str)
-def IterateZPerCPU(root, element_type):
+def GetZPerCPU(root, cpu, element_type = None):
""" Iterates over a percpu variable
params:
root - value : Value object for per-cpu variable
+ cpu - int : the CPU number
element_type - str : Type of element
returns:
one slot
"""
pagesize = kern.globals.page_size
mangle = 1 << (8 * kern.ptrsize - 1)
+ if element_type is None:
+ element_type = root.GetSBValue().GetType()
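+ # Each CPU's slot is laid out one page apart from the base; OR-ing in the
+ # top address bit restores the canonical kernel pointer (the stored base is
+ # assumed to be kept with that bit cleared).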
+ return kern.GetValueFromAddress((int(root) | mangle) + cpu * pagesize, element_type)
+
+def IterateZPerCPU(root, element_type = None):
+ """ Iterates over a percpu variable
+ params:
+ root - value : Value object for per-cpu variable
+ element_type - str : Type of element
+ returns:
+ one slot per CPU
+ """
for i in range(0, kern.globals.zpercpu_early_count):
- yield kern.GetValueFromAddress((int(root) | mangle) + i * pagesize, element_type)
+ yield GetZPerCPU(root, i, element_type)
@lldb_command('showzpcpu', "S")
def ShowZPerCPU(cmd_args=None, cmd_options={}):
Helper class that helpers walking metadata
"""
- @classmethod
- def _looksForeign(cls, addr):
- if addr & (kern.globals.page_size - 1):
- return False
- try:
- meta = kern.GetValueFromAddress(addr, "struct zone_page_metadata *")
- return meta.zm_foreign_cookie[0] == 0x123456789abcdef
- except:
- return False
-
def __init__(self, addr, isPageIndex = False):
global kern
pagesize = kern.globals.page_size
zone_info = kern.GetGlobalVariable('zone_info')
- self.zone_map_min = unsigned(zone_info.zi_map_range.min_address)
- self.zone_map_max = unsigned(zone_info.zi_map_range.max_address)
- self.zone_meta_min = unsigned(zone_info.zi_meta_range.min_address)
- self.zone_meta_max = unsigned(zone_info.zi_meta_range.max_address)
+ def load_range(var):
+ return (unsigned(var.min_address), unsigned(var.max_address))
+
+ def in_range(x, r):
+ return x >= r[0] and x <= r[1]
+
+ FOREIGN = GetEnumValue('zone_addr_kind_t', 'ZONE_ADDR_FOREIGN')
+ NATIVE = GetEnumValue('zone_addr_kind_t', 'ZONE_ADDR_NATIVE')
+
+ self.meta_range = load_range(zone_info.zi_meta_range)
+ self.native_range = load_range(zone_info.zi_map_range[NATIVE])
+ self.foreign_range = load_range(zone_info.zi_map_range[FOREIGN])
+ self.addr_base = min(self.foreign_range[0], self.native_range[0])
addr = unsigned(addr)
if isPageIndex:
self.address = addr
- if self.zone_meta_min <= addr and addr < self.zone_meta_max:
+ if in_range(addr, self.meta_range):
self.kind = 'Metadata'
- addr -= (addr - self.zone_meta_min) % sizeof('struct zone_page_metadata')
+ addr -= addr % sizeof('struct zone_page_metadata')
self.meta_addr = addr
self.meta = kern.GetValueFromAddress(addr, "struct zone_page_metadata *")
- self.page_addr = self.zone_map_min + ((addr - self.zone_meta_min) / sizeof('struct zone_page_metadata') * pagesize)
- self.first_offset = 0
- elif self.zone_map_min <= addr and addr < self.zone_map_max:
+ self.page_addr = self.addr_base + ((addr - self.meta_range[0]) / sizeof('struct zone_page_metadata') * pagesize)
+ elif in_range(addr, self.native_range) or in_range(addr, self.foreign_range):
addr &= ~(pagesize - 1)
- page_idx = (addr - self.zone_map_min) / pagesize
+ page_idx = (addr - self.addr_base) / pagesize
self.kind = 'Element'
self.page_addr = addr
- self.meta_addr = self.zone_meta_min + page_idx * sizeof('struct zone_page_metadata')
+ self.meta_addr = self.meta_range[0] + page_idx * sizeof('struct zone_page_metadata')
self.meta = kern.GetValueFromAddress(self.meta_addr, "struct zone_page_metadata *")
- self.first_offset = 0
- elif ZoneMeta._looksForeign(addr):
- self.kind = 'Element (F)'
- addr &= ~(pagesize - 1)
- self.page_addr = addr
- self.meta_addr = addr
- self.meta = kern.GetValueFromAddress(addr, "struct zone_page_metadata *")
- self.first_offset = 32 # ZONE_FOREIGN_PAGE_FIRST_OFFSET in zalloc.c
else:
self.kind = 'Unknown'
self.meta = None
self.page_addr = 0
self.meta_addr = 0
- self.first_offset = 0
+
+ if self.meta:
+ self.zone = addressof(kern.globals.zone_array[self.meta.zm_index])
+ else:
+ self.zone = None
def isSecondaryPage(self):
- return self.meta and self.meta.zm_secondary_page
+ return self.meta and self.meta.zm_chunk_len >= 0xe
def getPageCount(self):
- return self.meta and self.meta.zm_page_count or 0
+ n = self.meta and self.meta.zm_chunk_len or 0
+ if self.zone and self.zone.z_percpu:
+ n *= kern.globals.zpercpu_early_count
+ return n
+
+ def getAllocAvail(self):
+ if not self.meta: return 0
+ chunk_len = unsigned(self.meta.zm_chunk_len)
+ page_size = unsigned(kern.globals.page_size)
+ return chunk_len * page_size / self.zone.z_elem_size
def getAllocCount(self):
- return self.meta and self.meta.zm_alloc_count or 0
+ if not self.meta: return 0
+ return self.meta.zm_alloc_size / self.zone.z_elem_size
def getReal(self):
if self.isSecondaryPage():
- return ZoneMeta(self.meta - self.meta.zm_page_count)
+ return ZoneMeta(unsigned(self.meta) - sizeof('struct zone_page_metadata') * unsigned(self.meta.zm_page_index))
return self
- def getFreeList(self):
- if self.meta and self.meta.zm_freelist_offs != unsigned(0xffff):
- return kern.GetValueFromAddress(self.page_addr + self.meta.zm_freelist_offs, 'vm_offset_t *')
- return 0
+ def getElementAddress(self, addr):
+ meta = self.getReal()
+ esize = meta.zone.z_elem_size
+ start = meta.page_addr
- def iterateFreeList(self):
- cur = self.getFreeList()
- while cur:
- yield cur
+ if esize == 0:
+ return None
+
+ estart = addr - start
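+ # Round the offset down to an element boundary; e.g. (hypothetical numbers)
+ # with a 48-byte element size, an address 100 bytes into the chunk maps to
+ # the element starting at offset 96.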
+ return unsigned(start + estart - (estart % esize))
+
+ def getInlineBitmapChunkLength(self):
+ if self.zone.z_percpu:
+ return unsigned(self.zone.z_chunk_pages)
+ return unsigned(self.meta.zm_chunk_len)
+
+ def getBitmapSize(self):
+ if not self.meta or self.zone.z_permanent or not self.meta.zm_chunk_len:
+ return 0
+ if self.meta.zm_inline_bitmap:
+ return -4 * self.getInlineBitmapChunkLength()
+ return 8 << (unsigned(self.meta.zm_bitmap) & 0x7);
+
+ def getBitmap(self):
+ if not self.meta or self.zone.z_permanent or not self.meta.zm_chunk_len:
+ return 0
+ if self.meta.zm_inline_bitmap:
+ return unsigned(addressof(self.meta.zm_bitmap))
+ bbase = unsigned(kern.globals.zone_info.zi_bits_range.min_address)
+ index = unsigned(self.meta.zm_bitmap) & ~0x7
+ return bbase + index;
+
+ def getFreeCountSlow(self):
+ if not self.meta or self.zone.z_permanent or not self.meta.zm_chunk_len:
+ return self.getAllocAvail() - self.getAllocCount()
+
+ n = 0
+ if self.meta.zm_inline_bitmap:
+ for i in xrange(0, self.getInlineBitmapChunkLength()):
+ m = kern.GetValueFromAddress(self.meta_addr + i * 16,
+ 'struct zone_page_metadata *');
+ bits = unsigned(m.zm_bitmap)
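+ # Kernighan's population count: each "bits &= bits - 1" clears the lowest
+ # set bit, so the loop runs once per set (free-element) bit.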
+ while bits:
+ n += 1
+ bits &= bits - 1
+ else:
+ bitmap = kern.GetValueFromAddress(self.getBitmap(), 'uint64_t *')
+ for i in xrange(0, 1 << (unsigned(self.meta.zm_bitmap) & 0x7)):
+ bits = unsigned(bitmap[i])
+ while bits:
+ n += 1
+ bits &= bits - 1
+ return n
+
+ def isElementFree(self, addr):
+ meta = self.meta
+
+ if not meta or self.zone.z_permanent or not meta.zm_chunk_len:
+ return True
+
+ start = self.page_addr
+ esize = self.zone.z_elem_size
+ eidx = (addr - start) / esize
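+ # eidx is the element's index within the chunk. The free bitmap keeps one
+ # bit per element (a set bit meaning free), either inline in the per-page
+ # metadata (32 bits per entry) or in an external array of 64-bit words.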
+
+ if meta.zm_inline_bitmap:
+ i = eidx / 32
+ m = unsigned(meta) + sizeof('struct zone_page_metadata') * i
+ bits = kern.GetValueFromAddress(m, meta).zm_bitmap
+ return (bits & (1 << (eidx % 32))) != 0
- cur = dereference(cast(cur, 'vm_offset_t *'))
- cur = unsigned(cur) ^ unsigned(kern.globals.zp_nopoison_cookie)
- cur = kern.GetValueFromAddress(cur, 'vm_offset_t *')
+ else:
+ bitmap = kern.GetValueFromAddress(self.getBitmap(), 'uint64_t *')
+ bits = unsigned(bitmap[eidx / 64])
+ return (bits & (1 << (eidx % 64))) != 0
def iterateElements(self):
if self.meta is None:
return
- esize = self.getZone().z_elem_size
- offs = self.first_offset
- end = kern.globals.page_size
- if not self.meta.zm_percpu:
- end *= self.meta.zm_page_count
+ esize = self.zone.z_elem_size
+ start = 0
+ end = unsigned(kern.globals.page_size) * self.meta.zm_chunk_len
+ end -= end % esize
- while offs + esize <= end:
- yield kern.GetValueFromAddress(self.page_addr + offs, 'void *')
- offs += esize
-
- def getZone(self):
- if self.meta:
- return kern.globals.zone_array[self.meta.zm_index]
- return None
+ for offs in xrange(start, end, esize):
+ yield unsigned(self.page_addr + offs)
@lldb_type_summary(['zone_page_metadata'])
-@header("{:<18s} {:<18s} {:>8s} {:>8s} {:<18s} {:<20s}".format('ZONE_METADATA', 'FREELIST', 'PG_CNT', 'ALLOC_CNT', 'ZONE', 'NAME'))
+@header("{:<20s} {:<10s} {:<10s} {:<24s} {:<20s} {:<20s}".format(
+ 'METADATA', 'PG_CNT', 'ALLOC_CNT', 'BITMAP', 'ZONE', 'NAME'))
def GetZoneMetadataSummary(meta):
""" Summarize a zone metadata object
params: meta - obj representing zone metadata in the kernel
out_str = 'Metadata Description:\n' + GetZoneMetadataSummary.header + '\n'
if meta.isSecondaryPage():
- out_str += "{:#018x} {:#018x} {:8d} {:8d} {:#018x} {:s}\n".format(
- meta.meta_addr, 0, 0, 0, 0, '(fake multipage meta)')
+ out_str += "{:<#20x} {:<10d} {:<10d} {:<#18x} @{:<4d} {:<#20x} {:s}\n".format(
+ meta.meta_addr, 0, 0, 0, 0, 0, '(fake multipage meta)')
meta = meta.getReal()
- zinfo = meta.getZone()
- out_str += "{:#018x} {:#018x} {:8d} {:8d} {:#018x} {:s}".format(
- meta.meta_addr, meta.getFreeList(), meta.getPageCount(), meta.getAllocCount(),
- addressof(zinfo), ZoneName(zinfo))
+ out_str += "{:<#20x} {:<10d} {:<10d} {:<#18x} @{:<4d} {:<#20x} {:s}".format(
+ meta.meta_addr, meta.getPageCount(), meta.getAllocCount(),
+ meta.getBitmap(), meta.getBitmapSize(), meta.zone, ZoneName(meta.zone))
return out_str
-@header("{:<18s} {:>10s} {:>18s} {:>18s} {:<10s}".format(
- 'ADDRESS', 'TYPE', 'METADATA', 'PAGE_ADDR', 'OFFSET'))
+@header("{:<20s} {:<10s} {:<10s} {:<20s} {:<10s}".format(
+ 'ADDRESS', 'TYPE', 'STATUS', 'PAGE_ADDR', 'OFFSET'))
def WhatIs(addr):
""" Information about kernel pointer
"""
global kern
meta = ZoneMeta(addr)
+ estart = None
if meta.meta is None:
out_str = "Address {:#018x} is outside of any zone map ({:#018x}-{:#018x})\n".format(
- addr, meta.zone_map_min, meta.zone_map_max)
+ addr, meta.native_range[0], meta.native_range[-1] + 1)
else:
if meta.kind[0] == 'E': # element
page_offset_str = "{:d}/{:d}K".format(
addr - meta.page_addr, kern.globals.page_size / 1024)
+ estart = meta.getElementAddress(addr)
+ if estart is None:
+ status = "Unattributed"
+ elif meta.isElementFree(estart):
+ status = "Free"
+ else:
+ status = "Allocated"
else:
page_offset_str = "-"
+ status = "-"
out_str = WhatIs.header + '\n'
- out_str += "{meta.address:#018x} {meta.kind:>10s} {meta.meta_addr:#018x} {meta.page_addr:#018x} {:<10s}\n\n".format(
- page_offset_str, meta=meta)
+ out_str += "{meta.address:<#20x} {meta.kind:<10s} {status:<10s} {meta.page_addr:<#20x} {:<10s}\n\n".format(
+ page_offset_str, meta=meta, status=status)
out_str += GetZoneMetadataSummary(meta) + '\n\n'
print out_str
- if meta.kind[0] == 'E':
+ if estart is not None:
print "Hexdump:\n"
- meta = meta.getReal()
- esize = meta.getZone().z_elem_size
- start = meta.page_addr
-
- estart = addr - (start - meta.first_offset)
- estart = start + estart - (estart % esize)
+ meta = meta.getReal()
+ esize = meta.zone.z_elem_size
+ start = meta.page_addr
+ marks = {unsigned(addr): ">"}
try:
if estart > start:
data_array = kern.GetValueFromAddress(estart - 16, "uint8_t *")
print_hex_data(data_array[0:16], estart - 16, "")
- print "------------------------------------------------------------------"
except:
pass
+ print "------------------------------------------------------------------"
try:
data_array = kern.GetValueFromAddress(estart, "uint8_t *")
- print_hex_data(data_array[0:esize], estart, "")
+ print_hex_data(data_array[0:esize], estart, "", marks)
except:
+ print "*** unable to read memory ***"
pass
+ print "------------------------------------------------------------------"
try:
- print "------------------------------------------------------------------"
data_array = kern.GetValueFromAddress(estart + esize, "uint8_t *")
print_hex_data(data_array[0:16], estart + esize, "")
except:
# Macro: showzcache
@lldb_type_summary(['zone','zone_t'])
-@header("{:<18s} {:>5s} {:>10s} {:>12s} {:>12s} {:>9s} {:>9s} {:>9s} {:>9s} {:>9s} {:<20s}".format(
-'ZONE', 'ELTS', 'D FULL/EMPTY', 'ALLOCS', 'FREES', 'D_SWAP', 'D_FILL', 'D_DRAIN', 'D_GC', 'D_FAIL', 'NAME'))
-
-def GetZoneCacheSummary(zone, O):
- """ Summarize a zone's cache with important information.
- params:
- zone: value - obj representing a zone in kernel
- returns:
- str - summary of the zone's cache contents
- """
- format_string = '{:#018x} {:>5d} {:>4d} / {:>4d} {:>12,d} {:>12,d} {:>9,d} {:>9,d} {:>9,d} {:>9,d} {:>9,d} {:<20s}'
- mag_capacity = kern.GetGlobalVariable('magazine_element_count')
- depot_capacity = kern.GetGlobalVariable('depot_element_count')
-
- cache_elem_count = 0
- allocs = 0
- frees = 0
-
- if zone.__getattr__('cpu_cache_enabled') :
- for cache in IterateZPerCPU(zone.zcache.zcc_pcpu, 'struct zcc_per_cpu_cache *'):
- cache_elem_count += cache.current.zcc_magazine_index
- cache_elem_count += cache.previous.zcc_magazine_index
- allocs += cache.zcc_allocs
- frees += cache.zcc_frees
-
- depot = zone.zcache.zcc_depot
- cache_elem_count += depot.zcc_depot_index * mag_capacity
- print O.format(format_string, zone, cache_elem_count,
- depot.zcc_depot_index, depot_capacity - depot.zcc_depot_index,
- allocs, frees, depot.zcc_swap, depot.zcc_fill, depot.zcc_drain,
- depot.zcc_gc, depot.zcc_fail, ZoneName(zone))
-
-@lldb_command('showzcache', fancy=True)
-def ZcachePrint(cmd_args=None, cmd_options={}, O=None):
- """ Routine to print a summary listing of all the kernel zones cache contents
- All columns are printed in decimal
- """
- global kern
- with O.table(GetZoneCacheSummary.header):
- for zval in kern.zones:
- if zval.__getattr__('cpu_cache_enabled') :
- GetZoneCacheSummary(zval, O)
-
-# EndMacro: showzcache
-
-# Macro: showzcachecpu
-
-@lldb_type_summary(['zone','zone_t'])
-@header("{:18s} {:32s} {:<10s} {:<10s}".format(
-'ZONE', 'NAME', 'CACHE_ELTS', 'CPU_INFO'))
-
-def GetZoneCacheCPUSummary(zone, O):
+@header("{:18s} {:32s} {:>6s} {:>6s} {:>6s} {:>6s} {:>6s} {:>6s} {:<s}".format(
+ 'ZONE', 'NAME', 'WSS', 'CONT', 'USED', 'FREE', 'CACHED', 'RECIRC', 'CPU_CACHES'))
+def GetZoneCacheCPUSummary(zone, verbose, O):
""" Summarize a zone's cache broken up per cpu
params:
zone: value - obj representing a zone in kernel
returns:
str - summary of the zone's per CPU cache contents
"""
- format_string = '{:#018x} {:32s} {:10d} {cpuinfo:s}'
+ format_string = '{zone:#018x} {:32s} '
+ format_string += '{zone.z_elems_free_wss:6d} {cont:6.2f} '
+ format_string += '{used:6d} {zone.z_elems_free:6d} '
+ format_string += '{cached:6d} {recirc:6d} {cpuinfo:s}'
cache_elem_count = 0
cpu_info = ""
- per_cpu_count = 0
- mag_capacity = kern.GetGlobalVariable('magazine_element_count')
+ mag_capacity = unsigned(kern.GetGlobalVariable('zc_magazine_size'))
depot_capacity = kern.GetGlobalVariable('depot_element_count')
- if zone.__getattr__('cpu_cache_enabled') :
- i = 0
- for cache in IterateZPerCPU(zone.zcache.zcc_pcpu, 'struct zcc_per_cpu_cache *'):
- if i is not 0:
- cpu_info += ", "
- per_cpu_count = cache.current.zcc_magazine_index
- per_cpu_count += cache.previous.zcc_magazine_index
- cache_elem_count += per_cpu_count
- cpu_info += "CPU {:d}: {:5}".format(i,per_cpu_count)
- i += 1
- cache_elem_count += zone.zcache.zcc_depot.zcc_depot_index * mag_capacity
-
- print O.format(format_string, zone, ZoneName(zone), cache_elem_count,cpuinfo = cpu_info)
+ if zone.z_pcpu_cache:
+ if verbose:
+ cpu_info = None
+ for cache in IterateZPerCPU(zone.z_pcpu_cache):
+ if cpu_info is None:
+ cpu_info = "{ "
+ else:
+ cpu_info += ", "
+ per_cpu_count = unsigned(cache.zc_alloc_cur)
+ per_cpu_count += unsigned(cache.zc_free_cur)
+ per_cpu_count += unsigned(cache.zc_depot_cur) * mag_capacity
+ cache_elem_count += per_cpu_count
+ cpu_info += "{:3d} /{cache.zc_depot_max:3d}".format(per_cpu_count, cache=cache)
+ cpu_info += " }"
+ else:
+ depot_cur = 0
+ depot_max = 0
+ for cache in IterateZPerCPU(zone.z_pcpu_cache):
+ depot_cur += unsigned(cache.zc_alloc_cur)
+ depot_cur += unsigned(cache.zc_free_cur)
+ cache_elem_count += unsigned(cache.zc_depot_cur) * mag_capacity
+ depot_max += unsigned(cache.zc_depot_max)
+ cache_elem_count += depot_cur
+
+ cpus = unsigned(kern.globals.zpercpu_early_count)
+ cpu_info = "total: {:3d} / {:3d}, avg: {:5.1f} / {:5.1f}".format(
+ depot_cur, depot_max, float(depot_cur) / cpus, float(depot_max) / cpus)
+
+
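+ # Note: z_contention_wma is assumed to be a fixed-point moving average with
+ # 8 fractional bits (hence the /256. below); "cached" counts elements held
+ # in per-CPU magazines and depots rather than on the zone free lists.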
+ print O.format(format_string, ZoneName(zone), cached=cache_elem_count,
+ used=zone.z_elems_avail - cache_elem_count - zone.z_elems_free,
+ cont=float(zone.z_contention_wma) / 256.,
+ recirc=zone.z_recirc_cur * mag_capacity,
+ zone=zone, cpuinfo = cpu_info)
-@lldb_command('showzcachecpu', fancy=True)
+@lldb_command('showzcache', fancy=True)
def ZcacheCPUPrint(cmd_args=None, cmd_options={}, O=None):
- """ Routine to print a summary listing of all the kernel zones cache contents
- All columns are printed in decimal
+ """
+ Routine to print a summary listing of all the kernel zones cache contents
+
+ Usage: showzcache [-V]
+
+ Use -V to see more detailed output
"""
global kern
+ verbose = "-V" in cmd_options
with O.table(GetZoneCacheCPUSummary.header):
- for zval in kern.zones:
- if zval.__getattr__('cpu_cache_enabled'):
- GetZoneCacheCPUSummary(zval, O)
+ if len(cmd_args) == 1:
+ zone = kern.GetValueFromAddress(cmd_args[0], 'struct zone *')
+ GetZoneCacheCPUSummary(zone, verbose, O);
+ else:
+ for zval in kern.zones:
+ if zval.z_self:
+ GetZoneCacheCPUSummary(zval, verbose, O)
-# EndMacro: showzcachecpu
+# EndMacro: showzcache
# Macro: zprint
returns:
zone - python dictionary with zone stats
"""
+ pcpu_scale = 1
+ if zone_val.z_percpu:
+ pcpu_scale = unsigned(kern.globals.zpercpu_early_count)
pagesize = kern.globals.page_size
zone = {}
- zone["free_size"] = zone_val.countfree * zone_val.pcpu_elem_size
- mag_capacity = kern.GetGlobalVariable('magazine_element_count')
- zone["page_count"] = unsigned(zone_val.page_count)
- zone["allfree_page_count"] = unsigned(zone_val.allfree_page_count)
+ mag_capacity = unsigned(kern.GetGlobalVariable('zc_magazine_size'))
+ zone["page_count"] = unsigned(zone_val.z_wired_cur) * pcpu_scale
+ zone["allfree_page_count"] = unsigned(zone_val.z_wired_empty)
+
+ cache_elem_count = 0
+ if zone_val.z_pcpu_cache:
+ for cache in IterateZPerCPU(zone_val.z_pcpu_cache):
+ cache_elem_count += unsigned(cache.zc_alloc_cur)
+ cache_elem_count += unsigned(cache.zc_free_cur)
+ cache_elem_count += unsigned(cache.zc_depot_cur) * mag_capacity
+
+ zone["size"] = zone["page_count"] * pagesize
- zone["size"] = zone_val.page_count * pagesize
- zone["used_size"] = zone["size"] - zone["free_size"]
- zone["element_count"] = zone_val.countavail - zone_val.countfree
+ zone["free_size"] = zone_val.z_elems_free * zone_val.z_elem_size * pcpu_scale
+ zone["cached_size"] = cache_elem_count * zone_val.z_elem_size * pcpu_scale
+ zone["used_size"] = zone["size"] - zone["free_size"] - zone["cached_size"]
- if zone_val.percpu:
+ zone["element_count"] = zone_val.z_elems_avail - zone_val.z_elems_free - cache_elem_count
+ zone["cache_element_count"] = cache_elem_count
+ zone["free_element_count"] = zone_val.z_elems_free
+
+ if zone_val.z_percpu:
zone["allocation_size"] = unsigned(pagesize)
- zone["allocation_ncpu"] = unsigned(zone_val.alloc_pages)
+ zone["allocation_ncpu"] = unsigned(zone_val.z_chunk_pages)
else:
- zone["allocation_size"] = unsigned(zone_val.alloc_pages * pagesize)
+ zone["allocation_size"] = unsigned(zone_val.z_chunk_pages * pagesize)
zone["allocation_ncpu"] = 1
zone["allocation_count"] = zone["allocation_size"] / zone_val.z_elem_size
zone["allocation_waste"] = (zone["allocation_size"] % zone_val.z_elem_size) * zone["allocation_ncpu"]
-
+
if not zone_val.__getattr__("z_self") :
zone["destroyed"] = True
else:
else:
zone[mark[0]] = False
- cache_elem_count = 0
- if zone_val.__getattr__('cpu_cache_enabled') :
- for cache in IterateZPerCPU(zone_val.zcache.zcc_pcpu, 'struct zcc_per_cpu_cache *'):
- cache_elem_count += cache.current.zcc_magazine_index
- cache_elem_count += cache.previous.zcc_magazine_index
- cache_elem_count += zone_val.zcache.zcc_depot.zcc_depot_index * mag_capacity
- zone["cache_element_count"] = cache_elem_count
zone["name"] = ZoneName(zone_val)
if zone_val.exhaustible:
zone["exhaustible"] = True
else:
zone["exhaustible"] = False
- zone["sequester_page_count"] = unsigned(zone_val.sequester_page_count)
- zone["page_count_max"] = unsigned(zone_val.page_count_max)
+ zone["sequester_page_count"] = (unsigned(zone_val.z_va_cur) -
+ unsigned(zone_val.z_wired_cur)) * pcpu_scale
+ zone["page_count_max"] = unsigned(zone_val.z_wired_max) * pcpu_scale
return zone
@lldb_type_summary(['zone','zone_t'])
-@header(("{:<18s} {:_^35s} {:_^24s} {:_^13s} {:_^28s}\n"+
-"{:<18s} {:>11s} {:>11s} {:>11s} {:>8s} {:>7s} {:>7s} {:>6s} {:>6s} {:>8s} {:>6s} {:>5s} {:>7s} {:<18s} {:<20s}").format(
+@header(("{:<18s} {:_^47s} {:_^24s} {:_^13s} {:_^28s}\n"+
+"{:<18s} {:>11s} {:>11s} {:>11s} {:>11s} {:>8s} {:>7s} {:>7s} {:>6s} {:>6s} {:>8s} {:>6s} {:>5s} {:>7s} {:<18s} {:<20s}").format(
'', 'SIZE (bytes)', 'ELEMENTS (#)', 'PAGES', 'ALLOC CHUNK CONFIG',
-'ZONE', 'TOTAL', 'ALLOC', 'FREE', 'ALLOC', 'FREE', 'CACHE', 'COUNT', 'FREE', 'SIZE (P)', 'ELTS', 'WASTE', 'ELT_SZ', 'FLAGS', 'NAME'))
+'ZONE', 'TOTAL', 'ALLOC', 'CACHE', 'FREE', 'ALLOC', 'CACHE', 'FREE', 'COUNT', 'FREE', 'SIZE (P)', 'ELTS', 'WASTE', 'ELT_SZ', 'FLAGS', 'NAME'))
def GetZoneSummary(zone_val, marks, stats):
""" Summarize a zone with important information. See help zprint for description of each field
params:
out_string = ""
zone = GetZone(zone_val, marks)
- format_string = '{zone:#018x} {cur_size:11,d} {used_size:11,d} {free_size:11,d} '
- format_string += '{count_elts:8,d} {zone.countfree:7,d} {cache_elem_count:7,d} '
- format_string += '{zone.page_count:6,d} {zone.allfree_page_count:6,d} '
- format_string += '{alloc_size_kb:3,d}K ({zone.alloc_pages:d}) {alloc_count:6,d} {alloc_waste:5,d} {zone.pcpu_elem_size:7,d} '
+ pcpu_scale = 1
+ if zone_val.z_percpu:
+ pcpu_scale = unsigned(kern.globals.zpercpu_early_count)
+
+ format_string = '{zone:#018x} {zd[size]:11,d} {zd[used_size]:11,d} {zd[cached_size]:11,d} {zd[free_size]:11,d} '
+ format_string += '{zd[element_count]:8,d} {zd[cache_element_count]:7,d} {zone.z_elems_free:7,d} '
+ format_string += '{z_wired_cur:6,d} {z_wired_empty:6,d} '
+ format_string += '{alloc_size_kb:3,d}K ({zone.z_chunk_pages:d}) '
+ format_string += '{zd[allocation_count]:6,d} {zd[allocation_waste]:5,d} {z_elem_size:7,d} '
format_string += '{markings:<18s} {zone_name:<20s}'
markings=""
if zone["destroyed"]:
markings+="I"
-
+
for mark in marks:
if zone[mark[0]]:
markings += mark[1]
markings+=" "
alloc_size_kb = zone["allocation_size"] / 1024
- out_string += format_string.format(zone=zone_val, free_size=zone["free_size"], used_size=zone["used_size"],
- cur_size=zone["size"], count_elts=zone["element_count"], cache_elem_count=zone["cache_element_count"],
- alloc_count=zone["allocation_count"], alloc_size_kb=alloc_size_kb, alloc_waste=zone["allocation_waste"],
- markings=markings, zone_name=zone["name"])
+ out_string += format_string.format(zone=zone_val, zd=zone,
+ z_wired_cur=unsigned(zone_val.z_wired_cur) * pcpu_scale,
+ z_wired_empty=unsigned(zone_val.z_wired_empty) * pcpu_scale,
+ z_elem_size=unsigned(zone_val.z_elem_size) * pcpu_scale,
+ alloc_size_kb=alloc_size_kb, markings=markings, zone_name=zone["name"])
if zone["exhaustible"] :
out_string += " (max: {:d})".format(zone["page_count_max"] * pagesize)
stats["cur_size"] += zone["size"]
stats["used_size"] += zone["used_size"]
+ stats["cached_size"] += zone["cached_size"]
stats["free_size"] += zone["free_size"]
stats["cur_pages"] += zone["page_count"]
stats["free_pages"] += zone["allfree_page_count"]
Output json
All columns are printed in decimal
Legend:
+ ! - zone uses VA sequestering
+ $ - not encrypted during hibernation
+ A - currently trying to allocate more backing memory from kernel_memory_allocate without VM priv
C - collectable
D - destructible
- X - expandable
- $ - not encrypted during hibernation
- H - exhaustible
+ E - Per-cpu caching is enabled for this zone
F - allows foreign memory (memory not allocated from any zone map)
+ G - currently running GC
+ H - exhaustible
+ I - zone was destroyed and is no longer valid
+ L - zone is being monitored by zleaks
M - gzalloc will avoid monitoring this zone
- R - will be refilled when below low water mark
- O - does not allow refill callout to fill zone on noblock allocation
N - zone requires alignment (avoids padding this zone for debugging)
- A - currently trying to allocate more backing memory from kernel_memory_allocate without VM priv
+ O - does not allow refill callout to fill zone on noblock allocation
+ R - will be refilled when below low water mark
S - currently trying to allocate more backing memory from kernel_memory_allocate with VM priv
W - another thread is waiting for more memory
- E - Per-cpu caching is enabled for this zone
- L - zone is being monitored by zleaks
- G - currently running GC
- I - zone was destroyed and is no longer valid
+ X - expandable
+ Z - elements are zeroed on free
"""
global kern
marks = [
["collectable", "C"],
- ["destructible", "D"],
+ ["z_destructible", "D"],
["expandable", "X"],
- ["noencrypt", "$"],
+ ["z_noencrypt", "$"],
["exhaustible", "H"],
- ["allows_foreign", "F"],
- ["prio_refill_count", "R"],
+ ["z_allows_foreign", "F"],
+ ["z_elems_rsv", "R"],
["no_callout", "O"],
["zleak_on", "L"],
- ["expanding_no_vm_priv", "A"],
- ["expanding_vm_priv", "S"],
- ["waiting", "W"],
- ["cpu_cache_enabled", "E"],
+ ["z_expander", "A"],
+ ["z_expander_vm_priv", "S"],
+ ["z_replenish_wait", "W"],
+ ["z_pcpu_cache", "E"],
["gzalloc_exempt", "M"],
["alignment_required", "N"],
- ["va_sequester", "!"]
+ ["z_va_sequester", "!"],
+ ["z_free_zeroes", "Z"]
]
+
stats = {
- "cur_size": 0, "used_size": 0, "free_size": 0,
+ "cur_size": 0, "used_size": 0, "cached_size": 0, "free_size": 0,
"cur_pages": 0, "free_pages": 0, "seq_pages": 0
}
if zval.z_self:
print GetZoneSummary(zval, marks, stats)
- format_string = '{VT.Bold}{name:19s} {stats[cur_size]:11,d} {stats[used_size]:11,d} {stats[free_size]:11,d} '
+ format_string = '{VT.Bold}{name:19s} {stats[cur_size]:11,d} {stats[used_size]:11,d} {stats[cached_size]:11,d} {stats[free_size]:11,d} '
format_string += ' '
format_string += '{stats[cur_pages]:6,d} {stats[free_pages]:6,d}{VT.EndBold} '
format_string += '(sequester: {VT.Bold}{stats[seq_pages]:,d}{VT.EndBold})'
# EndMacro: zprint
-
-# Macro: showzfreelist
-
-def ShowZfreeListHeader(zone):
- """ Helper routine to print a header for zone freelist.
- (Since the freelist does not have a custom type, this is not defined as a Type Summary).
- params:
- zone:zone_t - Zone object to print header info
- returns:
- None
- """
-
- scaled_factor = (unsigned(kern.globals.zp_factor) +
- (unsigned(zone.z_elem_size) >> unsigned(kern.globals.zp_scale)))
-
- out_str = ""
- out_str += "{0: <9s} {1: <12s} {2: <18s} {3: <18s} {4: <6s}\n".format('ELEM_SIZE', 'COUNT', 'NCOOKIE', 'PCOOKIE', 'FACTOR')
- out_str += "{0: <9d} {1: <12d} 0x{2:0>16x} 0x{3:0>16x} {4: <2d}/{5: <2d}\n\n".format(
- zone.z_elem_size, zone.countavail - zone.countfree, kern.globals.zp_nopoison_cookie, kern.globals.zp_poisoned_cookie, zone.zp_count, scaled_factor)
- out_str += "{0: <7s} {1: <18s} {2: <18s} {3: <18s} {4: <18s} {5: <18s} {6: <14s}\n".format(
- 'NUM', 'ELEM', 'NEXT', 'BACKUP', '^ NCOOKIE', '^ PCOOKIE', 'POISON (PREV)')
- print out_str
-
-def ShowZfreeListChain(zone, zfirst, zlimit):
- """ Helper routine to print a zone free list chain
- params:
- zone: zone_t - Zone object
- zfirst: void * - A pointer to the first element of the free list chain
- zlimit: int - Limit for the number of elements to be printed by showzfreelist
- returns:
- None
- """
- current = Cast(zfirst, 'void *')
- while ShowZfreeList.elts_found < zlimit:
- ShowZfreeList.elts_found += 1
- znext = dereference(Cast(current, 'vm_offset_t *'))
- znext = (unsigned(znext) ^ unsigned(kern.globals.zp_nopoison_cookie))
- znext = kern.GetValueFromAddress(znext, 'vm_offset_t *')
- backup_ptr = kern.GetValueFromAddress((unsigned(Cast(current, 'vm_offset_t')) + unsigned(zone.z_elem_size) - sizeof('vm_offset_t')), 'vm_offset_t *')
- backup_val = dereference(backup_ptr)
- n_unobfuscated = (unsigned(backup_val) ^ unsigned(kern.globals.zp_nopoison_cookie))
- p_unobfuscated = (unsigned(backup_val) ^ unsigned(kern.globals.zp_poisoned_cookie))
- poison_str = ''
- if p_unobfuscated == unsigned(znext):
- poison_str = "P ({0: <d})".format(ShowZfreeList.elts_found - ShowZfreeList.last_poisoned)
- ShowZfreeList.last_poisoned = ShowZfreeList.elts_found
- else:
- if n_unobfuscated != unsigned(znext):
- poison_str = "INVALID"
- print "{0: <7d} 0x{1:0>16x} 0x{2:0>16x} 0x{3:0>16x} 0x{4:0>16x} 0x{5:0>16x} {6: <14s}\n".format(
- ShowZfreeList.elts_found, unsigned(current), unsigned(znext),
- unsigned(backup_val), n_unobfuscated, p_unobfuscated, poison_str)
- if unsigned(znext) == 0:
- break
- current = Cast(znext, 'void *')
+# Macro: showzchunks
def ZoneIteratePageQueue(page):
while page.packed_address:
yield meta
page = meta.meta.zm_page_next
-@static_var('elts_found',0)
-@static_var('last_poisoned',0)
-@lldb_command('showzfreelist')
-def ShowZfreeList(cmd_args=None):
- """ Walk the freelist for a zone, printing out the primary and backup next pointers, the poisoning cookies, and the poisoning status of each element.
- Usage: showzfreelist <zone> [iterations]
+@header("{: <20s} {: <20s} {: <20s} {: <25s} {: <10s} {: <8s} {: <4s} {: >9s}".format(
+ "Zone", "Metadata", "Page", "Bitmap", "Kind", "Queue", "Pgs", "Allocs"))
+def GetZoneChunk(meta, queue, O=None):
+ format_string = "{meta.zone: <#20x} "
+ format_string += "{meta.meta_addr: <#20x} {meta.page_addr: <#20x} "
+ format_string += "{bitmap: <#18x} @{bitmap_size:<5d} "
+ format_string += "{kind:<10s} {queue:<8s} {pgs:<1d}/{chunk:<1d} "
+ format_string += "{alloc_count: >4d}/{avail_count: >4d}"
+
+ pgs = int(meta.zone.z_chunk_pages)
+ chunk = pgs
+ if meta.meta.zm_chunk_len >= 0xe:
+ kind = "secondary"
+ pgs -= int(meta.meta.zm_page_index)
+ else:
+ kind = "primary"
+
+ alloc_count=meta.getAllocCount()
+ avail_count=meta.getAllocAvail()
+ free_count=meta.getFreeCountSlow()
+
+ if alloc_count + free_count != avail_count:
+ format_string += " {VT.Red}bitmap mismatch{VT.Default}"
+
+ return O.format(format_string, meta=meta,
+ alloc_count=alloc_count,
+ avail_count=avail_count,
+ bitmap=meta.getBitmap(),
+ bitmap_size=meta.getBitmapSize(),
+ queue=queue, kind=kind, pgs=pgs, chunk=chunk)
+
+def ShowZChunksImpl(zone, extra_addr=None, cmd_options={}, O=None):
+ verbose = '-V' in cmd_options
+
+ def do_content(meta, O, indent=False):
+ with O.table("{:>5s} {:<20s} {:<10s}".format("#", "Element", "State"), indent=indent):
+ i = 0
+ for e in meta.iterateElements():
+ status = "Allocated"
+ if meta.isElementFree(e):
+ status = "Free"
+ print O.format("{:5d} {:<#20x} {:10s}", i, e, status)
+ i += 1
+
+ if extra_addr is None:
+ with O.table(GetZoneChunk.header):
+ for meta in ZoneIteratePageQueue(zone.z_pageq_full):
+ print GetZoneChunk(meta, "full", O)
+ if verbose: do_content(meta, O, indent=True);
+
+ for meta in ZoneIteratePageQueue(zone.z_pageq_partial):
+ print GetZoneChunk(meta, "partial", O)
+ if verbose: do_content(meta, O, indent=True);
+
+ for meta in ZoneIteratePageQueue(zone.z_pageq_empty):
+ print GetZoneChunk(meta, "empty", O)
+ if verbose: do_content(meta, O, indent=True);
+
+ for meta in ZoneIteratePageQueue(zone.z_pageq_va):
+ print GetZoneChunk(meta, "va", O)
+ else:
+ meta = ZoneMeta(extra_addr, isPageIndex="-I" in cmd_options).getReal()
+ with O.table(GetZoneChunk.header):
+ print GetZoneChunk(meta, "N/A", O)
+ do_content(meta, O)
+
+@lldb_command('showzchunks', "IV", fancy=True)
+def ShowZChunks(cmd_args=None, cmd_options={}, O=None):
+ """
+ prints the list of zone chunks, or the content of a given chunk
+
+ Usage: showzchunks <zone> [-I] [-V] [address]
+
+ Use -I to interpret [address] as a page index
+ Use -V to show the contents of all the chunks
- Will walk up to 50 elements by default, pass a limit in 'iterations' to override.
+ [address] can be any address belonging to the zone, or metadata
"""
+
if not cmd_args:
- print ShowZfreeList.__doc__
- return
- ShowZfreeList.elts_found = 0
- ShowZfreeList.last_poisoned = 0
+ return O.error('missing zone argument')
zone = kern.GetValueFromAddress(cmd_args[0], 'struct zone *')
- zlimit = 50
- if len(cmd_args) >= 2:
- zlimit = ArgumentStringToInt(cmd_args[1])
- ShowZfreeListHeader(zone)
- for head in [zone.pages_any_free_foreign, zone.pages_intermediate, zone.pages_all_free]:
- for free_page_meta in ZoneIteratePageQueue(head):
- if ShowZfreeList.elts_found == zlimit:
- break
- zfirst = free_page_meta.getFreeList()
- if zfirst != 0:
- ShowZfreeListChain(zone, zfirst, zlimit)
-
- if ShowZfreeList.elts_found == zlimit:
- print "Stopped at {0: <d} elements!".format(zlimit)
+ if len(cmd_args) == 1:
+ ShowZChunksImpl(zone, cmd_options=cmd_options, O=O)
else:
- print "Found {0: <d} elements!".format(ShowZfreeList.elts_found)
+ addr = unsigned(kern.GetValueFromAddress(cmd_args[1]))
+ ShowZChunksImpl(zone, extra_addr=addr, cmd_options=cmd_options, O=O)
+
+@lldb_command('showallzchunks', fancy=True)
+def ShowAllZChunks(cmd_args=None, cmd_options={}, O=None):
+ """
+ prints the list of all zone chunks
+
+ Usage: showallzchunks
+ """
-# EndMacro: showzfreelist
+ for z in kern.zones:
+ ShowZChunksImpl(z, O=O)
+# EndMacro: showzchunks
# Macro: zstack_showzonesbeinglogged
@lldb_command('zstack_showzonesbeinglogged')
ty = var.GetSBValue().GetTypeName()
r = range(0, ncpu)
- if cpu:
+ if cpu is not None:
r = range(cpu, cpu + 1)
def PCPUSlot(pcpu_var, i):
26: "VM_KERN_MEMORY_SKYWALK",
27: "VM_KERN_MEMORY_LTABLE",
28: "VM_KERN_MEMORY_HV",
+ 29: "VM_KERN_MEMORY_RETIRED",
255:"VM_KERN_MEMORY_ANY",
}
return (kern.Symbolicate(site), "")
return ("", "")
-@lldb_command("showvmtags", "ASJ")
+@lldb_command("showvmtags", "ASJO")
def showvmtags(cmd_args=None, cmd_options={}):
"""Routine to print out info about kernel wired page allocations
usage: showvmtags
iterates kernel map and vm objects totaling allocations by tag.
- usage: showvmtags -S
+ usage: showvmtags -S [-O]
also iterates kernel object pages individually - slow.
- usage: showvmtags -A
+ usage: showvmtags -A [-O]
show all tags, even tags that have no wired count
- usage: showvmtags -J
+ usage: showvmtags -J [-O]
Output json
+
+ -O: list in increasing size order
"""
slow = False
print_json = False
page_size = unsigned(kern.globals.page_size)
nsites = unsigned(kern.globals.vm_allocation_tag_highest) + 1
tagcounts = [0] * nsites
- tagpeaks = [0] * nsites
tagmapped = [0] * nsites
if kern.globals.vm_tag_active_update:
if site:
tagcounts[tag] = unsigned(site.total)
tagmapped[tag] = unsigned(site.mapped)
- tagpeaks[tag] = unsigned(site.peak)
else:
queue_head = kern.globals.vm_objects_wired
for object in IterateQueue(queue_head, 'struct vm_object *', 'wired_objq'):
current["name"] = sitestr
current["size"] = tagcounts[tag]
current["mapped"] = tagmapped[tag]
- current["peak"] = tagpeaks[tag]
current["tag"] = tag
current["tagstr"] = tagstr
current["subtotals"] = []
})
tags.append(current)
+ if "-O" in cmd_options:
+ tags.sort(key = lambda tag: tag['size'])
+
if print_json:
print json.dumps(tags)
else:
print " vm_allocation_tag_highest: {:<7d} ".format(nsites - 1)
- print " {:<7s} {:>7s} {:>7s} {:>7s} {:<50s}".format("tag.kmod", "peak", "size", "mapped", "name")
+ print " {:<7s} {:>7s} {:>7s} {:<50s}".format("tag.kmod", "size", "mapped", "name")
for tag in tags:
if not tagstr:
tagstr = ""
- print " {:>3d}{:<4s} {:>7d}K {:>7d}K {:>7d}K {:<50s}".format(tag["tag"], tag["tagstr"], tag["peak"] / 1024, tag["size"] / 1024, tag["mapped"] / 1024, tag["name"])
+ print " {:>3d}{:<4s} {:>7d}K {:>7d}K {:<50s}".format(tag["tag"], tag["tagstr"], tag["size"] / 1024, tag["mapped"] / 1024, tag["name"])
for sub in tag["subtotals"]:
if ((sub["flags"] & 0x007f) == 0):
kind_str = "named"
else:
kind_str = "from"
- print " {:>7s} {:>7s} {:>7s} {:>7d}K {:s} {:>3d}{:<4s} {:<50s}".format(" ", " ", " ", sub["amount"] / 1024, kind_str, sub["tag"], sub["tagstr"], sub["sitestr"])
+ print " {:>7s} {:>7d}K {:s} {:>3d}{:<4s} {:<50s}".format(" ", sub["amount"] / 1024, kind_str, sub["tag"], sub["tagstr"], sub["sitestr"])
- print "Total: {:>7d}K {:>7d}K".format(total / 1024, totalmapped / 1024)
+ print "Total: {:>7d}K {:>7d}K".format(total / 1024, totalmapped / 1024)
return None
def FindAllocatedElementsInZone(zone):
elements = []
- if not zone.z_self or zone.permanent:
+ if not zone.z_self or zone.z_permanent:
return elements
- for head in [zone.pages_any_free_foreign, zone.pages_all_used_foreign,
- zone.pages_intermediate, zone.pages_all_used]:
-
+ for head in [zone.z_pageq_partial, zone.z_pageq_full]:
for meta in ZoneIteratePageQueue(head):
- free_elements = set(meta.iterateFreeList())
-
for elem in meta.iterateElements():
- if elem in free_elements:
- continue
-
- if elem not in free_elements:
+ if not meta.isElementFree(elem):
elements.append(elem)
- elem += zone.z_elem_size
return elements
"""Routine to print all apple_protect pagers
usage: show_all_apple_protect_pagers
"""
- print "{:>3s} {:<3s} {:<18s} {:>5s} {:>5s} {:>6s} {:<18s} {:<18s} {:<18s} {:<18s} {:<18s} {:<18s}\n".format("#", "#", "pager", "refs", "ready", "mapped", "mo_control", "object", "offset", "crypto_offset", "crypto_start", "crypto_end")
+ print "{:>3s} {:<3s} {:<18s} {:>5s} {:>5s} {:>6s} {:>6s} {:<18s} {:<18s} {:<18s} {:<18s} {:<18s}\n".format("#", "#", "pager", "refs", "ready", "mapped", "cached", "object", "offset", "crypto_offset", "crypto_start", "crypto_end")
qhead = kern.globals.apple_protect_pager_queue
qtype = GetType('apple_protect_pager *')
qcnt = kern.globals.apple_protect_pager_count
shadow = object.shadow
vnode_pager = Cast(object.pager,'vnode_pager *')
filename = GetVnodePath(vnode_pager.vnode_handle)
- print "{:>3}/{:<3d} {: <#018x} {:>5d} {:>5d} {:>6d} {: <#018x} {: <#018x} {:#018x} {:#018x} {:#018x} {:#018x}\n\tcrypt_info:{: <#018x} <decrypt:{: <#018x} end:{:#018x} ops:{: <#018x} refs:{:<d}>\n\tvnode:{: <#018x} {:s}\n".format(idx, qcnt, pager, pager.ref_count, pager.is_ready, pager.is_mapped, pager.pager_control, pager.backing_object, pager.backing_offset, pager.crypto_backing_offset, pager.crypto_start, pager.crypto_end, pager.crypt_info, pager.crypt_info.page_decrypt, pager.crypt_info.crypt_end, pager.crypt_info.crypt_ops, pager.crypt_info.crypt_refcnt, vnode_pager.vnode_handle, filename)
+ if hasattr(pager, "ap_pgr_hdr_ref"):
+ refcnt = pager.ap_pgr_hdr_ref
+ else:
+ refcnt = pager.ap_pgr_hdr.mo_ref
+ print "{:>3}/{:<3d} {: <#018x} {:>5d} {:>5d} {:>6d} {:>6d} {: <#018x} {:#018x} {:#018x} {:#018x} {:#018x}\n\tcrypt_info:{: <#018x} <decrypt:{: <#018x} end:{:#018x} ops:{: <#018x} refs:{:<d}>\n\tvnode:{: <#018x} {:s}\n".format(idx, qcnt, pager, refcnt, pager.is_ready, pager.is_mapped, pager.is_cached, pager.backing_object, pager.backing_offset, pager.crypto_backing_offset, pager.crypto_start, pager.crypto_end, pager.crypt_info, pager.crypt_info.page_decrypt, pager.crypt_info.crypt_end, pager.crypt_info.crypt_ops, pager.crypt_info.crypt_refcnt, vnode_pager.vnode_handle, filename)
+ showvmobject(pager.backing_object, pager.backing_offset, pager.crypto_end - pager.crypto_start, 1, 1)
+
+@lldb_command("show_all_shared_region_pagers")
+def ShowAllSharedRegionPagers(cmd_args=None):
+ """Routine to print all shared_region pagers
+ usage: show_all_shared_region_pagers
+ """
+ print "{:>3s} {:<3s} {:<18s} {:>5s} {:>5s} {:>6s} {:<18s} {:<18s} {:<18s} {:<18s}\n".format("#", "#", "pager", "refs", "ready", "mapped", "object", "offset", "jop_key", "slide", "slide_info")
+ qhead = kern.globals.shared_region_pager_queue
+ qtype = GetType('shared_region_pager *')
+ qcnt = kern.globals.shared_region_pager_count
+ idx = 0
+ for pager in IterateQueue(qhead, qtype, "srp_queue"):
+ idx = idx + 1
+ show_shared_region_pager(pager, qcnt, idx)
+
+@lldb_command("show_shared_region_pager")
+def ShowSharedRegionPager(cmd_args=None):
+ """Routine to print out info about a shared_region pager
+ usage: show_shared_region_pager <pager>
+ """
+ if cmd_args == None or len(cmd_args) < 1:
+ print "Invalid argument.", ShowSharedRegionPager.__doc__
+ return
+ pager = kern.GetValueFromAddress(cmd_args[0], 'shared_region_pager_t')
+ show_shared_region_pager(pager, 1, 1)
+
+def show_shared_region_pager(pager, qcnt, idx):
+ object = pager.srp_backing_object
+ shadow = object.shadow
+ while shadow != 0:
+ object = shadow
+ shadow = object.shadow
+ vnode_pager = Cast(object.pager,'vnode_pager *')
+ filename = GetVnodePath(vnode_pager.vnode_handle)
+ if hasattr(pager, 'srp_ref_count'):
+ ref_count = pager.srp_ref_count
+ else:
+ ref_count = pager.srp_header.mo_ref
+ if hasattr(pager, 'srp_jop_key'):
+ jop_key = pager.srp_jop_key
+ else:
+ jop_key = -1
+ print "{:>3}/{:<3d} {: <#018x} {:>5d} {:>5d} {:>6d} {: <#018x} {:#018x} {:#018x} {:#018x}\n\tvnode:{: <#018x} {:s}\n".format(idx, qcnt, pager, ref_count, pager.srp_is_ready, pager.srp_is_mapped, pager.srp_backing_object, pager.srp_backing_offset, jop_key, pager.srp_slide_info.si_slide, pager.srp_slide_info, vnode_pager.vnode_handle, filename)
+ showvmobject(pager.srp_backing_object, pager.srp_backing_offset, pager.srp_slide_info.si_end - pager.srp_slide_info.si_start, 1, 1)
@lldb_command("show_console_ring")
def ShowConsoleRingData(cmd_args=None):
else:
print "<no compressed data>"
-def print_hex_data(data, begin_offset=0, desc=""):
- """ print on stdout "hexdump -C < data" like output
- params:
- data - bytearray or array of int where each int < 255
- begin_offset - int offset that should be printed in left column
- desc - str optional description to print on the first line to describe data
- """
- if desc:
- print "{}:".format(desc)
- index = 0
- total_len = len(data)
- hex_buf = ""
- char_buf = ""
- while index < total_len:
- hex_buf += " {:02x}".format(data[index])
- if data[index] < 0x20 or data[index] > 0x7e:
- char_buf += "."
- else:
- char_buf += "{:c}".format(data[index])
- index += 1
- if index and index % 8 == 0:
- hex_buf += " "
- if index > 1 and (index % 16) == 0:
- print "{:08x} {: <50s} |{: <16s}|".format(begin_offset + index - 16, hex_buf, char_buf)
- hex_buf = ""
- char_buf = ""
- if index % 16 != 0:
- print "{:08x} {: <50s} |{: <16s}|".format(begin_offset + index - 16, hex_buf, char_buf)
- return
-
@lldb_command('vm_scan_all_pages')
def VMScanAllPages(cmd_args=None):
"""Scans the vm_pages[] array
import xnudefines
import memory
import json
+from collections import defaultdict
def GetProcName(proc):
""" returns a string name of the process. Longer variant is preffered if provided.
return str(proc.p_comm)
def GetProcNameForTask(task):
- """ returns a string name of the process. if proc is not valid "unknown" is returned
+ """ returns a string name of the process. If proc is not valid the proc
+ name is looked up in the associated importance structure (if
+ available). If no name can be found, "unknown" is returned.
params:
task: value object representing a task in the kernel.
returns:
str : A string name of the process linked to the task
"""
- if not task or not unsigned(task.bsd_info):
- return "unknown"
- p = Cast(task.bsd_info, 'proc *')
+ if task:
+ if unsigned(task.bsd_info):
+ p = Cast(task.bsd_info, 'proc *')
+ return GetProcName(p)
+
+ if (hasattr(task, 'task_imp_base') and
+ hasattr(task.task_imp_base, 'iit_procname') and
+ unsigned(task.task_imp_base) != 0):
+ return str(task.task_imp_base.iit_procname)
- return GetProcName(p)
+ return "unknown"
def GetProcPIDForTask(task):
""" returns a int pid of the process. if the proc is not valid, val[5] from audit_token is returned.
K - AST_KPERF
M - AST_MACF
r - AST_RESET_PCS
+ a - AST_ARCADE
G - AST_GUARD
T - AST_TELEMETRY_USER
T - AST_TELEMETRY_KERNEL
out_string = ""
state = int(ast)
thread_state_chars = {0x0:'', 0x1:'P', 0x2:'Q', 0x4:'U', 0x8:'H', 0x10:'Y', 0x20:'A',
- 0x40:'L', 0x80:'B', 0x100:'K', 0x200:'M', 0x400: 'r',
+ 0x40:'L', 0x80:'B', 0x100:'K', 0x200:'M', 0x400: 'r', 0x800: 'a',
0x1000:'G', 0x2000:'T', 0x4000:'T', 0x8000:'T', 0x10000:'S',
0x20000: 'D', 0x40000: 'I', 0x80000: 'E', 0x100000: 'R', 0x200000: 'N'}
state_str = ''
mask = 0x1
- while mask <= 0x80000:
+ while mask <= 0x200000:
state_str += thread_state_chars[int(state & mask)]
mask = mask << 1
tg_flags += 'E'
if (tg.tg_flags & 0x2):
tg_flags += 'U'
- out_string += format_string.format(tg, tg.tg_id, tg.tg_name, tg.tg_refcount, tg_flags, tg.tg_recommendation)
+ out_string += format_string.format(tg, tg.tg_id, tg.tg_name, tg.tg_refcount.ref_count, tg_flags, tg.tg_recommendation)
return out_string
@lldb_command('showallthreadgroups')
global kern
print GetTaskSummary.header + " " + GetProcSummary.header
for t in kern.terminated_tasks:
+
+ # If the task has been terminated, its process is most likely gone too.
+ # Even without a proc, the original proc name may still be recoverable
+ # (GetProcNameForTask falls back to the task's importance structure).
pval = Cast(t.bsd_info, 'proc *')
- print GetTaskSummary(t) +" "+ GetProcSummary(pval)
+ if pval:
+ psummary = GetProcSummary(pval)
+ else:
+ name = GetProcNameForTask(t)
+ pslen = GetProcSummary.header.find("command")
+ psummary = "{0: <{indent}} {1: <s}".format("", name, indent = pslen - 1)
+
+ print GetTaskSummary(t) + " " + psummary
+
return True
# Macro: showtaskstacks
def ShowAllThreads(cmd_args = None):
""" Display info about all threads in the system
"""
+
+ # Terminated threads get prefixed with a 'T'
+ def ShowTaskTerminatedThreads(task):
+ tlist = tmap.get(unsigned(task), [])
+ for thval in tlist:
+ print "T\t" + GetThreadSummary(thval)
+
+ # Task -> [thread, ..] map of terminated threads
+ tmap = defaultdict(list)
+ for thr in kern.terminated_threads:
+ tmap[unsigned(thr.task)].append(thr)
+
for t in kern.tasks:
ShowTaskThreads([str(int(t))])
+ ShowTaskTerminatedThreads(t)
print " \n"
-
+
for t in kern.terminated_tasks:
print "Terminated: \n"
ShowTaskThreads([str(int(t))])
+ ShowTaskTerminatedThreads(t)
print " \n"
-
+
+ return
+
+@lldb_command('showterminatedthreads')
+def ShowTerminatedThreads(cmd_args=None):
+ """ Display info about all terminated threads in the system
+ """
+
+ global kern
+ print GetThreadSummary.header
+ for t in kern.terminated_threads:
+ print GetThreadSummary(t)
+
return
@lldb_command('showtaskthreads', "F:")
if (not kern.arch.startswith('arm') and frame_ptr < mh_execute_addr) or (kern.arch.startswith('arm') and frame_ptr > mh_execute_addr):
break
pc_val = kern.GetValueFromAddress(frame_ptr + kern.ptrsize,'uintptr_t *')
- pc_val = unsigned(dereference(pc_val))
+ pc_val = kern.StripKernelPAC(unsigned(dereference(pc_val)))
out_string += prefix + GetSourceInformationForAddress(pc_val) + "\n"
bt_count +=1
previous_frame_ptr = frame_ptr
return False
_enum_cache = {}
-def GetEnumValue(name):
+def GetEnumValue(enum_name_or_combined, member_name = None):
""" Finds the value of a particular enum define. Ex kdp_req_t::KDP_VERSION => 0x3
params:
- name : str - name of enum in the format type::name
+ enum_name_or_combined: str
+ either "type::member" (the legacy combined form), or
+ the name of an enum type when member_name is given
+ member_name: None, or the name of an enum member
+ (in which case enum_name_or_combined is the enum type name).
returns:
int - value of the particular enum.
raises:
TypeError - if the enum is not found
"""
- name = name.strip()
global _enum_cache
- if name not in _enum_cache:
- res = lldb.SBCommandReturnObject()
- lldb.debugger.GetCommandInterpreter().HandleCommand("p/x (`%s`)" % name, res)
- if not res.Succeeded():
- raise TypeError("Enum not found with name: " + name)
- # the result is of format '(int) $481 = 0x00000003\n'
- _enum_cache[name] = int( res.GetOutput().split('=')[-1].strip(), 16)
- return _enum_cache[name]
+ if member_name is None:
+ enum_name, member_name = enum_name_or_combined.strip().split("::")
+ else:
+ enum_name = enum_name_or_combined
+
+ if enum_name not in _enum_cache:
+ ty = GetType(enum_name)
+ d = {}
+
+ for e in ty.get_enum_members_array():
+ if ty.GetTypeFlags() & lldb.eTypeIsSigned:
+ d[e.GetName()] = e.GetValueAsSigned()
+ else:
+ d[e.GetName()] = e.GetValueAsUnsigned()
+
+ _enum_cache[enum_name] = d
+
+ return _enum_cache[enum_name][member_name]
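+
+# Illustrative usage (editorial addition, not part of this change); both calling
+# forms resolve the same member, reusing the enum from the docstring example:
+#
+#   GetEnumValue('kdp_req_t::KDP_VERSION')     # legacy combined "type::member"
+#   GetEnumValue('kdp_req_t', 'KDP_VERSION')   # enum type name plus member name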
def ResolveFSPath(path):
""" expand ~user directories and return absolute path.
retval = False
return retval
-def print_hex_data(data, begin_offset=0, desc=""):
+def print_hex_data(data, begin_offset=0, desc="", marks={}):
""" print on stdout "hexdump -C < data" like output
params:
data - bytearray or array of int where each int < 255
begin_offset - int offset that should be printed in left column
desc - str optional description to print on the first line to describe data
+ marks - dict mapping an absolute offset (begin_offset + index) to a one-character marker that is printed in place of the space before that byte
"""
if desc:
print "{}:".format(desc)
hex_buf = ""
char_buf = ""
while index < total_len:
- hex_buf += " {:02x}".format(data[index])
+ if marks.has_key(begin_offset + index):
+ hex_buf += marks[begin_offset + index]
+ hex_buf += "{:02x}".format(data[index])
+ else:
+ hex_buf += " {:02x}".format(data[index])
if data[index] < 0x20 or data[index] > 0x7e:
char_buf += "."
else:
base_address = array_base_val.GetValueAsUnsigned()
size = array_base_val.GetType().GetPointeeType().GetByteSize()
obj_address = base_address + (index * size)
- obj = kern.GetValueFromAddress(obj_address, array_base_val.GetType().GetName())
+ obj = kern.GetValueFromAddress(obj_address, array_base_val.GetType())
return Cast(obj, array_base_val.GetType())
Trace_cmd(cmd_args, cmd_options, hdrString, entryString, kern.globals.traptrace_ring,
kern.globals.traptrace_entries_per_cpu, MAX_TRAPTRACE_BACKTRACES)
-
+
+# Generator that yields (sysctl_oid, depth, parent_name) tuples for every
+# sysctl under the provided root, optionally filtered by a dotted-name prefix.
+def IterateSysctls(root_oid=kern.globals.sysctl__children, prefix="", depth = 0, parent = ""):
+ headp = root_oid
+ for pp in IterateListEntry(headp, 'struct sysctl_oid *', 'oid_link', 's'):
+ node_str = ""
+ if prefix != "":
+ node_str = str(pp.oid_name)
+ if parent != "":
+ node_str = parent + "." + node_str
+ if node_str.startswith(prefix):
+ yield pp, depth, parent
+ else:
+ yield pp, depth, parent
+ type = pp.oid_kind & 0xf
+ if type == 1 and pp.oid_arg1 != 0:
+ if node_str == "":
+ next_parent = str(pp.oid_name)
+ if parent != "":
+ next_parent = parent + "." + next_parent
+ else:
+ next_parent = node_str
+ # Only recurse if the next parent starts with our allowed prefix.
+ # Note that it's OK if the parent string is too short (because the prefix might be for a deeper node).
+ prefix_len = min(len(prefix), len(next_parent))
+ if next_parent[:prefix_len] == prefix[:prefix_len]:
+ for x in IterateSysctls(Cast(pp.oid_arg1, "struct sysctl_oid_list *"), prefix, depth + 1, next_parent):
+ yield x
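+
+# Illustrative usage sketch (editorial addition, not part of this change): walk
+# every sysctl under a dotted-name prefix and print its full name. The prefix
+# argument is only an example filter; kern and the other helpers come from the
+# surrounding lldbmacros environment.
+def PrintSysctlNames(prefix=""):
+    for oid, depth, parent in IterateSysctls(kern.globals.sysctl__children, prefix):
+        name = str(oid.oid_name)
+        if parent != "":
+            name = parent + "." + name
+        print "  " * depth + name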
@lldb_command('showsysctls', 'P:')
def ShowSysctls(cmd_args=[], cmd_options={}):
else:
_ShowSysctl_prefix = ''
allowed_prefixes = []
- def IterateSysctls(oid, parent_str, i):
- headp = oid
- parentstr = "<none>" if parent_str is None else parent_str
- for pp in IterateListEntry(headp, 'struct sysctl_oid *', 'oid_link', 's'):
- type = pp.oid_kind & 0xf
- next_parent = str(pp.oid_name)
- if parent_str is not None:
- next_parent = parent_str + "." + next_parent
- st = (" " * i) + str(pp.GetSBValue().Dereference()).replace("\n", "\n" + (" " * i))
- if type == 1 and pp.oid_arg1 != 0:
- # Check allowed_prefixes to see if we can recurse from root to the allowed prefix.
- # To recurse further, we need to check only the the next parent starts with the user-specified
- # prefix
- if next_parent not in allowed_prefixes and next_parent.startswith(_ShowSysctl_prefix) is False:
- continue
- print 'parent = "%s"' % parentstr, st[st.find("{"):]
- IterateSysctls(Cast(pp.oid_arg1, "struct sysctl_oid_list *"), next_parent, i + 2)
- elif _ShowSysctl_prefix == '' or next_parent.startswith(_ShowSysctl_prefix):
- print ('parent = "%s"' % parentstr), st[st.find("{"):]
- IterateSysctls(kern.globals.sysctl__children, None, 0)
+ for sysctl, depth, parentstr in IterateSysctls(kern.globals.sysctl__children, _ShowSysctl_prefix):
+ if parentstr == "":
+ parentstr = "<none>"
+ headp = sysctl
+ st = (" " * depth * 2) + str(sysctl.GetSBValue().Dereference()).replace("\n", "\n" + (" " * depth * 2))
+ print 'parent = "%s"' % parentstr, st[st.find("{"):]
+
+@lldb_command('showexperiments', 'F')
+def ShowExperiments(cmd_args=[], cmd_options={}):
+ """ Shows any active kernel experiments being run on the device via trial.
+ Arguments:
+ -F: Scan for changed experiment values even if no trial identifiers have been set.
+ """
+
+ treatment_id = str(kern.globals.trial_treatment_id)
+ experiment_id = str(kern.globals.trial_experiment_id)
+ deployment_id = kern.globals.trial_deployment_id._GetValueAsSigned()
+ if treatment_id == "" and experiment_id == "" and deployment_id == -1:
+ print("Device is not enrolled in any kernel experiments.")
+ if not '-F' in cmd_options:
+ return
+ else:
+ print("""Device is enrolled in a kernel experiment:
+ treatment_id: %s
+ experiment_id: %s
+ deployment_id: %d""" % (treatment_id, experiment_id, deployment_id))
+
+ print("Scanning sysctl tree for modified factors...")
+
+ kExperimentFactorFlag = 0x00100000
+
+ formats = {
+ "IU": gettype("unsigned int *"),
+ "I": gettype("int *"),
+ "LU": gettype("unsigned long *"),
+ "L": gettype("long *"),
+ "QU": gettype("uint64_t *"),
+ "Q": gettype("int64_t *")
+ }
+ for sysctl, depth, parentstr in IterateSysctls(kern.globals.sysctl__children):
+ if sysctl.oid_kind & kExperimentFactorFlag:
+ spec = cast(sysctl.oid_arg1, "struct experiment_spec *")
+ # Skip entries whose oid_arg2 is not 1: those are experiment factors that were created without an experiment_spec.
+ if sysctl.oid_arg2 == 1:
+ if spec.modified == 1:
+ fmt = str(sysctl.oid_fmt)
+ ptr = spec.ptr
+ t = formats.get(fmt, None)
+ if t:
+ value = cast(ptr, t)
+ else:
+ # Unknown type
+ continue
+ name = str(parentstr) + "." + str(sysctl.oid_name)
+ print("%s = %d (Default value is %d)" % (name, dereference(value), spec.original_value))
from memory import *
from process import *
from ntstat import *
from zonetriage import *
from sysreg import *
+from counter import *
perf_index \
personas \
unixconf \
+ kernpost_test_report \
KEXT_TARGETS = pgokext.kext
--- /dev/null
+include ../Makefile.common
+
+DSTROOT?=$(shell /bin/pwd)
+SYMROOT?=$(shell /bin/pwd)
+OBJROOT?=$(shell /bin/pwd)
+
+CC:=$(shell xcrun -sdk "$(SDKROOT)" -find cc)
+
+CFLAGS:=$(ARCH_FLAGS) -g -Wall -Os -isysroot $(SDKROOT) -I$(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders -lkdd -framework Foundation
+
+all: $(DSTROOT)/kernpost_test_report
+
+$(DSTROOT)/kernpost_test_report: kernpost_test_report.m
+ $(CC) -o $@ $^ $(subst -arch i386,,$(CFLAGS))
+
+clean:
+ rm -f $(DSTROOT)/kernpost_test_report $(OBJROOT)/*.o
+ rm -rf $(SYMROOT)/*.dSYM
--- /dev/null
+#import <Foundation/Foundation.h>
+#include <kcdata.h>
+#import <kdd.h>
+#include <fcntl.h> /* for open() and the O_* flags used below */
+#include <mach/mach_time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/sysctl.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+#define FREE_BUF(_buf) \
+ do { \
+ if (_buf) { \
+ free(_buf); \
+ _buf = NULL; \
+ } \
+ } while (0)
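+/* Editorial note: the do { ... } while (0) wrapper makes FREE_BUF(_buf) expand
+ * to a single statement, so the trailing semicolon at the call site composes
+ * safely with if/else blocks. */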
+
+#define ERR(_msg_format, ...) fprintf(stderr, "error: " _msg_format "\n", ##__VA_ARGS__)
+
+#define PERR(_msg) perror("error: " _msg)
+
+/* XNUPost KCData constants */
+NSString * const kXNUPostKCDataKeyTestConfig = @"xnupost_testconfig";
+NSString * const kXNUPostKCDataKeyOSVersion = @"osversion";
+NSString * const kXNUPostKCDataKeyBootargs = @"boot_args";
+NSString * const kXNUPostKCDataKeyMachTBInfo = @"mach_timebase_info";
+NSString * const kXNUPostKCDataKeyMachTBInfoDenom = @"denom";
+NSString * const kXNUPostKCDataKeyMachTBInfoNumer = @"numer";
+NSString * const kXNUPostKCDataKeySubTestConfig = @"xnupost_test_config";
+NSString * const kXNUPostKCDataKeyTestName = @"test_name";
+NSString * const kXNUPostKCDataKeyBeginTime = @"begin_time";
+NSString * const kXNUPostKCDataKeyEndTime = @"end_time";
+NSString * const kXNUPostKCDataKeyRetval = @"retval";
+NSString * const kXNUPostKCDataKeyExpectedRetval = @"expected_retval";
+
+/* Resultbundle info constants */
+NSString * const kRBInfoKeyVersion = @"version";
+NSString * const kRBInfoKeyCategory = @"test_category";
+NSString * const kRBInfoKeyTestID = @"test_id";
+NSString * const kRBInfoKeyProject = @"Project";
+NSString * const kRBInfoKeyBootargs = @"boot-args";
+NSString * const kRBInfoKeyOSVersion = @"osVersion";
+NSString * const kRBInfoKeyResultCode = @"result_code";
+NSString * const kRBInfoKeyResultStarted = @"result_started";
+NSString * const kRBInfoKeyResultFinished = @"result_finished";
+NSString * const kRBInfoKeyMachTBInfo = @"mach_timebase_info";
+NSString * const kRBInfoKeyMachTBInfoDenom = @"denom";
+NSString * const kRBInfoKeyMachTBInfoNumer = @"numer";
+NSString * const kRBInfoKeyBeginTimeRaw = @"beginTimeRaw";
+NSString * const kRBInfoKeyEndTimeRaw = @"endTimeRaw";
+
+NSNumber * const kResultBundleVersion = @2;
+NSString * const kResultBundleCategory = @"unittest";
+NSString * const kResultBundleProject = @"xnu";
+NSNumber * const kResultCodePass = @200;
+NSNumber * const kResultCodeFail = @400;
+
+#define COMMAND_EXPORT (0)
+static int g_command = COMMAND_EXPORT;
+#define OUTPUT_FORMAT_RAW (0)
+#define OUTPUT_FORMAT_PLIST_XML (1)
+#define OUTPUT_FORMAT_RESULTBUNDLE (2)
+static int g_output_format = OUTPUT_FORMAT_RAW;
+static char * g_output_dir = NULL;
+
+static void
+usage(void)
+{
+ const char * progname = getprogname();
+ fprintf(stderr,
+ "Usage:\t%s COMMAND [OPTIONS]\n\n"
+ "\t%s export -o OUTPUT_DIR_PATH [-f raw|plist|resultbundle]\n"
+ "\nSupported command:\n"
+ "\texport\n",
+ progname, progname);
+}
+
+static void
+parse_export_options(int argc, char * argv[])
+{
+ int ch;
+ bool error = false;
+
+ while ((ch = getopt(argc, argv, "o:f:")) != -1) {
+ switch (ch) {
+ case 'o':
+ g_output_dir = optarg;
+ break;
+ case 'f':
+ if (strncmp(optarg, "raw", 4) == 0) {
+ g_output_format = OUTPUT_FORMAT_RAW;
+ } else if (strncmp(optarg, "plist", 6) == 0) {
+ g_output_format = OUTPUT_FORMAT_PLIST_XML;
+ } else if (strncmp(optarg, "resultbundle", 13) == 0) {
+ g_output_format = OUTPUT_FORMAT_RESULTBUNDLE;
+ } else {
+ error = true;
+ }
+ break;
+ default:
+ error = true;
+ break;
+ }
+ }
+
+ if (g_output_dir == NULL) {
+ error = true;
+ } else {
+ /* Only stat() the output path once we know one was supplied. */
+ struct stat path_stat;
+ if (stat(g_output_dir, &path_stat)) {
+ PERR("Failed to access output dir");
+ error = true;
+ } else if (!S_ISDIR(path_stat.st_mode)) {
+ ERR("Output path must be a directory");
+ error = true;
+ }
+ }
+
+ if (error) {
+ usage();
+ exit(EX_USAGE);
+ }
+}
+
+static void
+parse_options(int argc, char * argv[])
+{
+ if (argc > 1) {
+ char * cmd = argv[1];
+ argc--;
+ argv++;
+ if (strncmp(cmd, "export", 7) == 0) {
+ g_command = COMMAND_EXPORT;
+ parse_export_options(argc, argv);
+ } else {
+ usage();
+ exit(EX_USAGE);
+ }
+ } else {
+ usage();
+ exit(EX_USAGE);
+ }
+}
+
+static void
+retrieve_test_data(void ** raw_buf_p, size_t * raw_size_p)
+{
+ int rc = sysctlbyname("debug.xnupost_get_tests", NULL, raw_size_p, NULL, 0);
+ if (rc == 0 && *raw_size_p > 0) {
+ *raw_buf_p = malloc(*raw_size_p);
+ if (*raw_buf_p) {
+ rc = sysctlbyname("debug.xnupost_get_tests", *raw_buf_p, raw_size_p, NULL, 0);
+ if (0 != rc) {
+ PERR("Failed to get KCData through sysctl");
+ }
+ } else {
+ PERR("Failed to allocate KCData raw buffer");
+ }
+ } else {
+ PERR("Failed to get size through sysctl");
+ }
+}
+
+static void
+export_raw(void * raw_buf, size_t raw_size)
+{
+ if (raw_buf) {
+ char output_path[MAXPATHLEN];
+ snprintf(output_path, MAXPATHLEN, "%s/xnupost.kcdata", g_output_dir);
+ FILE * output_fp = fopen(output_path, "w");
+ if (output_fp) {
+ fwrite(raw_buf, raw_size, 1, output_fp);
+ fclose(output_fp);
+ } else {
+ PERR("Failed to open output path");
+ }
+ }
+}
+
+static void
+export_to_plist(void * raw_buf, size_t raw_size)
+{
+ if (raw_buf) {
+ char output_path[MAXPATHLEN];
+ snprintf(output_path, MAXPATHLEN, "%s/xnupost.plist", g_output_dir);
+ NSError * nsError = nil;
+ NSDictionary * parsed_dict = parseKCDataBuffer(raw_buf, raw_size, &nsError);
+ if (parsed_dict) {
+ NSData * plist_data = [NSPropertyListSerialization dataWithPropertyList:parsed_dict
+ format:NSPropertyListXMLFormat_v1_0
+ options:0
+ error:&nsError];
+ if (plist_data) {
+ if (![plist_data writeToFile:[NSString stringWithUTF8String:output_path] atomically:YES]) {
+ ERR("Failed to write plist to %s", output_path);
+ }
+ } else {
+ ERR("Failed to serialize result plist: %s", nsError.localizedDescription.UTF8String);
+ }
+ } else {
+ ERR("Failed to parse KCData to plist: %s", nsError.localizedDescription.UTF8String);
+ }
+ }
+}
+
+#define RESULTBUNDLE_TIME_STR_SIZE (30) // 0000-00-00T00:00:00.000+00:00'\0'
+#define RESULTBUNDLE_TIME_MS_INDEX (20)
+#define RESULTBUNDLE_TIME_TZ_COLON_INDEX (26)
+#define RESULTBUNDLE_TIME_MS_STR_SIZE (4) // 000'\0'
+#define MSEC_PER_USEC 1000ull
+
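+/*
+ * Editorial note on the helper below: it converts a mach timestamp (already
+ * expressed in microseconds) to an estimated wall-clock time via
+ * mach_boottime_usec(), then formats it in the 0000-00-00T00:00:00.000+00:00
+ * layout sized above. strftime() first emits a ".000" millisecond placeholder
+ * and a "+hhmm" zone offset; the code then patches the real milliseconds into
+ * the placeholder and inserts the ':' into the offset.
+ */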
+static void
+get_estimated_time_str_resultbundle(char * output_str, uint64_t mach_abs_time_usec)
+{
+ uint64_t est_usec = mach_boottime_usec() + mach_abs_time_usec;
+ time_t est_sec = (time_t)(est_usec / USEC_PER_SEC);
+ uint64_t est_usec_fraction = est_usec % USEC_PER_SEC;
+ struct tm tm_info;
+ int i = 0;
+
+ localtime_r(&est_sec, &tm_info);
+ strftime(output_str, RESULTBUNDLE_TIME_STR_SIZE, "%Y-%m-%dT%H:%M:%S.000%z", &tm_info);
+
+ /* Fill out milliseconds */
+ char ms_str[RESULTBUNDLE_TIME_MS_STR_SIZE] = {0};
+ snprintf(ms_str, RESULTBUNDLE_TIME_MS_STR_SIZE, "%03llu", est_usec_fraction / MSEC_PER_USEC);
+ for (i = 0; i < 3; i++) {
+ output_str[RESULTBUNDLE_TIME_MS_INDEX + i] = ms_str[i];
+ }
+
+ /* Add colon for timezone offset */
+ for (i = RESULTBUNDLE_TIME_STR_SIZE - 1; i > RESULTBUNDLE_TIME_TZ_COLON_INDEX; i--) {
+ output_str[i] = output_str[i - 1];
+ }
+ output_str[RESULTBUNDLE_TIME_TZ_COLON_INDEX] = ':';
+}
+
+static void
+create_subtest_bundle_config(NSDictionary * testconfig, NSDictionary * subtest, char * bundle_dir)
+{
+ NSString * testName = subtest[kXNUPostKCDataKeyTestName];
+ NSNumber * tbInfoDenom = testconfig[kXNUPostKCDataKeyMachTBInfo][kXNUPostKCDataKeyMachTBInfoDenom];
+ NSNumber * tbInfoNumer = testconfig[kXNUPostKCDataKeyMachTBInfo][kXNUPostKCDataKeyMachTBInfoNumer];
+ struct mach_timebase_info tb_info;
+ tb_info.denom = tbInfoDenom.unsignedIntValue;
+ tb_info.numer = tbInfoNumer.unsignedIntValue;
+ NSNumber * beginTimeRaw = subtest[kXNUPostKCDataKeyBeginTime];
+ NSNumber * endTimeRaw = subtest[kXNUPostKCDataKeyEndTime];
+ uint64_t begin_time_usec = (beginTimeRaw.unsignedLongLongValue * tb_info.numer) / (tb_info.denom * NSEC_PER_USEC);
+ uint64_t end_time_usec = (endTimeRaw.unsignedLongLongValue * tb_info.numer) / (tb_info.denom * NSEC_PER_USEC);
+ bool test_status =
+ subtest[kXNUPostKCDataKeyRetval] && (subtest[kXNUPostKCDataKeyRetval] == subtest[kXNUPostKCDataKeyExpectedRetval]);
+
+ char output_path[MAXPATHLEN];
+ char * output_dir_end = NULL;
+
+ snprintf(output_path, MAXPATHLEN, "%s/test_%s", bundle_dir, testName.UTF8String);
+ if (mkdir(output_path, 0777)) {
+ PERR("Failed to create subtest bundle dir");
+ }
+ output_dir_end = output_path + strlen(output_path);
+
+ *output_dir_end = '\0';
+ strlcat(output_path, "/Attachments", MAXPATHLEN);
+ if (mkdir(output_path, 0777)) {
+ PERR("Failed to create subtest Attachments dir");
+ }
+
+ *output_dir_end = '\0';
+ strlcat(output_path, "/Diagnostics", MAXPATHLEN);
+ if (mkdir(output_path, 0777)) {
+ PERR("Failed to create subtest Diagnostics dir");
+ }
+
+ NSMutableDictionary * rbInfo = [NSMutableDictionary new];
+ rbInfo[kRBInfoKeyVersion] = kResultBundleVersion;
+ rbInfo[kRBInfoKeyCategory] = kResultBundleCategory;
+ rbInfo[kRBInfoKeyTestID] = testName;
+ rbInfo[kRBInfoKeyProject] = kResultBundleProject;
+ rbInfo[kRBInfoKeyOSVersion] = testconfig[kXNUPostKCDataKeyOSVersion];
+ rbInfo[kRBInfoKeyBootargs] = testconfig[kXNUPostKCDataKeyBootargs];
+ rbInfo[kRBInfoKeyResultCode] = test_status ? kResultCodePass : kResultCodeFail;
+
+ char estimated_time_str[RESULTBUNDLE_TIME_STR_SIZE];
+ get_estimated_time_str_resultbundle(estimated_time_str, begin_time_usec);
+ rbInfo[kRBInfoKeyResultStarted] = [NSString stringWithUTF8String:estimated_time_str];
+ get_estimated_time_str_resultbundle(estimated_time_str, end_time_usec);
+ rbInfo[kRBInfoKeyResultFinished] = [NSString stringWithUTF8String:estimated_time_str];
+
+ rbInfo[kRBInfoKeyMachTBInfo] = @{kRBInfoKeyMachTBInfoDenom : tbInfoDenom, kRBInfoKeyMachTBInfoNumer : tbInfoNumer};
+
+ rbInfo[kRBInfoKeyBeginTimeRaw] = beginTimeRaw;
+ rbInfo[kRBInfoKeyEndTimeRaw] = endTimeRaw;
+
+ *output_dir_end = '\0';
+ strlcat(output_path, "/Info.plist", MAXPATHLEN);
+ NSURL * output_url = [NSURL fileURLWithFileSystemRepresentation:output_path isDirectory:NO relativeToURL:nil];
+ NSError * writeError = nil;
+ if (![rbInfo writeToURL:output_url error:&writeError]) {
+ ERR("Failed to write Info.plist file: %s", writeError.localizedDescription.UTF8String);
+ }
+
+ *output_dir_end = '\0';
+ strlcat(output_path, test_status ? "/PASS.status" : "/FAIL.status", MAXPATHLEN);
+ int fd = open(output_path, O_CREAT | O_TRUNC | O_WRONLY, 0666);
+ if (fd == -1) {
+ PERR("Failed to create subtest status file");
+ } else {
+ close(fd);
+ }
+}
+
+static void
+export_to_resultbundle(void * raw_buf, size_t raw_size)
+{
+ if (raw_buf) {
+ NSError * nsError = nil;
+ NSDictionary * parsed_dict = parseKCDataBuffer(raw_buf, raw_size, &nsError);
+ if (parsed_dict) {
+ NSDictionary * testconfig = parsed_dict[kXNUPostKCDataKeyTestConfig];
+ NSArray * subtests = testconfig[kXNUPostKCDataKeySubTestConfig];
+
+ char bundle_dir[MAXPATHLEN];
+ snprintf(bundle_dir, MAXPATHLEN, "%s/xnupost", g_output_dir);
+ if (mkdir(bundle_dir, 0777)) {
+ PERR("Failed to create result bundle dir");
+ }
+
+ for (NSDictionary * subtest in subtests) {
+ create_subtest_bundle_config(testconfig, subtest, bundle_dir);
+ }
+ } else {
+ ERR("Failed to parse KCData to plist: %s", nsError.localizedDescription.UTF8String);
+ }
+ }
+}
+
+static void
+execute_export(void)
+{
+ void * raw_buf = NULL;
+ size_t raw_size = 0;
+ retrieve_test_data(&raw_buf, &raw_size);
+ switch (g_output_format) {
+ case OUTPUT_FORMAT_PLIST_XML:
+ export_to_plist(raw_buf, raw_size);
+ break;
+ case OUTPUT_FORMAT_RESULTBUNDLE:
+ export_to_resultbundle(raw_buf, raw_size);
+ break;
+ case OUTPUT_FORMAT_RAW:
+ default:
+ export_raw(raw_buf, raw_size);
+ break;
+ }
+
+ FREE_BUF(raw_buf);
+}
+
+int
+main(int argc, char * argv[])
+{
+ parse_options(argc, argv);
+ switch (g_command) {
+ case COMMAND_EXPORT:
+ execute_export();
+ break;
+ default:
+ usage();
+ exit(EX_USAGE);
+ break;
+ }
+
+ return 0;
+}
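+
+/* Example invocation (editorial note), mirroring the usage() text above:
+ *   kernpost_test_report export -o /tmp/xnupost_results -f resultbundle
+ * The output directory here is only an example path.
+ */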
#include <sysexits.h>
#include <sys/sysctl.h>
#include <getopt.h>
+#include <libproc.h>
#include <spawn.h>
#include <spawn_private.h>
#include <stdatomic.h>
#include <os/tsd.h>
+#include <os/lock.h>
#include <TargetConditionals.h>
typedef enum wake_type { WAKE_BROADCAST_ONESEM, WAKE_BROADCAST_PERTHREAD, WAKE_CHAIN, WAKE_HOP } wake_type_t;
-typedef enum my_policy_type { MY_POLICY_REALTIME, MY_POLICY_TIMESHARE, MY_POLICY_FIXEDPRI } my_policy_type_t;
+typedef enum my_policy_type { MY_POLICY_REALTIME, MY_POLICY_TIMESHARE, MY_POLICY_TIMESHARE_NO_SMT, MY_POLICY_FIXEDPRI } my_policy_type_t;
#define mach_assert_zero(error) do { if ((error) != 0) { fprintf(stderr, "[FAIL] error %d (%s) ", (error), mach_error_string(error)); assert(error == 0); } } while (0)
#define mach_assert_zero_t(tid, error) do { if ((error) != 0) { fprintf(stderr, "[FAIL] Thread %d error %d (%s) ", (tid), (error), mach_error_string(error)); assert(error == 0); } } while (0)
create_churn_threads()
{
if (g_churn_count == 0) {
- g_churn_count = g_numcpus - 1;
+ g_churn_count = g_test_rt_smt ? g_numcpus : g_numcpus - 1;
}
errno_t err;
{
if (strcmp(str, "timeshare") == 0) {
return MY_POLICY_TIMESHARE;
+ } else if (strcmp(str, "timeshare_no_smt") == 0) {
+ return MY_POLICY_TIMESHARE_NO_SMT;
} else if (strcmp(str, "realtime") == 0) {
return MY_POLICY_REALTIME;
} else if (strcmp(str, "fixed") == 0) {
switch (g_policy) {
case MY_POLICY_TIMESHARE:
break;
+ case MY_POLICY_TIMESHARE_NO_SMT:
+ proc_setthread_no_smt();
+ break;
case MY_POLICY_REALTIME:
/* Hard-coded realtime parameters (similar to what Digi uses) */
pol.period = 100000;
return 0;
}
+time_value_t
+get_thread_runtime(void)
+{
+ thread_basic_info_data_t info;
+ mach_msg_type_number_t info_count = THREAD_BASIC_INFO_COUNT;
+ thread_info(pthread_mach_thread_np(pthread_self()), THREAD_BASIC_INFO, (thread_info_t)&info, &info_count);
+
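+ /* time_value_add() folds system_time into user_time, so the value returned
+ * below is the thread's combined user + system run time. */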
+ time_value_add(&info.user_time, &info.system_time);
+
+ return info.user_time;
+}
+
+time_value_t worker_threads_total_runtime = {};
+
/*
* Wait for a wakeup, potentially wake up another of the "0-N" threads,
* and notify the main thread when done.
static void*
worker_thread(void *arg)
{
+ static os_unfair_lock runtime_lock = OS_UNFAIR_LOCK_INIT;
+
uint32_t my_id = (uint32_t)(uintptr_t)arg;
kern_return_t kr;
mach_assert_zero_t(my_id, kr);
}
+ time_value_t runtime = get_thread_runtime();
+ os_unfair_lock_lock(&runtime_lock);
+ time_value_add(&worker_threads_total_runtime, &runtime);
+ os_unfair_lock_unlock(&runtime_lock);
+
return 0;
}
*stddevp = _dev;
}
+typedef struct {
+ natural_t sys;
+ natural_t user;
+ natural_t idle;
+} cpu_time_t;
+
+void
+record_cpu_time(cpu_time_t *cpu_time)
+{
+ host_cpu_load_info_data_t load;
+ mach_msg_type_number_t count = HOST_CPU_LOAD_INFO_COUNT;
+ kern_return_t kr = host_statistics(mach_host_self(), HOST_CPU_LOAD_INFO, (int *)&load, &count);
+ mach_assert_zero_t(0, kr);
+
+ natural_t total_system_time = load.cpu_ticks[CPU_STATE_SYSTEM];
+ natural_t total_user_time = load.cpu_ticks[CPU_STATE_USER] + load.cpu_ticks[CPU_STATE_NICE];
+ natural_t total_idle_time = load.cpu_ticks[CPU_STATE_IDLE];
+
+ cpu_time->sys = total_system_time;
+ cpu_time->user = total_user_time;
+ cpu_time->idle = total_idle_time;
+}
+
int
main(int argc, char **argv)
{
float avg, stddev;
bool test_fail = false;
+ bool test_warn = false;
for (int i = 0; i < argc; i++) {
if (strcmp(argv[i], "--switched_apptype") == 0) {
usleep(g_iteration_sleeptime_us);
}
+ cpu_time_t start_time;
+ cpu_time_t finish_time;
+
+ record_cpu_time(&start_time);
+
/* Go! */
for (uint32_t i = 0; i < g_iterations; i++) {
uint32_t j;
}
}
+ record_cpu_time(&finish_time);
+
/* Rejoin threads */
for (uint32_t i = 0; i < g_numthreads; i++) {
ret = pthread_join(threads[i], NULL);
join_churn_threads();
}
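+ /* Editorial note: HOST_CPU_LOAD_INFO cpu_ticks advance at the scheduler tick
+ * rate (nominally 100 Hz, i.e. 10 ms per tick), so the delta * 10 below is an
+ * approximate idle time in milliseconds, comparable to the worker runtime
+ * derived from thread_basic_info. */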
+ uint32_t cpu_idle_time = (finish_time.idle - start_time.idle) * 10;
+ uint32_t worker_threads_runtime = worker_threads_total_runtime.seconds * 1000 + worker_threads_total_runtime.microseconds / 1000;
+
compute_stats(worst_latencies_ns, g_iterations, &avg, &max, &min, &stddev);
printf("Results (from a stop):\n");
printf("Max:\t\t%.2f us\n", ((float)max) / 1000.0);
secondary ? " SECONDARY" : "",
fail ? " FAIL" : "");
}
+ test_warn |= (secondary || fail);
test_fail |= fail;
fail_count += fail;
}
}
}
+ if (g_test_rt_smt && (g_each_spin_duration_ns >= 200000) && !test_warn) {
+ printf("cpu_idle_time=%dms worker_threads_runtime=%dms\n", cpu_idle_time, worker_threads_runtime);
+ if (cpu_idle_time < worker_threads_runtime / 4) {
+ printf("FAIL cpu_idle_time unexpectedly small\n");
+ test_fail = 1;
+ } else if (cpu_idle_time > worker_threads_runtime * 2) {
+ printf("FAIL cpu_idle_time unexpectedly large\n");
+ test_fail = 1;
+ }
+ }
+
free(threads);
free(g_thread_endtimes_abs);
free(worst_latencies_ns);
usage()
{
errx(EX_USAGE, "Usage: %s <threads> <chain | hop | broadcast-single-sem | broadcast-per-thread> "
- "<realtime | timeshare | fixed> <iterations>\n\t\t"
+ "<realtime | timeshare | timeshare_no_smt | fixed> <iterations>\n\t\t"
"[--trace <traceworthy latency in ns>] "
"[--verbose] [--spin-one] [--spin-all] [--spin-time <nanos>] [--affinity]\n\t\t"
"[--no-sleep] [--drop-priority] [--churn-pri <pri>] [--churn-count <n>]\n\t\t"