+
+/*
+ * Internals for madvise(MADV_WILLNEED) system call.
+ *
+ * The present implementation is to do a read-ahead if the mapping corresponds
+ * to a mapped regular file. If it's an anonymous mapping, then we do nothing
+ * and basically ignore the "advice" (which we are always free to do).
+ */
+
+
+/*
+ * vm_map_willneed:
+ *
+ * Kernel side of madvise(MADV_WILLNEED).  Walk every vm_map_entry_t
+ * overlapping [start, end) and, for each readable entry backed by a
+ * vnode pager, issue an asynchronous read-ahead so the pages are
+ * likely resident by the time the caller touches them.  Anonymous and
+ * unreadable mappings are silently skipped -- advice may always be
+ * ignored.
+ *
+ * map:   target address map; only the read lock is taken, since no
+ *        map entries are modified.
+ * start: first address of the advised range (presumably page-aligned
+ *        by the caller -- confirm at the call site).
+ * end:   first address past the advised range.
+ *
+ * Returns KERN_INVALID_ADDRESS if the range is not fully allocated
+ * (madvise requires no holes); otherwise KERN_SUCCESS, even when the
+ * read-ahead I/O itself fails, because the advice is best-effort.
+ */
+static kern_return_t
+vm_map_willneed(
+ vm_map_t map,
+ vm_map_offset_t start,
+ vm_map_offset_t end
+)
+{
+ vm_map_entry_t entry;
+ vm_object_t object;
+ memory_object_t pager;
+ struct vm_object_fault_info fault_info;
+ kern_return_t kr;
+ vm_object_size_t len;
+ vm_object_offset_t offset;
+
+ /*
+ * Fill in static values in fault_info. Several fields get ignored by the code
+ * we call, but we'll fill them in anyway since uninitialized fields are bad
+ * when it comes to future backwards compatibility.
+ */
+
+ fault_info.interruptible = THREAD_UNINT; /* ignored value */
+ fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
+ fault_info.no_cache = FALSE; /* ignored value */
+ /*
+ * NOTE(review): stealth presumably keeps this speculative read-ahead
+ * from perturbing page reference/pageout accounting -- confirm against
+ * vm_fault's handling of fault_info.stealth.
+ */
+ fault_info.stealth = TRUE;
+
+ /*
+ * The MADV_WILLNEED operation doesn't require any changes to the
+ * vm_map_entry_t's, so the read lock is sufficient.
+ */
+
+ vm_map_lock_read(map);
+
+ /*
+ * The madvise semantics require that the address range be fully
+ * allocated with no holes. Otherwise, we're required to return
+ * an error.
+ */
+
+ if (vm_map_range_check(map, start, end, &entry)) {
+
+ /*
+ * Examine each vm_map_entry_t in the range.
+ *
+ * Loop on "start < end" rather than on the entry bounds (equivalent
+ * in the normal case, since start always lies inside the current
+ * entry and the range check above guarantees no holes), and only
+ * advance to the next entry once "start" has consumed the current
+ * one.  The previous form advanced "entry" unconditionally, so when
+ * "len" was clamped below (an entry larger than a vm_size_t can
+ * represent), "start" fell behind the entry list and the offset
+ * computation underflowed on the next pass.
+ */
+
+ for (; start < end;
+ start += len,
+ entry = (start < entry->vme_end) ? entry : entry->vme_next) {
+
+ /*
+ * The first time through, the start address could be anywhere within the
+ * vm_map_entry we found. So adjust the offset to correspond. After that,
+ * the offset will always be zero to correspond to the beginning of the current
+ * vm_map_entry.
+ */
+
+ offset = (start - entry->vme_start) + entry->offset;
+
+ /*
+ * Set the length so we don't go beyond the end of the map_entry or beyond the
+ * end of the range we were given. This range could span also multiple map
+ * entries all of which map different files, so make sure we only do the right
+ * amount of I/O for each object. Note that it's possible for there to be
+ * multiple map entries all referring to the same object but with different
+ * page permissions, but it's not worth trying to optimize that case.
+ */
+
+ len = MIN(entry->vme_end - start, end - start);
+
+ if ((vm_size_t) len != len) {
+ /*
+ * 32-bit overflow: clamp to the largest page-aligned vm_size_t.
+ * The loop header re-enters this same entry until the remainder
+ * fits, so no part of the range is skipped.
+ */
+ len = (vm_size_t) (0 - PAGE_SIZE);
+ }
+ fault_info.cluster_size = (vm_size_t) len;
+ fault_info.lo_offset = offset;
+ fault_info.hi_offset = offset + len;
+ fault_info.user_tag = entry->alias;
+
+ /*
+ * If there's no read permission to this mapping, then just skip it.
+ */
+
+ if ((entry->protection & VM_PROT_READ) == 0) {
+ continue;
+ }
+
+ /*
+ * Find the file object backing this map entry. If there is none,
+ * then we simply ignore the "will need" advice for this entry and
+ * go on to the next one.
+ *
+ * NOTE(review): this relies on find_vnode_object() returning the
+ * object locked, since we unlock it after starting paging activity
+ * below -- verify that contract.
+ */
+
+ if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
+ continue;
+ }
+
+ vm_object_paging_begin(object);
+ pager = object->pager;
+ vm_object_unlock(object);
+
+ /*
+ * Get the data from the object asynchronously.
+ *
+ * Note that memory_object_data_request() places limits on the amount
+ * of I/O it will do. Regardless of the len we specified, it won't do
+ * more than MAX_UPL_TRANSFER and it silently truncates the len to that
+ * size. This isn't necessarily bad since madvise shouldn't really be
+ * used to page in unlimited amounts of data. Other Unix variants limit
+ * the willneed case as well. If this turns out to be an issue for
+ * developers, then we can always adjust the policy here and still be
+ * backwards compatible since this is all just "advice".
+ */
+
+ kr = memory_object_data_request(
+ pager,
+ offset + object->paging_offset,
+ 0, /* ignored */
+ VM_PROT_READ,
+ (memory_object_fault_info_t)&fault_info);
+
+ vm_object_lock(object);
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ /*
+ * If we couldn't do the I/O for some reason, just give up on the
+ * madvise. We still return success to the user since madvise isn't
+ * supposed to fail when the advice can't be taken.  (The break lands
+ * on the unconditional kr = KERN_SUCCESS below.)
+ */
+
+ if (kr != KERN_SUCCESS) {
+ break;
+ }
+ }
+
+ kr = KERN_SUCCESS;
+ } else
+ kr = KERN_INVALID_ADDRESS;
+
+ vm_map_unlock_read(map);
+ return kr;
+}
+
+static boolean_t
+vm_map_entry_is_reusable(
+ vm_map_entry_t entry)
+{
+ vm_object_t object;
+
+ if (entry->is_shared ||
+ entry->is_sub_map ||
+ entry->in_transition ||
+ entry->protection != VM_PROT_DEFAULT ||
+ entry->max_protection != VM_PROT_ALL ||
+ entry->inheritance != VM_INHERIT_DEFAULT ||
+ entry->no_cache ||
+ entry->permanent ||
+ entry->superpage_size != 0 ||
+ entry->zero_wired_pages ||
+ entry->wired_count != 0 ||
+ entry->user_wired_count != 0) {
+ return FALSE;