]> git.saurik.com Git - apple/system_cmds.git/blame - gcore.tproj/vanilla.c
system_cmds-790.tar.gz
[apple/system_cmds.git] / gcore.tproj / vanilla.c
CommitLineData
cf37c299
A
1/*
2 * Copyright (c) 2016 Apple Inc. All rights reserved.
3 */
4
5#include "options.h"
6#include "vm.h"
7#include "region.h"
8#include "utils.h"
9#include "dyld.h"
10#include "threads.h"
11#include "vanilla.h"
12#include "sparse.h"
13
14#include <sys/types.h>
15#include <sys/sysctl.h>
16#include <sys/stat.h>
17#include <sys/mman.h>
887d5eed 18#include <sys/mount.h>
cf37c299
A
19#include <libproc.h>
20
21#include <stdio.h>
22#include <string.h>
23#include <strings.h>
24#include <stdlib.h>
25#include <stdarg.h>
26#include <signal.h>
27#include <unistd.h>
28#include <errno.h>
29#include <ctype.h>
30#include <fcntl.h>
31#include <assert.h>
32#include <sysexits.h>
33
34#include <mach/mach.h>
35
887d5eed
A
36/*
37 * (Another optimization to consider is merging adjacent regions with
38 * the same properties.)
39 */
40
41static walk_return_t
42simple_region_optimization(struct region *r, __unused void *arg)
cf37c299
A
43{
44 assert(0 != R_SIZE(r));
45
46 /*
47 * Elide unreadable regions
48 */
49 if ((r->r_info.max_protection & VM_PROT_READ) != VM_PROT_READ) {
887d5eed 50 if (OPTIONS_DEBUG(opt, 2))
cf37c299
A
51 printr(r, "eliding unreadable region\n");
52 return WALK_DELETE_REGION;
53 }
54#ifdef CONFIG_SUBMAP
55 /*
56 * Elide submaps (here for debugging purposes?)
57 */
58 if (r->r_info.is_submap) {
887d5eed 59 if (OPTIONS_DEBUG(opt))
cf37c299
A
60 printr(r, "eliding submap\n");
61 return WALK_DELETE_REGION;
62 }
63#endif
64 /*
65 * Elide guard regions
66 */
67 if (r->r_info.protection == VM_PROT_NONE &&
68 (VM_MEMORY_STACK == r->r_info.user_tag ||
69 VM_MEMORY_MALLOC == r->r_info.user_tag)) {
887d5eed 70 if (OPTIONS_DEBUG(opt, 2)) {
cf37c299 71 hsize_str_t hstr;
887d5eed
A
72 tag_str_t tstr;
73 printr(r, "elide %s - %s\n", str_hsize(hstr, R_SIZE(r)), str_tagr(tstr, r));
cf37c299
A
74 }
75 return WALK_DELETE_REGION;
76 }
887d5eed
A
77
78 /*
79 * Regions full of clean zfod data e.g. VM_MEMORY_MALLOC_LARGE can be recorded as zfod
80 */
81 if (r->r_info.share_mode == SM_PRIVATE &&
82 0 == r->r_info.external_pager &&
83 0 == r->r_info.pages_dirtied) {
84 if (OPTIONS_DEBUG(opt, 2)) {
85 hsize_str_t hstr;
86 tag_str_t tstr;
87 printr(r, "convert to zfod %s - %s\n", str_hsize(hstr, R_SIZE(r)), str_tagr(tstr, r));
88 }
89 r->r_inzfodregion = true;
90 r->r_op = &zfod_ops;
91 }
92
cf37c299
A
93 return WALK_CONTINUE;
94}
95
96/*
97 * (Paranoid validation + debugging assistance.)
98 */
887d5eed 99void
cf37c299
A
100validate_core_header(const native_mach_header_t *mh, off_t corefilesize)
101{
887d5eed
A
102 assert(NATIVE_MH_MAGIC == mh->magic);
103 assert(MH_CORE == mh->filetype);
104
105 if (OPTIONS_DEBUG(opt, 2))
106 printf("%s: core file: mh %p ncmds %u sizeofcmds %u\n",
107 __func__, mh, mh->ncmds, mh->sizeofcmds);
cf37c299 108
887d5eed
A
109 unsigned sizeofcmds = 0;
110 off_t corefilemaxoff = 0;
cf37c299
A
111 const struct load_command *lc = (const void *)(mh + 1);
112 for (unsigned i = 0; i < mh->ncmds; i++) {
113
114 if ((uintptr_t)lc < (uintptr_t)mh ||
115 (uintptr_t)lc > (uintptr_t)mh + mh->sizeofcmds) {
116 warnx("load command %p outside mach header range [%p, 0x%lx]?",
117 lc, mh, (uintptr_t)mh + mh->sizeofcmds);
118 abort();
119 }
887d5eed
A
120 if (OPTIONS_DEBUG(opt, 2))
121 printf("lc %p cmd %3u size %3u ", lc, lc->cmd, lc->cmdsize);
122 sizeofcmds += lc->cmdsize;
cf37c299
A
123
124 switch (lc->cmd) {
887d5eed
A
125 case NATIVE_LC_SEGMENT: {
126 const native_segment_command_t *sc = (const void *)lc;
127 if (OPTIONS_DEBUG(opt, 2)) {
128 printf("%8s: mem %llx-%llx file %lld-%lld %s/%s nsect %u flags %x\n",
cf37c299
A
129 "SEGMENT",
130 (mach_vm_offset_t)sc->vmaddr,
131 (mach_vm_offset_t)sc->vmaddr + sc->vmsize,
132 (off_t)sc->fileoff,
133 (off_t)sc->fileoff + (off_t)sc->filesize,
887d5eed
A
134 str_prot(sc->initprot), str_prot(sc->maxprot),
135 sc->nsects, sc->flags);
cf37c299
A
136 }
137 if ((off_t)sc->fileoff < mh->sizeofcmds ||
138 (off_t)sc->filesize < 0) {
139 warnx("bad segment command");
140 abort();
141 }
887d5eed
A
142 const off_t endoff = (off_t)sc->fileoff + (off_t)sc->filesize;
143 if ((off_t)sc->fileoff > corefilesize || endoff > corefilesize) {
cf37c299
A
144 /*
145 * We may have run out of space to write the data
146 */
147 warnx("segment command points beyond end of file");
148 }
887d5eed 149 corefilemaxoff = MAX(corefilemaxoff, endoff);
cf37c299 150 break;
887d5eed
A
151 }
152 case proto_LC_COREINFO: {
153 const struct proto_coreinfo_command *cic = (const void *)lc;
154 if (OPTIONS_DEBUG(opt, 2)) {
cf37c299
A
155 uuid_string_t uustr;
156 uuid_unparse_lower(cic->uuid, uustr);
157 printf("%8s: version %d type %d uuid %s addr %llx dyninfo %llx\n",
158 "COREINFO", cic->version, cic->type, uustr, cic->address, cic->dyninfo);
159 }
160 if (cic->version < 1 ||
161 cic->type != proto_CORETYPE_USER) {
162 warnx("bad coreinfo command");
163 abort();
164 }
165 break;
887d5eed
A
166 }
167 case proto_LC_FILEREF: {
168 const struct proto_fileref_command *frc = (const void *)lc;
cf37c299 169 const char *nm = frc->filename.offset + (char *)lc;
887d5eed
A
170 if (OPTIONS_DEBUG(opt, 2)) {
171 printf("%8s: mem %llx-%llx file %lld-%lld %s/%s '%s'\n",
172 "FREF",
173 frc->vmaddr, frc->vmaddr + frc->vmsize,
cf37c299
A
174 (off_t)frc->fileoff,
175 (off_t)frc->fileoff + (off_t)frc->filesize,
887d5eed 176 str_prot(frc->prot), str_prot(frc->maxprot), nm);
cf37c299 177 }
887d5eed
A
178 switch (FREF_ID_TYPE(frc->flags)) {
179 case kFREF_ID_UUID:
180 case kFREF_ID_MTIMESPEC_LE:
181 case kFREF_ID_NONE:
182 break;
183 default:
184 warnx("unknown fref id type: flags %x", frc->flags);
185 abort();
186 }
cf37c299
A
187 if (nm <= (caddr_t)lc ||
188 nm > (caddr_t)lc + lc->cmdsize ||
189 (off_t)frc->fileoff < 0 || (off_t)frc->filesize < 0) {
190 warnx("bad fileref command");
191 abort();
192 }
193 break;
887d5eed
A
194 }
195 case proto_LC_COREDATA: {
196 const struct proto_coredata_command *cc = (const void *)lc;
197 if (OPTIONS_DEBUG(opt, 2)) {
198 printf("%8s: mem %llx-%llx file %lld-%lld %s/%s flags %x\n",
199 "COREDATA",
200 cc->vmaddr, cc->vmaddr + cc->vmsize,
201 (off_t)cc->fileoff,
202 (off_t)cc->fileoff + (off_t)cc->filesize,
203 str_prot(cc->prot), str_prot(cc->maxprot), cc->flags);
204 }
205 if ((off_t)cc->fileoff < mh->sizeofcmds ||
206 (off_t)cc->filesize < 0) {
207 warnx("bad COREDATA command");
208 abort();
209 }
210 const off_t endoff = (off_t)cc->fileoff + (off_t)cc->filesize;
211 if ((off_t)cc->fileoff > corefilesize || endoff > corefilesize) {
212 /*
213 * We may have run out of space to write the data
214 */
215 warnx("segment command points beyond end of file");
216 }
217 corefilemaxoff = MAX(corefilemaxoff, endoff);
218 break;
219 }
220 case LC_THREAD: {
221 const struct thread_command *tc = (const void *)lc;
222 if (OPTIONS_DEBUG(opt, 2))
cf37c299
A
223 printf("%8s:\n", "THREAD");
224 uint32_t *wbuf = (void *)(tc + 1);
225 do {
226 const uint32_t flavor = *wbuf++;
227 const uint32_t count = *wbuf++;
228
887d5eed 229 if (OPTIONS_DEBUG(opt, 2)) {
cf37c299
A
230 printf(" flavor %u count %u\n", flavor, count);
231 if (count) {
887d5eed 232 bool nl = false;
cf37c299
A
233 for (unsigned k = 0; k < count; k++) {
234 if (0 == (k & 7))
235 printf(" [%3u] ", k);
236 printf("%08x ", *wbuf++);
237 if (7 == (k & 7)) {
238 printf("\n");
239 nl = true;
240 } else
241 nl = false;
242 }
243 if (!nl)
244 printf("\n");
245 }
246 } else
247 wbuf += count;
248
249 if (!VALID_THREAD_STATE_FLAVOR(flavor)) {
250 warnx("bad thread state flavor");
251 abort();
252 }
253 } while ((caddr_t) wbuf < (caddr_t)tc + tc->cmdsize);
254 break;
887d5eed 255 }
cf37c299 256 default:
887d5eed 257 warnx("unknown cmd %u in header", lc->cmd);
cf37c299
A
258 abort();
259 }
260 if (lc->cmdsize)
261 lc = (const void *)((caddr_t)lc + lc->cmdsize);
262 else
263 break;
264 }
887d5eed
A
265 if (corefilemaxoff < corefilesize)
266 warnx("unused data after corefile offset %lld", corefilemaxoff);
267 if (sizeofcmds != mh->sizeofcmds) {
268 warnx("inconsistent mach header %u vs. %u", sizeofcmds, mh->sizeofcmds);
269 abort();
270 }
cf37c299
A
271}
272
/*
 * The vanilla Mach-O core file consists of:
 *
 * - A Mach-O header of type MH_CORE
 *
 * A set of load commands of the following types:
 *
 * - LC_SEGMENT{,_64} pointing at memory content in the file,
 *   each chunk consisting of a contiguous region.  Regions may be zfod
 *   (no file content present).
 *
 * - proto_LC_COREDATA pointing at memory content in the file,
 *   each chunk consisting of a contiguous region.  Regions may be zfod
 *   (no file content present) or content may be compressed (experimental)
 *
 * - proto_LC_COREINFO (experimental), pointing at dyld (10.12 onwards)
 *
 * - proto_LC_FILEREF (experimental) pointing at memory
 *   content to be mapped in from another uuid-tagged file at various offsets
 *
 * - LC_THREAD commands with state for each thread
 *
 * These load commands are followed by the relevant contents of memory,
 * pointed to by the various commands.
 */
298
299int
300coredump_write(
301 const task_t task,
302 const int fd,
303 struct regionhead *rhead,
304 const uuid_t aout_uuid,
305 mach_vm_offset_t aout_load_addr,
306 mach_vm_offset_t dyld_aii_addr)
307{
308 struct size_segment_data ssda;
309 bzero(&ssda, sizeof (ssda));
310
311 if (walk_region_list(rhead, region_size_memory, &ssda) < 0) {
312 warnx(0, "cannot count segments");
313 return EX_OSERR;
314 }
315
316 unsigned thread_count = 0;
317 mach_port_t *threads = NULL;
318 kern_return_t ret = task_threads(task, &threads, &thread_count);
319 if (KERN_SUCCESS != ret || thread_count < 1) {
320 err_mach(ret, NULL, "cannot retrieve threads");
321 thread_count = 0;
322 }
323
887d5eed
A
324 if (OPTIONS_DEBUG(opt, 3)) {
325 print_memory_region_header();
326 walk_region_list(rhead, region_print_memory, NULL);
327 printf("\nmach header %lu\n", sizeof (native_mach_header_t));
328 printf("threadcount %u threadsize %lu\n", thread_count, thread_count * sizeof_LC_THREAD());
329 printf("fileref %lu %lu %llu\n", ssda.ssd_fileref.count, ssda.ssd_fileref.headersize, ssda.ssd_fileref.memsize);
330 printf("zfod %lu %lu %llu\n", ssda.ssd_zfod.count, ssda.ssd_zfod.headersize, ssda.ssd_zfod.memsize);
331 printf("vanilla %lu %lu %llu\n", ssda.ssd_vanilla.count, ssda.ssd_vanilla.headersize, ssda.ssd_vanilla.memsize);
332 printf("sparse %lu %lu %llu\n", ssda.ssd_sparse.count, ssda.ssd_sparse.headersize, ssda.ssd_sparse.memsize);
333 }
cf37c299
A
334
335 size_t headersize = sizeof (native_mach_header_t) +
336 thread_count * sizeof_LC_THREAD() +
337 ssda.ssd_fileref.headersize +
338 ssda.ssd_zfod.headersize +
339 ssda.ssd_vanilla.headersize +
340 ssda.ssd_sparse.headersize;
887d5eed 341 if (opt->extended)
cf37c299
A
342 headersize += sizeof (struct proto_coreinfo_command);
343
344 void *header = calloc(1, headersize);
345 if (NULL == header)
346 errx(EX_OSERR, "out of memory for header");
347
348 native_mach_header_t *mh = make_corefile_mach_header(header);
349 struct load_command *lc = (void *)(mh + 1);
350
887d5eed 351 if (opt->extended) {
cf37c299
A
352 const struct proto_coreinfo_command *cc =
353 make_coreinfo_command(mh, lc, aout_uuid, aout_load_addr, dyld_aii_addr);
354 lc = (void *)((caddr_t)cc + cc->cmdsize);
355 }
356
887d5eed 357 if (opt->verbose) {
cf37c299
A
358 const unsigned long fileref_count = ssda.ssd_fileref.count;
359 const unsigned long segment_count = fileref_count +
360 ssda.ssd_zfod.count + ssda.ssd_vanilla.count + ssda.ssd_sparse.count;
887d5eed 361 printf("Writing %lu segments", segment_count);
cf37c299
A
362 if (0 != fileref_count)
363 printf(" (including %lu file reference%s (%lu bytes))",
364 fileref_count, 1 == fileref_count ? "" : "s",
365 ssda.ssd_fileref.headersize);
366 printf("\n");
367 }
368
887d5eed
A
369 mach_vm_offset_t pagesize = ((mach_vm_offset_t)1 << pageshift_host);
370 mach_vm_offset_t pagemask = pagesize - 1;
cf37c299
A
371
372 struct write_segment_data wsda = {
373 .wsd_task = task,
374 .wsd_mh = mh,
375 .wsd_lc = lc,
376 .wsd_fd = fd,
887d5eed
A
377 .wsd_nocache = false,
378 .wsd_foffset = ((mach_vm_offset_t)headersize + pagemask) & ~pagemask,
cf37c299
A
379 .wsd_nwritten = 0,
380 };
381
887d5eed 382 int ecode = 0;
cf37c299
A
383 if (0 != walk_region_list(rhead, region_write_memory, &wsda))
384 ecode = EX_IOERR;
385
386 del_region_list(rhead);
387
388 struct thread_command *tc = (void *)wsda.wsd_lc;
389
390 for (unsigned t = 0; t < thread_count; t++) {
391 dump_thread_state(mh, tc, threads[t]);
392 mach_port_deallocate(mach_task_self(), threads[t]);
393 tc = (void *)((caddr_t)tc + tc->cmdsize);
394 }
395
396 /*
397 * Even if we've run out of space, try our best to
398 * write out the header.
399 */
887d5eed 400 if (0 != bounded_pwrite(fd, header, headersize, 0, &wsda.wsd_nocache, NULL))
cf37c299 401 ecode = EX_IOERR;
887d5eed
A
402 if (0 == ecode && headersize != sizeof (*mh) + mh->sizeofcmds)
403 ecode = EX_SOFTWARE;
404 if (0 == ecode)
cf37c299
A
405 wsda.wsd_nwritten += headersize;
406
407 validate_core_header(mh, wsda.wsd_foffset);
408
409 if (ecode)
410 warnx("failed to write core file correctly");
411 else if (opt->verbose) {
412 hsize_str_t hsz;
413 printf("Wrote %s to corefile ", str_hsize(hsz, wsda.wsd_nwritten));
414 printf("(memory image %s", str_hsize(hsz, ssda.ssd_vanilla.memsize));
415 if (ssda.ssd_sparse.memsize)
416 printf("+%s", str_hsize(hsz, ssda.ssd_sparse.memsize));
417 if (ssda.ssd_fileref.memsize)
418 printf(", referenced %s", str_hsize(hsz, ssda.ssd_fileref.memsize));
419 if (ssda.ssd_zfod.memsize)
420 printf(", zfod %s", str_hsize(hsz, ssda.ssd_zfod.memsize));
421 printf(")\n");
422 }
423 free(header);
424 return ecode;
425}
426
887d5eed
A
427static void
428addfileref(struct region *r, const struct libent *le, const char *nm)
429{
430 r->r_fileref = calloc(1, sizeof (*r->r_fileref));
431 if (r->r_fileref) {
432 if (le) {
433 assert(NULL == nm);
434 r->r_fileref->fr_libent = le;
435 r->r_fileref->fr_pathname = le->le_pathname;
436 } else {
437 assert(NULL == le);
438 r->r_fileref->fr_pathname = strdup(nm);
439 }
440 r->r_fileref->fr_offset = r->r_pageinfo.offset;
441 r->r_op = &fileref_ops;
442 }
443}
444
/*
 * Once all the info about the shared cache (filerefs) and the information from
 * dyld (filerefs and subregions) has been applied, take one last look for
 * mappings of filesystem content to convert to additional filerefs.
 *
 * By default we are pessimistic: read-only mappings on read-only root.
 */
452static walk_return_t
453label_mapped_files(struct region *r, void *arg)
454{
455 const struct proc_bsdinfo *pbi = arg;
456
457 if (r->r_fileref || r->r_insharedregion || r->r_incommregion || r->r_inzfodregion)
458 return WALK_CONTINUE;
459 if (r->r_nsubregions)
460 return WALK_CONTINUE;
461 if (!r->r_info.external_pager)
462 return WALK_CONTINUE;
463 if (!opt->allfilerefs) {
464 /* must be mapped without write permission */
465 if (0 != (r->r_info.protection & VM_PROT_WRITE))
466 return WALK_CONTINUE;
467 }
468
469 char pathbuf[MAXPATHLEN+1];
470 pathbuf[0] = '\0';
471 int len = proc_regionfilename(pbi->pbi_pid, R_ADDR(r), pathbuf, sizeof (pathbuf)-1);
472 if (len <= 0 || len > MAXPATHLEN)
473 return WALK_CONTINUE;
474 pathbuf[len] = 0;
475
476#if 0
477 /*
478 * On the desktop, only refer to files beginning with particular
479 * prefixes to increase the likelihood that we'll be able to
480 * find the content later on.
481 *
482 * XXX Not practical with a writable root, but helpful for testing.
483 */
484 static const char *white[] = {
485 "/System",
486 "/Library",
487 "/usr",
488 };
489 const unsigned nwhite = sizeof (white) / sizeof (white[0]);
490 bool skip = true;
491 for (unsigned i = 0; skip && i < nwhite; i++)
492 skip = 0 != strncmp(white[i], pathbuf, strlen(white[i]));
493 if (skip) {
494 if (OPTIONS_DEBUG(opt, 3))
495 printf("\t(%s not included)\n", pathbuf);
496 return WALK_CONTINUE;
497 }
498 static const char *black[] = {
499 "/System/Library/Caches",
500 "/Library/Caches",
501 "/usr/local",
502 };
503 const unsigned nblack = sizeof (black) / sizeof (black[0]);
504 for (unsigned i = 0; !skip && i < nblack; i++)
505 skip = 0 == strncmp(black[i], pathbuf, strlen(black[i]));
506 if (skip) {
507 if (OPTIONS_DEBUG(opt, 3))
508 printf("\t(%s excluded)\n", pathbuf);
509 return WALK_CONTINUE;
510 }
511#endif
512
513 struct statfs stfs;
514 if (-1 == statfs(pathbuf, &stfs)) {
515 switch (errno) {
516 case EACCES:
517 case EPERM:
518 case ENOENT:
519 break;
520 default:
521 warnc(errno, "statfs: %s", pathbuf);
522 break;
523 }
524 return WALK_CONTINUE;
525 }
526
527 do {
528 if (OPTIONS_DEBUG(opt, 2))
529 printr(r, "found mapped file %s\n", pathbuf);
530 if (!opt->allfilerefs) {
531 if ((stfs.f_flags & MNT_ROOTFS) != MNT_ROOTFS)
532 break; // must be on the root filesystem
533 if ((stfs.f_flags & MNT_RDONLY) != MNT_RDONLY)
534 break; // must be on a read-only filesystem
535 }
536 if (OPTIONS_DEBUG(opt, 2))
537 print_memory_region(r);
538 addfileref(r, NULL, pathbuf);
539 } while (0);
540
541 return WALK_CONTINUE;
542}
543
cf37c299 544int
887d5eed 545coredump(task_t task, int fd, const struct proc_bsdinfo *__unused pbi)
cf37c299
A
546{
547 /* this is the shared cache id, if any */
548 uuid_t sc_uuid;
549 uuid_clear(sc_uuid);
550
887d5eed
A
551 dyld_process_info dpi = NULL;
552 if (opt->extended) {
553 dpi = get_task_dyld_info(task);
554 if (dpi) {
555 get_sc_uuid(dpi, sc_uuid);
556 }
557 }
cf37c299
A
558
559 /* this group is for LC_COREINFO */
560 mach_vm_offset_t dyld_addr = 0; // all_image_infos -or- dyld mach header
561 mach_vm_offset_t aout_load_addr = 0;
562 uuid_t aout_uuid;
563 uuid_clear(aout_uuid);
564
565 /*
566 * Walk the address space
567 */
568 int ecode = 0;
569 struct regionhead *rhead = coredump_prepare(task, sc_uuid);
570 if (NULL == rhead) {
571 ecode = EX_OSERR;
572 goto done;
573 }
574
887d5eed 575 if (OPTIONS_DEBUG(opt, 1))
cf37c299 576 printf("Optimizing dump content\n");
887d5eed
A
577 walk_region_list(rhead, simple_region_optimization, NULL);
578
579 if (dpi) {
580 /*
581 * Snapshot dyld's info ..
582 */
583 if (!libent_build_nametable(task, dpi))
584 warnx("error parsing dyld data => ignored");
585 else {
586 /*
587 * Find the a.out load address and uuid, and the dyld mach header for the coreinfo
588 */
589 const struct libent *le;
590 if (NULL != (le = libent_lookup_first_bytype(MH_EXECUTE))) {
591 aout_load_addr = le->le_mhaddr;
592 uuid_copy(aout_uuid, le->le_uuid);
593 }
594 if (NULL != (le = libent_lookup_first_bytype(MH_DYLINKER))) {
595 dyld_addr = le->le_mhaddr;
596 }
597
598 /*
599 * Use dyld's view of what's being used in the address
600 * space to shrink the dump.
601 */
602 if (OPTIONS_DEBUG(opt, 1))
603 printf("Decorating dump with dyld-derived data\n");
604 if (0 == walk_region_list(rhead, decorate_memory_region, (void *)dpi)) {
605 if (OPTIONS_DEBUG(opt, 1))
606 printf("Sparse dump optimization(s)\n");
607 walk_region_list(rhead, sparse_region_optimization, NULL);
608 } else {
609 walk_region_list(rhead, undecorate_memory_region, NULL);
610 warnx("error parsing dyld data => ignored");
611 }
612 }
613 free_task_dyld_info(dpi);
614 }
615
616 /*
617 * Hunt for any memory mapped files that we can include by reference
618 * Depending on whether the bsd part of the task is still present
619 * we might be able to determine filenames of other regions mapping
620 * them here - this allows fonts, images, and other read-only content
621 * to be converted into file references, further reducing the size
622 * of the dump.
623 *
624 * NOTE: Even though the corpse snapshots the VM, the filesystem is
625 * not correspondingly snapshotted and thus may mutate while the dump
626 * proceeds - so be pessimistic about inclusion.
627 */
628 if (opt->extended && NULL != pbi) {
629 if (OPTIONS_DEBUG(opt, 1))
630 printf("Mapped file optimization\n");
631 walk_region_list(rhead, label_mapped_files, (void *)pbi);
632 }
633
634 if (OPTIONS_DEBUG(opt, 1))
cf37c299 635 printf("Optimization(s) done\n");
887d5eed 636
cf37c299
A
637done:
638 if (0 == ecode)
639 ecode = coredump_write(task, fd, rhead, aout_uuid, aout_load_addr, dyld_addr);
640 return ecode;
641}
642
cf37c299
A
643struct find_shared_cache_args {
644 task_t fsc_task;
645 vm_object_id_t fsc_object_id;
646 vm32_object_id_t fsc_region_object_id;
647 uuid_t fsc_uuid;
648 const struct libent *fsc_le;
649 int fsc_fd;
650};
651
652/*
653 * This is "find the objid of the first shared cache" in the shared region.
654 */
655static walk_return_t
656find_shared_cache(struct region *r, void *arg)
657{
658 struct find_shared_cache_args *fsc = arg;
659
660 if (!r->r_insharedregion)
661 return WALK_CONTINUE; /* wrong address range! */
662 if (0 != r->r_info.user_tag)
663 return WALK_CONTINUE; /* must be tag zero */
664 if ((VM_PROT_READ | VM_PROT_EXECUTE) != r->r_info.protection ||
665 r->r_info.protection != r->r_info.max_protection)
666 return WALK_CONTINUE; /* must be r-x / r-x */
667 if (r->r_pageinfo.offset != 0)
668 return WALK_CONTINUE; /* must map beginning of file */
669
887d5eed 670 if (OPTIONS_DEBUG(opt, 1)) {
cf37c299 671 hsize_str_t hstr;
887d5eed 672 printr(r, "examining %s shared cache candidate\n", str_hsize(hstr, R_SIZE(r)));
cf37c299
A
673 }
674
675 struct copied_dyld_cache_header *ch;
676 mach_msg_type_number_t chlen = sizeof (*ch);
677 kern_return_t ret = mach_vm_read(fsc->fsc_task, R_ADDR(r), sizeof (*ch), (vm_offset_t *)&ch, &chlen);
678
679 if (KERN_SUCCESS != ret) {
887d5eed 680 err_mach(ret, NULL, "mach_vm_read() candidate shared region header");
cf37c299
A
681 return WALK_CONTINUE;
682 }
683
684 uuid_t scuuid;
685 if (get_uuid_from_shared_cache_mapping(ch, chlen, scuuid) &&
686 uuid_compare(scuuid, fsc->fsc_uuid) == 0) {
887d5eed 687 if (OPTIONS_DEBUG(opt, 1)) {
cf37c299
A
688 uuid_string_t uustr;
689 uuid_unparse_lower(fsc->fsc_uuid, uustr);
690 printr(r, "found shared cache %s here\n", uustr);
691 }
692 if (!r->r_info.external_pager) {
887d5eed 693 if (OPTIONS_DEBUG(opt, 1))
cf37c299
A
694 printf("Hmm. Found shared cache magic# + uuid, but not externally paged?\n");
695#if 0
696 return WALK_CONTINUE; /* should be "paged" from a file */
697#endif
698 }
699 // This is the ID associated with the first page of the mapping
700 fsc->fsc_object_id = r->r_pageinfo.object_id;
701 // This is the ID associated with the region
702 fsc->fsc_region_object_id = r->r_info.object_id;
703 }
704 mach_vm_deallocate(mach_task_self(), (vm_offset_t)ch, chlen);
705 if (fsc->fsc_object_id) {
887d5eed 706 if (OPTIONS_DEBUG(opt, 3)) {
cf37c299
A
707 uuid_string_t uu;
708 uuid_unparse_lower(fsc->fsc_uuid, uu);
709 printf("Shared cache objid %llx uuid %s\n",
710 fsc->fsc_object_id, uu);
711 }
712 return WALK_TERMINATE;
713 }
714 return WALK_CONTINUE;
715}
716
887d5eed 717static bool
cf37c299
A
718compare_region_with_shared_cache(const struct region *r, struct find_shared_cache_args *fsc)
719{
720 struct stat st;
721 if (-1 == fstat(fsc->fsc_fd, &st)) {
887d5eed
A
722 if (OPTIONS_DEBUG(opt, 1))
723 printr(r, "cannot fstat %s - %s\n",
cf37c299
A
724 fsc->fsc_le->le_filename, strerror(errno));
725 return false;
726 }
887d5eed 727 void *file = mmap(NULL, R_SIZEOF(r), PROT_READ, MAP_PRIVATE, fsc->fsc_fd, r->r_pageinfo.offset);
cf37c299 728 if ((void *)-1L == file) {
887d5eed 729 if (OPTIONS_DEBUG(opt, 1))
cf37c299
A
730 printr(r, "mmap %s - %s\n", fsc->fsc_le->le_filename, strerror(errno));
731 return false;
732 }
887d5eed 733 madvise(file, R_SIZEOF(r), MADV_SEQUENTIAL);
cf37c299
A
734
735 vm_offset_t data = 0;
736 mach_msg_type_number_t data_count;
737 const kern_return_t kr = mach_vm_read(fsc->fsc_task, R_ADDR(r), R_SIZE(r), &data, &data_count);
738
739 if (KERN_SUCCESS != kr || data_count < R_SIZE(r)) {
740 err_mach(kr, r, "mach_vm_read()");
887d5eed 741 munmap(file, R_SIZEOF(r));
cf37c299
A
742 return false;
743 }
744
745 mach_vm_size_t cmpsize = data_count;
746
747#ifdef RDAR_23744374
748 /*
749 * Now we have the corresponding regions mapped, we should be
750 * able to compare them. There's just one last twist that relates
751 * to heterogenous pagesize systems: rdar://23744374
752 */
753 if (st.st_size < (off_t)(r->r_pageinfo.offset + cmpsize) &&
754 pageshift_host < pageshift_app) {
755 /*
756 * Looks like we're about to map close to the end of the object.
757 * Check what's really mapped there and reduce the size accordingly.
758 */
759 if (!is_actual_size(fsc->fsc_task, r, &cmpsize)) {
887d5eed 760 if (OPTIONS_DEBUG(opt, 3))
cf37c299
A
761 printr(r, "narrowing the comparison (%llu "
762 "-> %llu)\n", R_SIZE(r), cmpsize);
763 }
764 }
765#endif
766
767 mach_vm_behavior_set(mach_task_self(), data, data_count, VM_BEHAVIOR_SEQUENTIAL);
768
887d5eed 769 const bool thesame = memcmp(file, (void *)data, (size_t)cmpsize) == 0;
cf37c299
A
770#if 0
771 if (!thesame) {
772 int diffcount = 0;
773 int samecount = 0;
774 const char *f = file;
775 const char *d = (void *)data;
776 for (mach_vm_size_t off = 0; off < cmpsize; off += 4096) {
777 if (memcmp(f, d, 4096) != 0) {
778 diffcount++;
779 } else samecount++;
780 f += 4096;
781 d += 4096;
782 }
783 if (diffcount)
784 printr(r, "%d of %d pages different\n", diffcount, diffcount + samecount);
785 }
786#endif
787 mach_vm_deallocate(mach_task_self(), data, data_count);
887d5eed 788 munmap(file, R_SIZEOF(r));
cf37c299 789
887d5eed 790 if (!thesame && OPTIONS_DEBUG(opt, 3))
cf37c299
A
791 printr(r, "mapped file (%s) region is modified\n", fsc->fsc_le->le_filename);
792 return thesame;
793}
794
795static walk_return_t
796label_shared_cache(struct region *r, void *arg)
797{
798 struct find_shared_cache_args *fsc = arg;
799
800 if (!r->r_insharedregion)
801 return WALK_CONTINUE;
802 if (!r->r_info.external_pager)
803 return WALK_CONTINUE;
804 if (r->r_pageinfo.object_id != fsc->fsc_object_id) {
805 /* wrong object, or first page already modified */
806 return WALK_CONTINUE;
807 }
808 if (((r->r_info.protection | r->r_info.max_protection) & VM_PROT_WRITE) != 0) {
887d5eed
A
809 /* potentially writable, but was it written? */
810 if (0 != r->r_info.pages_dirtied)
811 return WALK_CONTINUE;
812 if (0 != r->r_info.pages_swapped_out)
813 return WALK_CONTINUE;
814 if (0 != r->r_info.pages_resident && !r->r_info.external_pager)
815 return WALK_CONTINUE;
816 if (OPTIONS_DEBUG(opt, 1))
817 printr(r, "verifying shared cache content against memory image\n");
cf37c299
A
818 if (!compare_region_with_shared_cache(r, fsc)) {
819 /* bits don't match */
887d5eed
A
820 if (OPTIONS_DEBUG(opt, 1))
821 printr(r, "hmm .. mismatch: using memory image\n");
cf37c299
A
822 return WALK_CONTINUE;
823 }
887d5eed 824 }
cf37c299
A
825
826 /*
827 * This mapped file segment will be represented as a reference
887d5eed 828 * to the file, rather than as a copy of the mapped file.
cf37c299 829 */
887d5eed
A
830 addfileref(r, libent_lookup_byuuid(fsc->fsc_uuid), NULL);
831 return WALK_CONTINUE;
cf37c299 832}
cf37c299
A
833
834struct regionhead *
835coredump_prepare(task_t task, uuid_t sc_uuid)
836{
837 struct regionhead *rhead = build_region_list(task);
838
887d5eed
A
839 if (OPTIONS_DEBUG(opt, 2)) {
840 printf("Region list built\n");
cf37c299
A
841 print_memory_region_header();
842 walk_region_list(rhead, region_print_memory, NULL);
843 }
844
845 if (uuid_is_null(sc_uuid))
846 return rhead;
847
848 /*
849 * Name the shared cache, if we can
850 */
851 char *nm = shared_cache_filename(sc_uuid);
852 const struct libent *le;
853
854 if (NULL != nm)
887d5eed 855 le = libent_insert(nm, sc_uuid, 0, NULL, NULL, 0);
cf37c299 856 else {
887d5eed 857 libent_insert("(anonymous shared cache)", sc_uuid, 0, NULL, NULL, 0);
cf37c299 858 if (opt->verbose){
887d5eed
A
859 printf("Warning: cannot name the shared cache ");
860 if (OPTIONS_DEBUG(opt, 1)) {
861 uuid_string_t uustr;
862 uuid_unparse_lower(sc_uuid, uustr);
863 printf("(%s) ", uustr);
864 }
865 printf("- dump may be large!\n");
cf37c299 866 }
887d5eed 867 return rhead;
cf37c299
A
868 }
869
887d5eed 870 if (opt->extended) {
cf37c299
A
871 /*
872 * See if we can replace entire regions with references to the shared cache
873 * by looking at the VM meta-data about those regions.
874 */
887d5eed 875 if (OPTIONS_DEBUG(opt, 1)) {
cf37c299
A
876 uuid_string_t uustr;
877 uuid_unparse_lower(sc_uuid, uustr);
878 printf("Searching for shared cache with uuid %s\n", uustr);
879 }
880
881 /*
882 * Identify the regions mapping the shared cache by comparing the UUID via
883 * dyld with the UUID of likely-looking mappings in the right address range
884 */
885 struct find_shared_cache_args fsca;
886 bzero(&fsca, sizeof (fsca));
887 fsca.fsc_task = task;
888 uuid_copy(fsca.fsc_uuid, sc_uuid);
889 fsca.fsc_fd = -1;
890
891 walk_region_list(rhead, find_shared_cache, &fsca);
892
893 if (0 == fsca.fsc_object_id) {
894 printf("Cannot identify the shared cache region(s) => ignored\n");
895 } else {
896 if (opt->verbose)
897 printf("Referenced %s\n", nm);
898 fsca.fsc_le = le;
887d5eed
A
899 fsca.fsc_fd = open(fsca.fsc_le->le_pathname, O_RDONLY);
900 if (-1 == fsca.fsc_fd)
901 errc(EX_SOFTWARE, errno, "open %s", fsca.fsc_le->le_pathname);
902 else {
903 walk_region_list(rhead, label_shared_cache, &fsca);
904 close(fsca.fsc_fd);
905 }
cf37c299
A
906 free(nm);
907 }
908 }
cf37c299
A
909
910 return rhead;
911}