dyld-832.7.1.tar.gz
[apple/dyld.git] / dyld3 / MachOAnalyzer.h
1 /*
2 * Copyright (c) 2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 #ifndef MachOAnalyzer_h
25 #define MachOAnalyzer_h
26
27
28 #include "MachOLoaded.h"
29 #include "Array.h"
30 #include "ClosureFileSystem.h"
31
32
33 namespace dyld3 {
34
35 // Extra functionality on loaded mach-o files that is only used during closure building.
36 struct VIS_HIDDEN MachOAnalyzer : public MachOLoaded  // apart from the small inline helpers in the nested structs, members are only declared in this header
37 {
38 // inherited members (named via MachOLoaded / MachOFile) promoted to public here; per the original note they are protected where declared
39 using MachOLoaded::SegmentInfo;
40 using MachOLoaded::SectionInfo;
41 using MachOLoaded::forEachSegment;
42 using MachOLoaded::forEachSection;
43 using MachOLoaded::forEachDependentDylib;
44 using MachOLoaded::getDylibInstallName;
45 using MachOLoaded::FoundSymbol;
46 using MachOLoaded::findExportedSymbol;
47 using MachOLoaded::forEachGlobalSymbol;
48 using MachOLoaded::forEachLocalSymbol;
49 using MachOFile::canBePlacedInDyldCache;
50 using MachOFile::forEachLoadCommand;
51 using MachOFile::removeLoadCommand;
52
53 enum class Rebase {  // rebase kind handed to the low-level forEachRebase() handler and to invalidRebaseState()
54 unknown,
55 pointer32,
56 pointer64,
57 textPCrel32,
58 textAbsolute32,
59 };
60
61 static bool loadFromBuffer(Diagnostics& diag, const closure::FileSystem& fileSystem,  // static; 'info' is taken by non-const reference
62 const char* path, const GradedArchs& archs, Platform platform,
63 closure::LoadedFileInfo& info);
64 static closure::LoadedFileInfo load(Diagnostics& diag, const closure::FileSystem& fileSystem,  // static; returns the LoadedFileInfo by value
65 const char* logicalPath, const GradedArchs& archs, Platform platform, char realerPath[MAXPATHLEN]);  // presumably realerPath receives the resolved path - confirm in the implementation
66 static const MachOAnalyzer* validMainExecutable(Diagnostics& diag, const mach_header* mh, const char* path, uint64_t sliceLength,  // static; presumably returns nullptr when validation fails - confirm
67 const GradedArchs& archs, Platform platform);
68
69 typedef void (^ExportsCallback)(const char* symbolName, uint64_t imageOffset, uint64_t flags,  // block type taken by forEachExportedSymbol() and recurseTrie()
70 uint64_t other, const char* importName, bool& stop);
71 bool validMachOForArchAndPlatform(Diagnostics& diag, size_t mappedSize, const char* path, const GradedArchs& archs, Platform platform, bool isOSBinary) const;
72
73 // Caches data useful for converting from raw data to VM addresses
74 struct VMAddrConverter {
75 uint64_t preferredLoadAddress = 0;
76 intptr_t slide = 0;
77 uint16_t chainedPointerFormat = 0;
78 bool contentRebased = false;
79 #if !(BUILDING_LIBDYLD || BUILDING_DYLD)  // the shared-cache format enum and field exist only when building neither libdyld nor dyld
80 enum class SharedCacheFormat : uint8_t {
81 none = 0,
82 v2_x86_64_tbi = 1,
83 v3 = 2
84 };
85 SharedCacheFormat sharedCacheChainedPointerFormat = SharedCacheFormat::none;
86 #endif
87
88 uint64_t convertToVMAddr(uint64_t v) const;  // defined out of line; presumably applies the cached fields above to 'v' - confirm
89 };
90
91 VMAddrConverter makeVMAddrConverter(bool contentRebased) const;  // returns a converter by value; the argument name matches the VMAddrConverter::contentRebased field
92
93 uint64_t mappedSize() const;
94 bool hasObjC() const;
95 bool hasPlusLoadMethod(Diagnostics& diag) const;
96 bool usesObjCGarbageCollection() const;
97 bool isSwiftLibrary() const;
98 uint64_t preferredLoadAddress() const;
99 void forEachRPath(void (^callback)(const char* rPath, bool& stop)) const;
100 bool hasProgramVars(Diagnostics& diag, uint32_t& progVarsOffset) const;
101 void forEachCDHash(void (^handler)(const uint8_t cdHash[20])) const;  // the handler parameter is declared as a 20-byte cdHash array
102 bool hasCodeSignature(uint32_t& fileOffset, uint32_t& size) const;
103 bool usesLibraryValidation() const;
104 bool isRestricted() const;
105 bool getEntry(uint64_t& offset, bool& usesCRT) const;
106 bool isSlideable() const;
107 bool hasInitializer(Diagnostics& diag, const VMAddrConverter& vmAddrConverter, const void* dyldCache=nullptr) const;  // dyldCache is optional (defaults to nullptr)
108 void forEachInitializerPointerSection(Diagnostics& diag, void (^callback)(uint32_t sectionOffset, uint32_t sectionSize, const uint8_t* content, bool& stop)) const;
109 void forEachInitializer(Diagnostics& diag, const VMAddrConverter& vmAddrConverter, void (^callback)(uint32_t offset), const void* dyldCache=nullptr) const;  // dyldCache is optional (defaults to nullptr)
110 bool hasTerminators(Diagnostics& diag, const VMAddrConverter& vmAddrConverter) const;
111 void forEachTerminator(Diagnostics& diag, const VMAddrConverter& vmAddrConverter, void (^callback)(uint32_t offset)) const;
112 void forEachDOFSection(Diagnostics& diag, void (^callback)(uint32_t offset)) const;
113 uint32_t segmentCount() const;
114 void forEachExportedSymbol(Diagnostics& diag, ExportsCallback callback) const;
115 void forEachWeakDef(Diagnostics& diag, void (^callback)(bool strongDef, uint32_t dataSegIndex, uint64_t dataSegOffset,  // first of two forEachWeakDef overloads; the other one's handler takes (symbolName, imageOffset, isFromExportTrie)
116 uint64_t addend, const char* symbolName, bool& stop)) const;
117 void forEachIndirectPointer(Diagnostics& diag, void (^handler)(uint64_t pointerAddress, bool bind, int bindLibOrdinal,
118 const char* bindSymbolName, bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& stop)) const;
119 void forEachInterposingSection(Diagnostics& diag, void (^handler)(uint64_t vmOffset, uint64_t vmSize, bool& stop)) const;
120 const void* content(uint64_t vmOffset);  // NOTE(review): unlike the neighbouring queries this member is not const-qualified
121 void forEachLocalReloc(void (^handler)(uint64_t runtimeOffset, bool& stop)) const;
122 void forEachExternalReloc(void (^handler)(uint64_t runtimeOffset, int libOrdinal, const char* symbolName, bool& stop)) const;
123
124 const void* getRebaseOpcodes(uint32_t& size) const;  // this and the next three getters return a pointer and take 'size' by reference (presumably the byte count of the returned data - confirm)
125 const void* getBindOpcodes(uint32_t& size) const;
126 const void* getLazyBindOpcodes(uint32_t& size) const;
127 const void* getSplitSeg(uint32_t& size) const;
128 bool hasSplitSeg() const;
129 bool isSplitSegV1() const;
130 bool isSplitSegV2() const;
131 uint64_t segAndOffsetToRuntimeOffset(uint8_t segIndex, uint64_t segOffset) const;
132 bool hasLazyPointers(uint32_t& runtimeOffset, uint32_t& size) const;
133 void forEachRebase(Diagnostics& diag, bool ignoreLazyPointer, void (^callback)(uint64_t runtimeOffset, bool& stop)) const;  // forEachRebase overload 1 of 3: callback gets only the runtime offset
134 void forEachRebase(Diagnostics& diag, void (^callback)(uint64_t runtimeOffset, bool isLazyPointerRebase, bool& stop)) const;  // overload 2 of 3: callback also gets isLazyPointerRebase
135 void forEachTextRebase(Diagnostics& diag, void (^callback)(uint64_t runtimeOffset, bool& stop)) const;
136 void forEachBind(Diagnostics& diag, void (^callback)(uint64_t runtimeOffset, int libOrdinal, const char* symbolName,  // forEachBind overload 1 of 3: callback without the bind 'type'
137 bool weakImport, bool lazyBind, uint64_t addend, bool& stop),
138 void (^strongHandler)(const char* symbolName)) const;
139 void forEachBind(Diagnostics& diag, void (^callback)(uint64_t runtimeOffset, int libOrdinal, uint8_t type, const char* symbolName,  // overload 2 of 3: same as above plus 'uint8_t type'
140 bool weakImport, bool lazyBind, uint64_t addend, bool& stop),
141 void (^strongHandler)(const char* symbolName)) const;
142 void forEachChainedFixupTarget(Diagnostics& diag, void (^callback)(int libOrdinal, const char* symbolName, uint64_t addend, bool weakImport, bool& stop)) const;
143 void forEachRebase(Diagnostics& diag, void (^handler)(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],  // overload 3 of 3: handler gets opcode name, link-edit info, segments and the Rebase kind
144 bool segIndexSet, uint32_t pointerSize, uint8_t segmentIndex, uint64_t segmentOffset, Rebase kind, bool& stop)) const;
145 void forEachBind(Diagnostics& diag, void (^handler)(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],  // overload 3 of 3: handler parameters mirror those of invalidBindState() plus weakImport/lazyBind/addend/stop
146 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal,
147 uint32_t pointerSize, uint8_t segmentIndex, uint64_t segmentOffset,
148 uint8_t type, const char* symbolName, bool weakImport, bool lazyBind, uint64_t addend, bool& stop),
149 void (^strongHandler)(const char* symbolName)) const;
150 bool canBePlacedInDyldCache(const char* path, void (^failureReason)(const char*)) const;  // declared in addition to the MachOFile::canBePlacedInDyldCache name re-exported above
151 bool canHavePrecomputedDlopenClosure(const char* path, void (^failureReason)(const char*)) const;
152 #if BUILDING_APP_CACHE_UTIL  // the kernel-collection check is only declared when BUILDING_APP_CACHE_UTIL is set
153 bool canBePlacedInKernelCollection(const char* path, void (^failureReason)(const char*)) const;
154 #endif
155 bool usesClassicRelocationsInKernelCollection() const;
156 uint32_t loadCommandsFreeSpace() const;
157 bool hasStompedLazyOpcodes() const;
158
159 #if DEBUG  // validateDyldCacheDylib() is only declared in DEBUG builds
160 void validateDyldCacheDylib(Diagnostics& diag, const char* path) const;
161 #endif
162 void withChainStarts(Diagnostics& diag, uint64_t startsStructOffsetHint, void (^callback)(const dyld_chained_starts_in_image*)) const;
163 uint64_t chainStartsOffset() const;
164 uint16_t chainedPointerFormat() const;  // member overload, no arguments
165 static uint16_t chainedPointerFormat(const dyld_chained_fixups_header* chainHeader);  // static overload taking an explicit fixups header
166 bool hasUnalignedPointerFixups() const;
167 const dyld_chained_fixups_header* chainedFixupsHeader() const;
168 bool hasFirmwareChainStarts(uint16_t* pointerFormat, uint32_t* startsCount, const uint32_t** starts) const;  // three pointer parameters, presumably outputs - confirm whether they may be null
169 bool isOSBinary(int fd, uint64_t sliceOffset, uint64_t sliceSize) const; // checks if binary is codesigned to be part of the OS
170 static bool sliceIsOSBinary(int fd, uint64_t sliceOffset, uint64_t sliceSize);  // static variant with the same (fd, sliceOffset, sliceSize) parameters
171
172 const MachOAnalyzer* remapIfZeroFill(Diagnostics& diag, const closure::FileSystem& fileSystem, closure::LoadedFileInfo& info) const;
173
174 struct ObjCInfo {  // value type returned by getObjCInfo(); the fields have no default initializers
175 uint32_t selRefCount;
176 uint32_t classDefCount;
177 uint32_t protocolDefCount;
178 };
179 ObjCInfo getObjCInfo() const;
180
181 // This optionally caches a list of sections for lookup
182 struct SectionCache {
183 private:
184 SectionInfo buffer[2];  // inline storage passed to sectionInfos below
185
186 public:
187 SectionCache(const MachOAnalyzer* ma) : ma(ma) { }  // stores the analyzer pointer; the other members use their default member initializers
188
189 bool findSectionForVMAddr(uint64_t vmAddr, bool (^sectionHandler)(const SectionInfo& sectionInfo));
190
191 const MachOAnalyzer* ma = nullptr;
192 dyld3::OverflowSafeArray<SectionInfo> sectionInfos = { buffer, sizeof(buffer) / sizeof(buffer[0]) };  // built from 'buffer' and its element count (2); NOTE(review): copying a SectionCache presumably leaves the copy referring to the source's buffer - confirm OverflowSafeArray copy semantics
193 };
194
195 struct ObjCClassInfo {
196 // These fields are all present on the objc_class_t struct
197 uint64_t isaVMAddr = 0;
198 uint64_t superclassVMAddr = 0;
199 //uint64_t methodCacheBuckets;
200 uint64_t methodCacheVMAddr = 0;
201 uint64_t dataVMAddr = 0;
202
203 // This field is only present if this is a Swift object, ie, has the Swift
204 // fast bits set
205 uint32_t swiftClassFlags = 0;
206
207 // These are taken from the low bits of the dataVMAddr value
208 bool isSwiftLegacy = false;
209 bool isSwiftStable = false;
210
211 // Cache the data to convert vmAddr's
212 MachOAnalyzer::VMAddrConverter vmAddrConverter;
213
214 // These are from the class_ro_t which data points to
215 enum class ReadOnlyDataField {  // selector passed to getReadOnlyDataField()
216 name,
217 baseProtocols,
218 baseMethods,
219 baseProperties,
220 flags
221 };
222
223 uint64_t getReadOnlyDataField(ReadOnlyDataField field, uint32_t pointerSize) const;  // defined out of line; the five inline accessors below each forward to it with one enumerator
224 uint64_t nameVMAddr(uint32_t pointerSize) const {
225 return getReadOnlyDataField(ReadOnlyDataField::name, pointerSize);
226 }
227 uint64_t baseProtocolsVMAddr(uint32_t pointerSize) const {
228 return getReadOnlyDataField(ReadOnlyDataField::baseProtocols, pointerSize);
229 }
230 uint64_t baseMethodsVMAddr(uint32_t pointerSize) const {
231 return getReadOnlyDataField(ReadOnlyDataField::baseMethods, pointerSize);
232 }
233 uint64_t basePropertiesVMAddr(uint32_t pointerSize) const {
234 return getReadOnlyDataField(ReadOnlyDataField::baseProperties, pointerSize);
235 }
236 uint64_t flags(uint32_t pointerSize) const {
237 return getReadOnlyDataField(ReadOnlyDataField::flags, pointerSize);
238 }
239
240 // These are embedded in the Mach-O itself by the compiler
241 enum FastDataBits {
242 FAST_IS_SWIFT_LEGACY = 0x1,
243 FAST_IS_SWIFT_STABLE = 0x2
244 };
245
246 // These are embedded by the Swift compiler in the swiftClassFlags field
247 enum SwiftClassFlags {
248 isSwiftPreStableABI = 0x1
249 };
250
251 // Note this is taken from the objc runtime
252 bool isUnfixedBackwardDeployingStableSwift() const {
253 // Only classes marked as Swift legacy need apply.
254 if (!isSwiftLegacy) return false;
255
256 // Check the true legacy vs stable distinguisher.
257 // The low bit of Swift's ClassFlags is SET for true legacy
258 // and UNSET for stable pretending to be legacy.
259 bool isActuallySwiftLegacy = (swiftClassFlags & isSwiftPreStableABI) != 0;
260 return !isActuallySwiftLegacy;  // true only when isSwiftLegacy is set and the isSwiftPreStableABI bit is clear
261 }
262 };
263
264 struct ObjCMethodList {  // holds only constants; no data members
265 // This matches the bits in the objc runtime
266 enum : uint32_t {
267 methodListIsUniqued = 0x1,
268 methodListIsSorted = 0x2,
269
270 // The size is bits 2 through 15 of the entsize field (the mask below is 0x0000FFFC)
271 // The low 2 bits are uniqued/sorted as above. The upper 16-bits
272 // are reserved for other flags
273 methodListSizeMask = 0x0000FFFC
274 };
275 };
276
277 struct ObjCImageInfo {  // objcImageInfo() returns a pointer to this type
278 uint32_t version;
279 uint32_t flags;
280
281 // FIXME: Put this somewhere objc can see it.
282 enum : uint32_t {
283 dyldPreoptimized = 1 << 7  // 0x80; presumably a bit tested in 'flags' - confirm
284 };
285 };
286
287 struct ObjCMethod {  // passed by const reference to the forEachObjCMethod() handler
288 uint64_t nameVMAddr; // & SEL
289 uint64_t typesVMAddr; // & const char *
290 uint64_t impVMAddr; // & IMP
291
292 // We also need to know where the reference to the nameVMAddr was
293 // This is so that we know how to rebind that location
294 uint64_t nameLocationVMAddr;
295 };
296
297 struct ObjCProperty {  // passed by const reference to the forEachObjCProperty() handler
298 uint64_t nameVMAddr; // & const char *
299 uint64_t attributesVMAddr; // & const char *
300 };
301
302 struct ObjCCategory {  // passed by const reference to the forEachObjCCategory() handler
303 uint64_t nameVMAddr;
304 uint64_t clsVMAddr;
305 uint64_t instanceMethodsVMAddr;
306 uint64_t classMethodsVMAddr;
307 uint64_t protocolsVMAddr;
308 uint64_t instancePropertiesVMAddr;
309 };
310
311 struct ObjCProtocol {  // passed by const reference to both forEachObjCProtocol() overloads; the commented-out fields below are not members of this struct
312 uint64_t isaVMAddr;
313 uint64_t nameVMAddr;
314 uint64_t protocolsVMAddr;
315 uint64_t instanceMethodsVMAddr;
316 uint64_t classMethodsVMAddr;
317 uint64_t optionalInstanceMethodsVMAddr;
318 uint64_t optionalClassMethodsVMAddr;
319 //uint64_t instancePropertiesVMAddr;
320 //uint32_t size;
321 //uint32_t flags;
322 // Fields below this point are not always present on disk.
323 //uint64_t extendedMethodTypesVMAddr;
324 //uint64_t demangledNameVMAddr;
325 //uint64_t classPropertiesVMAddr;
326 };
327
328 enum class PrintableStringResult {  // reported through the 'result' out-parameter of getPrintableString()
329 CanPrint,
330 FairPlayEncrypted,
331 ProtectedSection,
332 UnknownSection
333 };
334
335 const char* getPrintableString(uint64_t stringVMAddr, PrintableStringResult& result,  // sectionCache and sectionHandler are optional (both default to nullptr)
336 SectionCache* sectionCache = nullptr,
337 bool (^sectionHandler)(const SectionInfo& sectionInfo) = nullptr) const;
338
339 void parseObjCClass(Diagnostics& diag, const VMAddrConverter& vmAddrConverter,  // handler takes the superclass/data VM addresses plus an ObjCClassInfo
340 uint64_t classVMAddr,
341 void (^handler)(Diagnostics& diag,
342 uint64_t classSuperclassVMAddr,
343 uint64_t classDataVMAddr,
344 const ObjCClassInfo& objcClass)) const;
345
346 void forEachObjCClass(Diagnostics& diag, const VMAddrConverter& vmAddrConverter,  // handler additionally takes the class VM address and an isMetaClass flag
347 void (^handler)(Diagnostics& diag, uint64_t classVMAddr,
348 uint64_t classSuperclassVMAddr, uint64_t classDataVMAddr,
349 const ObjCClassInfo& objcClass, bool isMetaClass)) const;
350
351 void forEachObjCCategory(Diagnostics& diag, const VMAddrConverter& vmAddrConverter,
352 void (^handler)(Diagnostics& diag, uint64_t categoryVMAddr,
353 const dyld3::MachOAnalyzer::ObjCCategory& objcCategory)) const;
354
355 // lists all Protocols defined in the image
356 void forEachObjCProtocol(Diagnostics& diag, const VMAddrConverter& vmAddrConverter,
357 void (^handler)(Diagnostics& diag, uint64_t protocolVMAddr,
358 const dyld3::MachOAnalyzer::ObjCProtocol& objCProtocol)) const;
359
360 // Walk a method list starting from its vmAddr.
361 // Note, classes, categories, protocols, etc, all share the same method list structure so can all use this.
362 void forEachObjCMethod(uint64_t methodListVMAddr, const VMAddrConverter& vmAddrConverter,
363 void (^handler)(uint64_t methodVMAddr, const ObjCMethod& method),
364 bool* isRelativeMethodList = nullptr) const;  // optional pointer parameter (defaults to nullptr); presumably an output - confirm
365
366 void forEachObjCProperty(uint64_t propertyListVMAddr, const VMAddrConverter& vmAddrConverter,
367 void (^handler)(uint64_t propertyVMAddr, const ObjCProperty& property)) const;
368
369 // lists all Protocols on a protocol_list_t
370 void forEachObjCProtocol(uint64_t protocolListVMAddr, const VMAddrConverter& vmAddrConverter,
371 void (^handler)(uint64_t protocolRefVMAddr, const ObjCProtocol& protocol)) const;
372
373 void forEachObjCSelectorReference(Diagnostics& diag, const VMAddrConverter& vmAddrConverter,
374 void (^handler)(uint64_t selRefVMAddr, uint64_t selRefTargetVMAddr)) const;
375
376 void forEachObjCMethodName(void (^handler)(const char* methodName)) const;
377
378 bool hasObjCMessageReferences() const;
379
380 const ObjCImageInfo* objcImageInfo() const;
381
382 void forEachWeakDef(Diagnostics& diag, void (^handler)(const char* symbolName, uint64_t imageOffset, bool isFromExportTrie)) const;  // second forEachWeakDef overload; this handler has no 'stop' parameter
383
384 private:
385
386 struct SegmentStuff
387 {
388 uint64_t fileOffset;
389 uint64_t fileSize;
390 uint64_t writable : 1,  // the four bit-field widths below sum to 64 (1 + 1 + 1 + 61)
391 executable : 1,
392 textRelocsAllowed : 1, // segment supports text relocs (i386 only)
393 segSize : 61;
394 };
395
396 enum class Malformed { linkeditOrder, linkeditAlignment, linkeditPermissions, dyldInfoAndlocalRelocs, segmentOrder,  // categories accepted by enforceFormat()
397 textPermissions, executableData, writableData, codeSigAlignment, sectionsAddrRangeWithinSegment };
398 bool enforceFormat(Malformed) const;
399
400 const uint8_t* getContentForVMAddr(const LayoutInfo& info, uint64_t vmAddr) const;
401 bool validLoadCommands(Diagnostics& diag, const char* path, size_t fileLen) const;  // private valid*() checks: each takes a Diagnostics& and the image path and returns bool
402 bool validEmbeddedPaths(Diagnostics& diag, Platform platform, const char* path) const;
403 bool validSegments(Diagnostics& diag, const char* path, size_t fileLen) const;
404 bool validLinkedit(Diagnostics& diag, const char* path) const;
405 bool validLinkeditLayout(Diagnostics& diag, const char* path) const;
406 bool validRebaseInfo(Diagnostics& diag, const char* path) const;
407 bool validBindInfo(Diagnostics& diag, const char* path) const;
408 bool validMain(Diagnostics& diag, const char* path) const;
409 bool validChainedFixupsInfo(Diagnostics& diag, const char* path) const;
410 bool validChainedFixupsInfoOldArm64e(Diagnostics& diag, const char* path) const;
411
412 bool invalidRebaseState(Diagnostics& diag, const char* opcodeName, const char* path, const LinkEditInfo& leInfo, const SegmentInfo segments[],  // parameters mirror the low-level forEachRebase() handler, plus 'path'
413 bool segIndexSet, uint32_t pointerSize, uint8_t segmentIndex, uint64_t segmentOffset, Rebase kind) const;
414 bool invalidBindState(Diagnostics& diag, const char* opcodeName, const char* path, const LinkEditInfo& leInfo, const SegmentInfo segments[],  // parameters mirror the leading part of the low-level forEachBind() handler, plus 'path'
415 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal, uint32_t pointerSize,
416 uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, const char* symbolName) const;
417 bool doLocalReloc(Diagnostics& diag, uint32_t r_address, bool& stop, void (^callback)(uint32_t dataSegIndex, uint64_t dataSegOffset, uint8_t type, bool& stop)) const;
418 uint8_t relocPointerType() const;
419 int libOrdinalFromDesc(uint16_t n_desc) const;
420 bool doExternalReloc(Diagnostics& diag, uint32_t r_address, uint32_t r_symbolnum, LinkEditInfo& leInfo, bool& stop,  // note: leInfo is a non-const reference here
421 void (^callback)(uint32_t dataSegIndex, uint64_t dataSegOffset, uint8_t type, int libOrdinal,
422 uint64_t addend, const char* symbolName, bool weakImport, bool lazy, bool& stop)) const;
423
424 void getAllSegmentsInfos(Diagnostics& diag, SegmentInfo segments[]) const;  // 'segments' is a caller-provided array with no length parameter; presumably it must hold segmentCount() entries - confirm
425 bool segmentHasTextRelocs(uint32_t segIndex) const;
426 uint64_t localRelocBaseAddress(const SegmentInfo segmentsInfos[], uint32_t segCount) const;
427 uint64_t externalRelocBaseAddress(const SegmentInfo segmentsInfos[], uint32_t segCount) const;
428 bool segIndexAndOffsetForAddress(uint64_t addr, const SegmentInfo segmentsInfos[], uint32_t segCount, uint32_t& segIndex, uint64_t& segOffset) const;
429 void parseOrgArm64eChainedFixups(Diagnostics& diag, void (^targetCount)(uint32_t totalTargets, bool& stop),  // takes three blocks: targetCount, addTarget and addChainStart
430 void (^addTarget)(const LinkEditInfo& leInfo, const SegmentInfo segments[], bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal, uint8_t type, const char* symbolName, uint64_t addend, bool weakImport, bool& stop),
431 void (^addChainStart)(const LinkEditInfo& leInfo, const SegmentInfo segments[], uint8_t segmentIndex, bool segIndexSet, uint64_t segmentOffset, uint16_t format, bool& stop)) const;
432 bool contentIsRegularStub(const uint8_t* helperContent) const;
433 void recurseTrie(Diagnostics& diag, const uint8_t* const start, const uint8_t* p, const uint8_t* const end,  // takes the same ExportsCallback type as forEachExportedSymbol()
434 OverflowSafeArray<char>& cummulativeString, int curStrOffset, bool& stop, MachOAnalyzer::ExportsCallback callback) const;
435 void analyzeSegmentsLayout(uint64_t& vmSpace, bool& hasZeroFill) const;  // both parameters are non-const references, presumably outputs - confirm
436
437 };
438
439
440 } // namespace dyld3
441
442 #endif /* MachOAnalyzer_h */