dyld-750.5.tar.gz
[apple/dyld.git] / dyld3 / MachOAnalyzer.h
1 /*
2 * Copyright (c) 2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 #ifndef MachOAnalyzer_h
25 #define MachOAnalyzer_h
26
27
28 #include "MachOLoaded.h"
29 #include "Array.h"
30 #include "ClosureFileSystem.h"
31
32
33 namespace dyld3 {
34
35 // Extra functionality on loaded mach-o files only used during closure building
36 struct VIS_HIDDEN MachOAnalyzer : public MachOLoaded
37 {
// The using-declarations below re-declare names that are inherited through
// MachOLoaded inside this struct's (default public) member list, so they are
// reachable as public members of MachOAnalyzer.
38 // protected members of subclass promoted to public here
39 using MachOLoaded::SegmentInfo;
40 using MachOLoaded::SectionInfo;
41 using MachOLoaded::forEachSegment;
42 using MachOLoaded::forEachSection;
43 using MachOLoaded::forEachDependentDylib;
44 using MachOLoaded::getDylibInstallName;
45 using MachOLoaded::FoundSymbol;
46 using MachOLoaded::findExportedSymbol;
47
// Static entry points. Both take a Diagnostics&, the set of acceptable
// architectures (GradedArchs) and a target Platform.
//
// load(): takes a FileSystem abstraction and a logical path, and returns a
// closure::LoadedFileInfo by value. realerPath is declared as a MAXPATHLEN
// char array parameter (presumably an output buffer for the resolved path --
// confirm in MachOAnalyzer.cpp).
48 static closure::LoadedFileInfo load(Diagnostics& diag, const closure::FileSystem& fileSystem,
49 const char* logicalPath, const GradedArchs& archs, Platform platform, char realerPath[MAXPATHLEN]);
// validMainExecutable(): takes an existing mach_header, its path and slice
// length, and returns a MachOAnalyzer pointer.
// NOTE(review): presumably nullptr on validation failure -- confirm.
50 static const MachOAnalyzer* validMainExecutable(Diagnostics& diag, const mach_header* mh, const char* path, uint64_t sliceLength,
51 const GradedArchs& archs, Platform platform);
52
// Block type accepted by forEachExportedSymbol() and by the private
// recurseTrie() helper. The trailing bool& stop is writable by the callback
// (presumably to end the walk early -- confirm in the implementation).
53 typedef void (^ExportsCallback)(const char* symbolName, uint64_t imageOffset, uint64_t flags,
54 uint64_t other, const char* importName, bool& stop);
// General queries and iterators over the image. Every member function in this
// group is const-qualified except content() near the end.
// Iterators take a block; where the block has a bool& stop parameter it is
// writable by the block (presumably to terminate iteration -- confirm).
55 bool validMachOForArchAndPlatform(Diagnostics& diag, size_t mappedSize, const char* path, const GradedArchs& archs, Platform platform) const;
56 uint64_t mappedSize() const;
57 bool hasObjC() const;
58 bool hasPlusLoadMethod(Diagnostics& diag) const;
59 uint64_t preferredLoadAddress() const;
60 void forEachLocalSymbol(Diagnostics& diag, void (^callback)(const char* symbolName, uint64_t n_value, uint8_t n_type, uint8_t n_sect, uint16_t n_desc, bool& stop)) const;
61 void forEachRPath(void (^callback)(const char* rPath, bool& stop)) const;
// bool result plus a uint32_t& out-parameter (progVarsOffset).
62 bool hasProgramVars(Diagnostics& diag, uint32_t& progVarsOffset) const;
// The handler's cdHash parameter is declared as a 20-element uint8_t array.
63 void forEachCDHash(void (^handler)(const uint8_t cdHash[20])) const;
// bool result plus two uint32_t& out-parameters (fileOffset, size).
64 bool hasCodeSignature(uint32_t& fileOffset, uint32_t& size) const;
65 bool usesLibraryValidation() const;
66 bool isRestricted() const;
// bool result plus out-parameters for the entry offset and a usesCRT flag.
67 bool getEntry(uint32_t& offset, bool& usesCRT) const;
68 bool isSlideable() const;
// dyldCache is optional and defaults to nullptr (here and in forEachInitializer).
69 bool hasInitializer(Diagnostics& diag, bool contentRebased, const void* dyldCache=nullptr) const;
70 void forEachInitializerPointerSection(Diagnostics& diag, void (^callback)(uint32_t sectionOffset, uint32_t sectionSize, const uint8_t* content, bool& stop)) const;
71 void forEachInitializer(Diagnostics& diag, bool contentRebased, void (^callback)(uint32_t offset), const void* dyldCache=nullptr) const;
72 bool hasTerminators(Diagnostics& diag, bool contentRebased) const;
73 void forEachTerminator(Diagnostics& diag, bool contentRebased, void (^callback)(uint32_t offset)) const;
74 void forEachDOFSection(Diagnostics& diag, void (^callback)(uint32_t offset)) const;
75 uint32_t segmentCount() const;
76 void forEachExportedSymbol(Diagnostics& diag, ExportsCallback callback) const;
// First forEachWeakDef overload; a second overload with a different callback
// signature is declared later in this struct.
77 void forEachWeakDef(Diagnostics& diag, void (^callback)(bool strongDef, uint32_t dataSegIndex, uint64_t dataSegOffset,
78 uint64_t addend, const char* symbolName, bool& stop)) const;
79 void forEachIndirectPointer(Diagnostics& diag, void (^handler)(uint64_t pointerAddress, bool bind, int bindLibOrdinal,
80 const char* bindSymbolName, bool bindWeakImport, bool bindLazy, bool selfModifyingStub, bool& stop)) const;
81 void forEachInterposingSection(Diagnostics& diag, void (^handler)(uint64_t vmOffset, uint64_t vmSize, bool& stop)) const;
// NOTE(review): unlike its neighbours this member is not const-qualified, so it
// cannot be called through a const MachOAnalyzer* -- confirm this is intended.
82 const void* content(uint64_t vmOffset);
83 void forEachLocalReloc(void (^handler)(uint64_t runtimeOffset, bool& stop)) const;
84 void forEachExternalReloc(void (^handler)(uint64_t runtimeOffset, int libOrdinal, const char* symbolName, bool& stop)) const;
85
// Raw-data accessors: each returns a const void* and takes a uint32_t& size
// out-parameter (presumably the byte length of the returned data, and
// presumably nullptr when the image has none -- confirm in the .cpp).
86 const void* getRebaseOpcodes(uint32_t& size) const;
87 const void* getBindOpcodes(uint32_t& size) const;
88 const void* getLazyBindOpcodes(uint32_t& size) const;
89 const void* getSplitSeg(uint32_t& size) const;
90 uint64_t segAndOffsetToRuntimeOffset(uint8_t segIndex, uint64_t segOffset) const;
91 bool hasLazyPointers(uint32_t& runtimeOffset, uint32_t& size) const;
// forEachRebase and forEachBind are each declared twice in this group:
//  - a simple overload whose callback receives a runtime offset, and
//  - a detailed overload (further down) whose handler also receives the opcode
//    name, the LinkEditInfo, the segment table and the decoder state flags.
92 void forEachRebase(Diagnostics& diag, bool ignoreLazyPointer, void (^callback)(uint64_t runtimeOffset, bool& stop)) const;
93 void forEachTextRebase(Diagnostics& diag, void (^callback)(uint64_t runtimeOffset, bool& stop)) const;
// Simple forEachBind overload. Takes two extra blocks besides the per-bind
// callback: strongHandler(symbolName) and missingLazyBindHandler().
94 void forEachBind(Diagnostics& diag, void (^callback)(uint64_t runtimeOffset, int libOrdinal, const char* symbolName,
95 bool weakImport, bool lazyBind, uint64_t addend, bool& stop),
96 void (^strongHandler)(const char* symbolName),
97 void (^missingLazyBindHandler)()) const;
98 void forEachChainedFixupTarget(Diagnostics& diag, void (^callback)(int libOrdinal, const char* symbolName, uint64_t addend, bool weakImport, bool& stop)) const;
// failureReason is a block taking a const char* (presumably invoked with a
// human-readable explanation when the answer is false -- confirm).
99 bool canHavePrecomputedDlopenClosure(const char* path, void (^failureReason)(const char*)) const;
// Detailed forEachRebase overload.
100 void forEachRebase(Diagnostics& diag, void (^handler)(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
101 bool segIndexSet, uint32_t pointerSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, bool& stop)) const;
// Detailed forEachBind overload.
102 void forEachBind(Diagnostics& diag, void (^handler)(const char* opcodeName, const LinkEditInfo& leInfo, const SegmentInfo segments[],
103 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal,
104 uint32_t pointerSize, uint8_t segmentIndex, uint64_t segmentOffset,
105 uint8_t type, const char* symbolName, bool weakImport, bool lazyBind, uint64_t addend, bool& stop),
106 void (^strongHandler)(const char* symbolName),
107 void (^missingLazyBindHandler)()) const;
108 bool canBePlacedInDyldCache(const char* path, void (^failureReason)(const char*)) const;
109 uint32_t loadCommandsFreeSpace() const;
110
// validateDyldCacheDylib() is only declared when DEBUG is defined to a
// non-zero value.
111 #if DEBUG
112 void validateDyldCacheDylib(Diagnostics& diag, const char* path) const;
113 #endif
// Chained-fixups queries. withChainStarts() hands a
// dyld_chained_starts_in_image pointer to the supplied block.
114 void withChainStarts(Diagnostics& diag, uint64_t startsStructOffsetHint, void (^callback)(const dyld_chained_starts_in_image*)) const;
115 uint64_t chainStartsOffset() const;
// chainedPointerFormat() exists as a const member and as a static overload
// that works from an explicit dyld_chained_fixups_header.
116 uint16_t chainedPointerFormat() const;
117 static uint16_t chainedPointerFormat(const dyld_chained_fixups_header* chainHeader);
118 bool hasUnalignedPointerFixups() const;
119 const dyld_chained_fixups_header* chainedFixupsHeader() const;
120
// Takes the LoadedFileInfo by non-const reference and returns a MachOAnalyzer
// pointer. NOTE(review): presumably `info` is updated and the returned pointer
// refers to the remapped image when zero-fill is present -- confirm.
121 const MachOAnalyzer* remapIfZeroFill(Diagnostics& diag, const closure::FileSystem& fileSystem, closure::LoadedFileInfo& info) const;
122
// Summary counters returned by value from getObjCInfo().
// Going by the field names these count selector references, class definitions
// and protocol definitions (confirm against the implementation).
// Every counter defaults to zero so that a default-constructed ObjCInfo never
// exposes indeterminate values, matching VMAddrConverter and ObjCClassInfo,
// which already initialise their members in-class.
struct ObjCInfo {
    uint32_t selRefCount      = 0;
    uint32_t classDefCount    = 0;
    uint32_t protocolDefCount = 0;
};
// Returns an ObjCInfo by value; does not modify the analyzer (const).
128 ObjCInfo getObjCInfo() const;
129
130 // This optionally caches a list of sections for lookup
131 struct SectionCache {
132 private:
// Inline storage for two SectionInfo entries. It is declared before
// sectionInfos because sectionInfos' default member initializer refers to it
// and members are initialised in declaration order.
133 SectionInfo buffer[2];
134
135 public:
// NOTE(review): this single-argument constructor is not marked explicit, so a
// const MachOAnalyzer* converts implicitly to a SectionCache -- confirm that
// is intended before tightening it.
136 SectionCache(const MachOAnalyzer* ma) : ma(ma) { }
137
// Takes a VM address and a block that receives a SectionInfo and returns bool.
// NOTE(review): the meaning of both bool results is defined in the .cpp.
138 bool findSectionForVMAddr(uint64_t vmAddr, bool (^sectionHandler)(const SectionInfo& sectionInfo));
139
// The analyzer this cache was constructed for.
140 const MachOAnalyzer* ma = nullptr;
// Initialised from `buffer` and its element count (2).
141 dyld3::OverflowSafeArray<SectionInfo> sectionInfos = { buffer, sizeof(buffer) / sizeof(buffer[0]) };
142 };
143
144 // Caches data useful for converting from raw data to VM addresses
// All four fields have in-class defaults, so a default-constructed converter
// is fully initialised. The field names match preferredLoadAddress(),
// chainedPointerFormat() and the contentRebased flag used elsewhere in this
// header (presumably they cache those values -- confirm where it is filled in).
145 struct VMAddrConverter {
146 uint64_t preferredLoadAddress = 0;
147 intptr_t slide = 0;
148 uint16_t chainedPointerFormat = 0;
149 bool contentRebased = false;
150 };
151
152 struct ObjCClassInfo {
153 // These fields are all present on the objc_class_t struct
154 uint64_t isaVMAddr = 0;
155 uint64_t superclassVMAddr = 0;
156 //uint64_t methodCacheBuckets;
157 //uint64_t methodCacheProperties;
158 uint64_t dataVMAddr = 0;
159
160 // This field is only present if this is a Swift object, ie, has the Swift
161 // fast bits set
162 uint32_t swiftClassFlags = 0;
163
164 // These are taken from the low bits of the dataVMAddr value
165 bool isSwiftLegacy = false;
166 bool isSwiftStable = false;
167
168 // Cache the data to convert vmAddr's
169 MachOAnalyzer::VMAddrConverter vmAddrConverter;
170
171 // These are from the class_ro_t which data points to
172 enum class ReadOnlyDataField {
173 name,
174 baseMethods,
175 baseProperties,
176 flags
177 };
178
179 uint64_t getReadOnlyDataField(ReadOnlyDataField field, uint32_t pointerSize) const;
180 uint64_t nameVMAddr(uint32_t pointerSize) const {
181 return getReadOnlyDataField(ReadOnlyDataField::name, pointerSize);
182 }
183 uint64_t baseMethodsVMAddr(uint32_t pointerSize) const {
184 return getReadOnlyDataField(ReadOnlyDataField::baseMethods, pointerSize);
185 }
186 uint64_t basePropertiesVMAddr(uint32_t pointerSize) const {
187 return getReadOnlyDataField(ReadOnlyDataField::baseProperties, pointerSize);
188 }
189 uint64_t flags(uint32_t pointerSize) const {
190 return getReadOnlyDataField(ReadOnlyDataField::flags, pointerSize);
191 }
192
193 // These are embedded in the Mach-O itself by the compiler
194 enum FastDataBits {
195 FAST_IS_SWIFT_LEGACY = 0x1,
196 FAST_IS_SWIFT_STABLE = 0x2
197 };
198
199 // These are embedded by the Swift compiler in the swiftClassFlags field
200 enum SwiftClassFlags {
201 isSwiftPreStableABI = 0x1
202 };
203
204 // Note this is taken from the objc runtime
205 bool isUnfixedBackwardDeployingStableSwift() const {
206 // Only classes marked as Swift legacy need apply.
207 if (!isSwiftLegacy) return false;
208
209 // Check the true legacy vs stable distinguisher.
210 // The low bit of Swift's ClassFlags is SET for true legacy
211 // and UNSET for stable pretending to be legacy.
212 bool isActuallySwiftLegacy = (swiftClassFlags & isSwiftPreStableABI) != 0;
213 return !isActuallySwiftLegacy;
214 }
215 };
216
// Two 32-bit words: a version and a flags word. objcImageInfo() returns a
// pointer to one of these.
// NOTE(review): presumably this mirrors data stored in the image, so the
// layout should not be changed -- confirm before editing.
217 struct ObjCImageInfo {
218 uint32_t version;
219 uint32_t flags;
220
221 // FIXME: Put this somewhere objc can see it.
// Bit 7 (value 0x80); presumably tested against `flags` -- confirm.
222 enum : uint32_t {
223 dyldPreoptimized = 1 << 7
224 };
225 };
226
// One method entry as passed to the forEachObjCMethod() handler.
// All addresses default to zero so a default-constructed ObjCMethod never holds
// indeterminate values, matching VMAddrConverter and ObjCClassInfo, which
// already initialise their members in-class.
struct ObjCMethod {
    uint64_t nameVMAddr  = 0;   // & SEL
    uint64_t typesVMAddr = 0;   // & const char *
    uint64_t impVMAddr   = 0;   // & IMP

    // We also need to know where the reference to the nameVMAddr was
    // This is so that we know how to rebind that location
    uint64_t nameLocationVMAddr = 0;
};
236
// One property entry as passed to the forEachObjCProperty() handler.
// Both addresses default to zero so a default-constructed ObjCProperty never
// holds indeterminate values, matching VMAddrConverter and ObjCClassInfo.
struct ObjCProperty {
    uint64_t nameVMAddr       = 0;   // & const char *
    uint64_t attributesVMAddr = 0;   // & const char *
};
241
// One category as passed to the forEachObjCCategory() handler; every field is a
// VM address. All fields default to zero so a default-constructed ObjCCategory
// never holds indeterminate values, matching VMAddrConverter and ObjCClassInfo.
struct ObjCCategory {
    uint64_t nameVMAddr               = 0;
    uint64_t clsVMAddr                = 0;
    uint64_t instanceMethodsVMAddr    = 0;
    uint64_t classMethodsVMAddr       = 0;
    uint64_t protocolsVMAddr          = 0;
    uint64_t instancePropertiesVMAddr = 0;
};
250
// One protocol as passed to the forEachObjCProtocol() handler. Commented-out
// members are fields that are not carried in this struct.
// All members default to zero/false so a default-constructed ObjCProtocol never
// holds indeterminate values, matching VMAddrConverter and ObjCClassInfo.
struct ObjCProtocol {
    uint64_t isaVMAddr                     = 0;
    uint64_t nameVMAddr                    = 0;
    //uint64_t protocolsVMAddr;
    uint64_t instanceMethodsVMAddr         = 0;
    uint64_t classMethodsVMAddr            = 0;
    uint64_t optionalInstanceMethodsVMAddr = 0;
    uint64_t optionalClassMethodsVMAddr    = 0;
    //uint64_t instancePropertiesVMAddr;
    //uint32_t size;
    //uint32_t flags;
    // Fields below this point are not always present on disk.
    //uint64_t extendedMethodTypesVMAddr;
    //uint64_t demangledNameVMAddr;
    //uint64_t classPropertiesVMAddr;

    // Note this isn't in a protocol, but we use it in dyld to track if the protocol
    // is large enough to avoid a reallocation in objc.
    bool requiresObjCReallocation = false;
};
271
// Status reported through the `result` out-parameter of getPrintableString().
// Going by the enumerator names, CanPrint is the success case and the other
// three name reasons a string cannot be printed (confirm in the .cpp).
272 enum class PrintableStringResult {
273 CanPrint,
274 FairPlayEncrypted,
275 ProtectedSection,
276 UnknownSection
277 };
278
// Takes a string VM address and reports a PrintableStringResult through
// `result`. Both the SectionCache pointer and the section handler block are
// optional and default to nullptr.
279 const char* getPrintableString(uint64_t stringVMAddr, PrintableStringResult& result,
280 SectionCache* sectionCache = nullptr,
281 bool (^sectionHandler)(const SectionInfo& sectionInfo) = nullptr) const;
282
// Objective-C metadata walkers. Each takes a handler block; the class,
// category and protocol walkers also pass the Diagnostics object through to it.
283 void forEachObjCClass(Diagnostics& diag, bool contentRebased,
284 void (^handler)(Diagnostics& diag, uint64_t classVMAddr,
285 uint64_t classSuperclassVMAddr, uint64_t classDataVMAddr,
286 const ObjCClassInfo& objcClass, bool isMetaClass)) const;
287
288 void forEachObjCCategory(Diagnostics& diag, bool contentRebased,
289 void (^handler)(Diagnostics& diag, uint64_t categoryVMAddr,
290 const dyld3::MachOAnalyzer::ObjCCategory& objcCategory)) const;
291
292 void forEachObjCProtocol(Diagnostics& diag, bool contentRebased,
293 void (^handler)(Diagnostics& diag, uint64_t protocolVMAddr,
294 const dyld3::MachOAnalyzer::ObjCProtocol& objCProtocol)) const;
295
296 // Walk a method list starting from its vmAddr.
297 // Note, classes, categories, protocols, etc, all share the same method list structure so can all use this.
298 void forEachObjCMethod(uint64_t methodListVMAddr, bool contentRebased,
299 void (^handler)(uint64_t methodVMAddr, const ObjCMethod& method)) const;
300
// Same shape as forEachObjCMethod, but starts from a property list VM address
// and hands ObjCProperty values to the handler.
301 void forEachObjCProperty(uint64_t propertyListVMAddr, bool contentRebased,
302 void (^handler)(uint64_t propertyVMAddr, const ObjCProperty& property)) const;
303
304 void forEachObjCSelectorReference(Diagnostics& diag, bool contentRebased,
305 void (^handler)(uint64_t selRefVMAddr, uint64_t selRefTargetVMAddr)) const;
306
307 void forEachObjCMethodName(void (^handler)(const char* methodName)) const;
308
309 bool hasObjCMessageReferences() const;
310
// Returns a pointer to an ObjCImageInfo.
// NOTE(review): presumably nullptr when the image has none -- confirm.
311 const ObjCImageInfo* objcImageInfo() const;
312
// Second forEachWeakDef overload; differs from the earlier one only in the
// handler's signature (symbol name, image offset, isFromExportTrie flag).
313 void forEachWeakDef(Diagnostics& diag, void (^handler)(const char* symbolName, uintptr_t imageOffset, bool isFromExportTrie)) const;
314
// Everything from here to the end of the struct is private.
315 private:
316
// Per-segment record: file offset and file size, followed by one 64-bit
// bitfield unit holding three 1-bit flags and a 61-bit segSize (1+1+1+61 = 64).
// The bitfield members carry no in-class initializers.
317 struct SegmentStuff
318 {
319 uint64_t fileOffset;
320 uint64_t fileSize;
321 uint64_t writable : 1,
322 executable : 1,
323 textRelocsAllowed : 1, // segment supports text relocs (i386 only)
324 segSize : 61;
325 };
326
// Kinds of format problem that can be passed to enforceFormat(); the
// enumerator names identify the affected area (linkedit, segments, text
// permissions, code signature alignment, ...).
// NOTE(review): what a true/false result from enforceFormat() means is defined
// in the .cpp -- confirm before relying on it.
327 enum class Malformed { linkeditOrder, linkeditAlignment, linkeditPermissions, dyldInfoAndlocalRelocs, segmentOrder, textPermissions, executableData, codeSigAlignment };
328 bool enforceFormat(Malformed) const;
329
// Private helpers. All are const member functions.
330 const uint8_t* getContentForVMAddr(const LayoutInfo& info, uint64_t vmAddr) const;
// valid*() family: each returns bool and takes a Diagnostics& plus the image
// path (presumably false means validation failed and diag holds the reason --
// confirm in the .cpp).
331 bool validLoadCommands(Diagnostics& diag, const char* path, size_t fileLen) const;
332 bool validEmbeddedPaths(Diagnostics& diag, Platform platform, const char* path) const;
333 bool validSegments(Diagnostics& diag, const char* path, size_t fileLen) const;
334 bool validLinkedit(Diagnostics& diag, const char* path) const;
335 bool validLinkeditLayout(Diagnostics& diag, const char* path) const;
336 bool validRebaseInfo(Diagnostics& diag, const char* path) const;
337 bool validBindInfo(Diagnostics& diag, const char* path) const;
338 bool validMain(Diagnostics& diag, const char* path) const;
339 bool validChainedFixupsInfo(Diagnostics& diag, const char* path) const;
340 bool validChainedFixupsInfoOldArm64e(Diagnostics& diag, const char* path) const;
341
// invalidRebaseState / invalidBindState take the same decoder-state arguments
// that the detailed forEachRebase / forEachBind handlers receive, plus the
// image path.
342 bool invalidRebaseState(Diagnostics& diag, const char* opcodeName, const char* path, const LinkEditInfo& leInfo, const SegmentInfo segments[],
343 bool segIndexSet, uint32_t pointerSize, uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type) const;
344 bool invalidBindState(Diagnostics& diag, const char* opcodeName, const char* path, const LinkEditInfo& leInfo, const SegmentInfo segments[],
345 bool segIndexSet, bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal, uint32_t pointerSize,
346 uint8_t segmentIndex, uint64_t segmentOffset, uint8_t type, const char* symbolName) const;
// Relocation helpers: doLocalReloc / doExternalReloc take one relocation's
// fields and a callback block, and share the caller's bool& stop.
347 bool doLocalReloc(Diagnostics& diag, uint32_t r_address, bool& stop, void (^callback)(uint32_t dataSegIndex, uint64_t dataSegOffset, uint8_t type, bool& stop)) const;
348 uint8_t relocPointerType() const;
349 int libOrdinalFromDesc(uint16_t n_desc) const;
350 bool doExternalReloc(Diagnostics& diag, uint32_t r_address, uint32_t r_symbolnum, LinkEditInfo& leInfo, bool& stop,
351 void (^callback)(uint32_t dataSegIndex, uint64_t dataSegOffset, uint8_t type, int libOrdinal,
352 uint64_t addend, const char* symbolName, bool weakImport, bool lazy, bool& stop)) const;
353
// `segments` is a caller-provided array (presumably sized by segmentCount() --
// confirm at the call sites).
354 void getAllSegmentsInfos(Diagnostics& diag, SegmentInfo segments[]) const;
355 bool segmentHasTextRelocs(uint32_t segIndex) const;
356 uint64_t relocBaseAddress(const SegmentInfo segmentsInfos[], uint32_t segCount) const;
// bool result plus two out-parameters (segIndex, segOffset).
357 bool segIndexAndOffsetForAddress(uint64_t addr, const SegmentInfo segmentsInfos[], uint32_t segCount, uint32_t& segIndex, uint64_t& segOffset) const;
// Takes three blocks: targetCount (total number of targets), addTarget (one
// call per target) and addChainStart (one call per chain start).
358 void parseOrgArm64eChainedFixups(Diagnostics& diag, void (^targetCount)(uint32_t totalTargets, bool& stop),
359 void (^addTarget)(const LinkEditInfo& leInfo, const SegmentInfo segments[], bool libraryOrdinalSet, uint32_t dylibCount, int libOrdinal, uint8_t type, const char* symbolName, uint64_t addend, bool weakImport, bool& stop),
360 void (^addChainStart)(const LinkEditInfo& leInfo, const SegmentInfo segments[], uint8_t segmentIndex, bool segIndexSet, uint64_t segmentOffset, uint16_t format, bool& stop)) const;
361 bool contentIsRegularStub(const uint8_t* helperContent) const;
362 uint64_t entryAddrFromThreadCmd(const thread_command* cmd) const;
// Takes the [start, end) byte range, the current position p, a growable string
// accumulator and its current offset, and an ExportsCallback.
// NOTE(review): the parameter name "cummulativeString" is a misspelling of
// "cumulative"; it is left as-is here because renaming it is a code change.
363 void recurseTrie(Diagnostics& diag, const uint8_t* const start, const uint8_t* p, const uint8_t* const end,
364 OverflowSafeArray<char>& cummulativeString, int curStrOffset, bool& stop, MachOAnalyzer::ExportsCallback callback) const;
// Reports its results through two out-parameters (vmSpace, hasZeroFill).
365 void analyzeSegmentsLayout(uint64_t& vmSpace, bool& hasZeroFill) const;
366
367 };
368
369 } // namespace dyld3
370
371 #endif /* MachOAnalyzer_h */