2 * Copyright (c) 2017 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
24 #ifndef MachOAnalyzer_h
25 #define MachOAnalyzer_h
28 #include "MachOLoaded.h"
30 #include "ClosureFileSystem.h"
35 // Extra functionality on loaded mach-o files only used during closure building
// MachOAnalyzer is declared as a struct (tagged VIS_HIDDEN) that derives publicly from MachOLoaded.
36 struct VIS_HIDDEN MachOAnalyzer
: public MachOLoaded
38 // protected members of subclass promoted to public here
// The first ten using-declarations name members of MachOLoaded.
39 using MachOLoaded::SegmentInfo
;
40 using MachOLoaded::SectionInfo
;
41 using MachOLoaded::forEachSegment
;
42 using MachOLoaded::forEachSection
;
43 using MachOLoaded::forEachDependentDylib
;
44 using MachOLoaded::getDylibInstallName
;
45 using MachOLoaded::FoundSymbol
;
46 using MachOLoaded::findExportedSymbol
;
47 using MachOLoaded::forEachGlobalSymbol
;
48 using MachOLoaded::forEachLocalSymbol
;
// The last three using-declarations name members of MachOFile.
49 using MachOFile::canBePlacedInDyldCache
;
50 using MachOFile::forEachLoadCommand
;
51 using MachOFile::removeLoadCommand
;
// Static member returning bool.
// Takes a Diagnostics (non-const reference), a closure::FileSystem, a path,
// the GradedArchs and Platform to match, and a closure::LoadedFileInfo by
// non-const reference.
// NOTE(review): `info` is presumably both input and output — confirm against the implementation.
61 static bool loadFromBuffer(Diagnostics
& diag
, const closure::FileSystem
& fileSystem
,
62 const char* path
, const GradedArchs
& archs
, Platform platform
,
63 closure::LoadedFileInfo
& info
);
// Static member returning a closure::LoadedFileInfo by value.
// realerPath is declared as char[MAXPATHLEN]; as a function parameter that is
// a plain char*, so the size is not enforced by the compiler.
// NOTE(review): callers presumably must supply a buffer of at least MAXPATHLEN bytes — confirm.
64 static closure::LoadedFileInfo
load(Diagnostics
& diag
, const closure::FileSystem
& fileSystem
,
65 const char* logicalPath
, const GradedArchs
& archs
, Platform platform
, char realerPath
[MAXPATHLEN
]);
// Static member taking a mach_header pointer plus path, slice length, archs and platform,
// and returning a const MachOAnalyzer pointer.
// NOTE(review): presumably returns nullptr (with details in `diag`) when validation fails — confirm.
66 static const MachOAnalyzer
* validMainExecutable(Diagnostics
& diag
, const mach_header
* mh
, const char* path
, uint64_t sliceLength
,
67 const GradedArchs
& archs
, Platform platform
);
// Block type invoked with (symbolName, imageOffset, flags, other, importName, stop).
// Used as the callback parameter type of forEachExportedSymbol() and recurseTrie().
// `stop` is a non-const bool reference, so the block can write to it.
69 typedef void (^ExportsCallback
)(const char* symbolName
, uint64_t imageOffset
, uint64_t flags
,
70 uint64_t other
, const char* importName
, bool& stop
);
// Const member returning bool.
// Inputs: mapped size, path, the GradedArchs and Platform to match, and an isOSBinary flag.
// Takes `diag` by non-const reference.
71 bool validMachOForArchAndPlatform(Diagnostics
& diag
, size_t mappedSize
, const char* path
, const GradedArchs
& archs
, Platform platform
, bool isOSBinary
) const;
73 // Caches data useful for converting from raw data to VM addresses
74 struct VMAddrConverter
{
// The data members visible here all carry in-class default initializers (0 / false / none).
75 uint64_t preferredLoadAddress
= 0;
77 uint16_t chainedPointerFormat
= 0;
78 bool contentRebased
= false;
// The shared-cache format member is only compiled when !(BUILDING_LIBDYLD || BUILDING_DYLD) holds.
79 #if !(BUILDING_LIBDYLD || BUILDING_DYLD)
80 enum class SharedCacheFormat
: uint8_t {
// NOTE(review): the enumerator list and the matching #endif are not visible in this view;
// `none` is the only enumerator referenced below.
85 SharedCacheFormat sharedCacheChainedPointerFormat
= SharedCacheFormat::none
;
// Const member mapping one uint64_t value to another.
// NOTE(review): presumably raw pointer content to a VM address, per the struct comment — confirm.
88 uint64_t convertToVMAddr(uint64_t v
) const;
// Returns a VMAddrConverter by value. The parameter has the same name as the
// VMAddrConverter::contentRebased field.
91 VMAddrConverter
makeVMAddrConverter(bool contentRebased
) const;
// Const query returning a 64-bit size.
93 uint64_t mappedSize() const;
// Const bool query taking `diag` by non-const reference.
95 bool hasPlusLoadMethod(Diagnostics
& diag
) const;
// Argument-free const bool queries.
96 bool usesObjCGarbageCollection() const;
97 bool isSwiftLibrary() const;
// Const accessor returning a uint64_t.
// NOTE(review): presumably the value that ends up in VMAddrConverter::preferredLoadAddress — confirm.
98 uint64_t preferredLoadAddress() const;
// Takes a block that receives (rPath, stop).
// NOTE(review): presumably invoked once per rpath, with `stop` as an early-exit flag — confirm.
99 void forEachRPath(void (^callback
)(const char* rPath
, bool& stop
)) const;
// Returns bool; progVarsOffset is a non-const reference the callee can write.
100 bool hasProgramVars(Diagnostics
& diag
, uint32_t& progVarsOffset
) const;
// The handler block receives cdHash declared as const uint8_t[20] (as a parameter, a const uint8_t*).
101 void forEachCDHash(void (^handler
)(const uint8_t cdHash
[20])) const;
// Returns bool; fileOffset and size are non-const reference parameters.
102 bool hasCodeSignature(uint32_t& fileOffset
, uint32_t& size
) const;
// Argument-free const bool queries.
103 bool usesLibraryValidation() const;
104 bool isRestricted() const;
// Returns bool; offset and usesCRT are non-const reference parameters.
105 bool getEntry(uint64_t& offset
, bool& usesCRT
) const;
// Argument-free const bool query.
106 bool isSlideable() const;
// Const bool query; dyldCache is optional and defaults to nullptr.
107 bool hasInitializer(Diagnostics
& diag
, const VMAddrConverter
& vmAddrConverter
, const void* dyldCache
=nullptr) const;
// The callback block receives (sectionOffset, sectionSize, content, stop).
108 void forEachInitializerPointerSection(Diagnostics
& diag
, void (^callback
)(uint32_t sectionOffset
, uint32_t sectionSize
, const uint8_t* content
, bool& stop
)) const;
// The callback block receives a single uint32_t offset; dyldCache defaults to nullptr.
// Note the callback has no `stop` parameter.
109 void forEachInitializer(Diagnostics
& diag
, const VMAddrConverter
& vmAddrConverter
, void (^callback
)(uint32_t offset
), const void* dyldCache
=nullptr) const;
// Terminator counterparts of the two initializer members above; neither takes a dyldCache argument.
110 bool hasTerminators(Diagnostics
& diag
, const VMAddrConverter
& vmAddrConverter
) const;
111 void forEachTerminator(Diagnostics
& diag
, const VMAddrConverter
& vmAddrConverter
, void (^callback
)(uint32_t offset
)) const;
// The callback block receives a single uint32_t offset.
112 void forEachDOFSection(Diagnostics
& diag
, void (^callback
)(uint32_t offset
)) const;
// Const query returning a 32-bit count.
113 uint32_t segmentCount() const;
// Takes an ExportsCallback block (typedef declared earlier in this struct).
114 void forEachExportedSymbol(Diagnostics
& diag
, ExportsCallback callback
) const;
// Overload whose callback receives (strongDef, dataSegIndex, dataSegOffset, addend, symbolName, stop).
// A second forEachWeakDef overload with a different handler signature is declared further down.
115 void forEachWeakDef(Diagnostics
& diag
, void (^callback
)(bool strongDef
, uint32_t dataSegIndex
, uint64_t dataSegOffset
,
116 uint64_t addend
, const char* symbolName
, bool& stop
)) const;
// The handler block receives (pointerAddress, bind, bindLibOrdinal, bindSymbolName,
// bindWeakImport, bindLazy, selfModifyingStub, stop).
117 void forEachIndirectPointer(Diagnostics
& diag
, void (^handler
)(uint64_t pointerAddress
, bool bind
, int bindLibOrdinal
,
118 const char* bindSymbolName
, bool bindWeakImport
, bool bindLazy
, bool selfModifyingStub
, bool& stop
)) const;
// The handler block receives (vmOffset, vmSize, stop).
119 void forEachInterposingSection(Diagnostics
& diag
, void (^handler
)(uint64_t vmOffset
, uint64_t vmSize
, bool& stop
)) const;
// Returns a const void* for a VM offset.
// NOTE(review): unlike the neighbouring members this one is not declared const — confirm that is intentional.
120 const void* content(uint64_t vmOffset
);
// The handler block receives (runtimeOffset, stop). This member takes no Diagnostics argument.
121 void forEachLocalReloc(void (^handler
)(uint64_t runtimeOffset
, bool& stop
)) const;
// The handler block receives (runtimeOffset, libOrdinal, symbolName, stop). This member takes no Diagnostics argument.
122 void forEachExternalReloc(void (^handler
)(uint64_t runtimeOffset
, int libOrdinal
, const char* symbolName
, bool& stop
)) const;
// Four accessors with the same shape: each returns a const void* and takes a
// non-const uint32_t reference named `size`.
// NOTE(review): presumably `size` receives the byte length of the returned data — confirm.
124 const void* getRebaseOpcodes(uint32_t& size
) const;
125 const void* getBindOpcodes(uint32_t& size
) const;
126 const void* getLazyBindOpcodes(uint32_t& size
) const;
127 const void* getSplitSeg(uint32_t& size
) const;
// Argument-free const bool queries about split-seg info.
128 bool hasSplitSeg() const;
129 bool isSplitSegV1() const;
130 bool isSplitSegV2() const;
// Maps a (segIndex, segOffset) pair to a single uint64_t.
131 uint64_t segAndOffsetToRuntimeOffset(uint8_t segIndex
, uint64_t segOffset
) const;
// Returns bool; runtimeOffset and size are non-const reference parameters.
132 bool hasLazyPointers(uint32_t& runtimeOffset
, uint32_t& size
) const;
// Overload 1: takes an ignoreLazyPointer flag; the callback receives (runtimeOffset, stop).
133 void forEachRebase(Diagnostics
& diag
, bool ignoreLazyPointer
, void (^callback
)(uint64_t runtimeOffset
, bool& stop
)) const;
// Overload 2: no flag; the callback additionally receives isLazyPointerRebase.
134 void forEachRebase(Diagnostics
& diag
, void (^callback
)(uint64_t runtimeOffset
, bool isLazyPointerRebase
, bool& stop
)) const;
// Same callback shape as overload 1: (runtimeOffset, stop).
135 void forEachTextRebase(Diagnostics
& diag
, void (^callback
)(uint64_t runtimeOffset
, bool& stop
)) const;
// Overload 1: the callback receives (runtimeOffset, libOrdinal, symbolName, weakImport,
// lazyBind, addend, stop). A second block, strongHandler, receives only a symbolName.
136 void forEachBind(Diagnostics
& diag
, void (^callback
)(uint64_t runtimeOffset
, int libOrdinal
, const char* symbolName
,
137 bool weakImport
, bool lazyBind
, uint64_t addend
, bool& stop
),
138 void (^strongHandler
)(const char* symbolName
)) const;
// Overload 2: identical except the callback also receives a uint8_t `type` after libOrdinal.
139 void forEachBind(Diagnostics
& diag
, void (^callback
)(uint64_t runtimeOffset
, int libOrdinal
, uint8_t type
, const char* symbolName
,
140 bool weakImport
, bool lazyBind
, uint64_t addend
, bool& stop
),
141 void (^strongHandler
)(const char* symbolName
)) const;
// The callback receives (libOrdinal, symbolName, addend, weakImport, stop).
// Note weakImport follows addend here, the reverse of the forEachBind callbacks.
142 void forEachChainedFixupTarget(Diagnostics
& diag
, void (^callback
)(int libOrdinal
, const char* symbolName
, uint64_t addend
, bool weakImport
, bool& stop
)) const;
// Further forEachRebase overload: the handler receives (opcodeName, leInfo, segments,
// segIndexSet, pointerSize, segmentIndex, segmentOffset, kind, stop).
// That parameter list matches invalidRebaseState(), declared later, apart from diag/path/stop.
143 void forEachRebase(Diagnostics
& diag
, void (^handler
)(const char* opcodeName
, const LinkEditInfo
& leInfo
, const SegmentInfo segments
[],
144 bool segIndexSet
, uint32_t pointerSize
, uint8_t segmentIndex
, uint64_t segmentOffset
, Rebase kind
, bool& stop
)) const;
// Further forEachBind overload: the handler receives the opcode-level state
// (opcodeName, leInfo, segments, segIndexSet, libraryOrdinalSet, dylibCount, libOrdinal,
// pointerSize, segmentIndex, segmentOffset, type, symbolName, weakImport, lazyBind, addend, stop).
// strongHandler receives only a symbolName.
145 void forEachBind(Diagnostics
& diag
, void (^handler
)(const char* opcodeName
, const LinkEditInfo
& leInfo
, const SegmentInfo segments
[],
146 bool segIndexSet
, bool libraryOrdinalSet
, uint32_t dylibCount
, int libOrdinal
,
147 uint32_t pointerSize
, uint8_t segmentIndex
, uint64_t segmentOffset
,
148 uint8_t type
, const char* symbolName
, bool weakImport
, bool lazyBind
, uint64_t addend
, bool& stop
),
149 void (^strongHandler
)(const char* symbolName
)) const;
// Returns bool; failureReason is a block taking one const char*.
// A using-declaration for MachOFile::canBePlacedInDyldCache also appears earlier in this struct.
150 bool canBePlacedInDyldCache(const char* path
, void (^failureReason
)(const char*)) const;
// Same parameter shape as canBePlacedInDyldCache.
151 bool canHavePrecomputedDlopenClosure(const char* path
, void (^failureReason
)(const char*)) const;
// Declared only when BUILDING_APP_CACHE_UTIL is set.
// NOTE(review): the matching #endif is not visible in this view.
152 #if BUILDING_APP_CACHE_UTIL
153 bool canBePlacedInKernelCollection(const char* path
, void (^failureReason
)(const char*)) const;
// Argument-free const queries.
155 bool usesClassicRelocationsInKernelCollection() const;
156 uint32_t loadCommandsFreeSpace() const;
157 bool hasStompedLazyOpcodes() const;
// Returns void, so any result can only be reported through `diag`.
160 void validateDyldCacheDylib(Diagnostics
& diag
, const char* path
) const;
// The callback block receives a const dyld_chained_starts_in_image pointer.
// NOTE(review): the exact role of startsStructOffsetHint is not visible here — confirm against the implementation.
162 void withChainStarts(Diagnostics
& diag
, uint64_t startsStructOffsetHint
, void (^callback
)(const dyld_chained_starts_in_image
*)) const;
// Argument-free const accessor returning a uint64_t.
163 uint64_t chainStartsOffset() const;
// chainedPointerFormat has two forms: a const member with no arguments, and a
// static overload that takes a dyld_chained_fixups_header pointer.
164 uint16_t chainedPointerFormat() const;
165 static uint16_t chainedPointerFormat(const dyld_chained_fixups_header
* chainHeader
);
// Argument-free const bool query.
166 bool hasUnalignedPointerFixups() const;
// Returns a const dyld_chained_fixups_header pointer.
167 const dyld_chained_fixups_header
* chainedFixupsHeader() const;
// Returns bool; the three arguments are pointers (not references).
// NOTE(review): presumably out-parameters — confirm whether nullptr is accepted.
168 bool hasFirmwareChainStarts(uint16_t* pointerFormat
, uint32_t* startsCount
, const uint32_t** starts
) const;
// Const member taking a file descriptor and a slice (offset, size).
169 bool isOSBinary(int fd
, uint64_t sliceOffset
, uint64_t sliceSize
) const; // checks if binary is codesigned to be part of the OS
// Static function with the same parameter list as isOSBinary().
170 static bool sliceIsOSBinary(int fd
, uint64_t sliceOffset
, uint64_t sliceSize
);
// Returns a const MachOAnalyzer pointer; `info` is passed by non-const reference.
// NOTE(review): whether the result may be `this` or a new mapping is not visible here — confirm.
172 const MachOAnalyzer
* remapIfZeroFill(Diagnostics
& diag
, const closure::FileSystem
& fileSystem
, closure::LoadedFileInfo
& info
) const;
// Const bool query taking `diag` by non-const reference.
174 bool markNeverUnload(Diagnostics
&diag
) const;
// Three 32-bit counters.
// NOTE(review): the enclosing struct header is not visible in this view; getObjCInfo()
// below returns an ObjCInfo, so these fields presumably belong to it — confirm.
177 uint32_t selRefCount
;
178 uint32_t classDefCount
;
179 uint32_t protocolDefCount
;
// Const member returning an ObjCInfo by value.
181 ObjCInfo
getObjCInfo() const;
183 // This optionally caches a list of sections for lookup
184 struct SectionCache
{
// Two-element SectionInfo array; handed to sectionInfos below as its initial storage.
186 SectionInfo buffer
[2];
// Constructor stores the analyzer pointer in `ma`. It is not marked explicit.
189 SectionCache(const MachOAnalyzer
* ma
) : ma(ma
) { }
// Returns bool; sectionHandler is a block taking a const SectionInfo& and returning bool.
191 bool findSectionForVMAddr(uint64_t vmAddr
, bool (^sectionHandler
)(const SectionInfo
& sectionInfo
));
// Defaults to nullptr; overwritten by the constructor's member initializer.
193 const MachOAnalyzer
* ma
= nullptr;
// Initialised from `buffer` and its element count (sizeof(buffer) / sizeof(buffer[0]), i.e. 2).
194 dyld3::OverflowSafeArray
<SectionInfo
> sectionInfos
= { buffer
, sizeof(buffer
) / sizeof(buffer
[0]) };
// Describes one ObjC class. All data members visible here have in-class default initializers.
197 struct ObjCClassInfo
{
198 // These fields are all present on the objc_class_t struct
199 uint64_t isaVMAddr
= 0;
200 uint64_t superclassVMAddr
= 0;
201 //uint64_t methodCacheBuckets;
202 uint64_t methodCacheVMAddr
= 0;
203 uint64_t dataVMAddr
= 0;
// NOTE(review): the sentence below is cut off in this view (its continuation line is missing).
205 // This field is only present if this is a Swift object, ie, has the Swift
207 uint32_t swiftClassFlags
= 0;
209 // These are taken from the low bits of the dataVMAddr value
210 bool isSwiftLegacy
= false;
211 bool isSwiftStable
= false;
213 // Cache the data to convert vmAddr's
214 MachOAnalyzer::VMAddrConverter vmAddrConverter
;
216 // These are from the class_ro_t which data points to
// NOTE(review): the enumerator list is not visible here; the accessors below reference
// name, baseProtocols, baseMethods, baseProperties and flags.
217 enum class ReadOnlyDataField
{
// Generic accessor; each named accessor below forwards to it with the matching enumerator.
225 uint64_t getReadOnlyDataField(ReadOnlyDataField field
, uint32_t pointerSize
) const;
226 uint64_t nameVMAddr(uint32_t pointerSize
) const {
227 return getReadOnlyDataField(ReadOnlyDataField::name
, pointerSize
);
229 uint64_t baseProtocolsVMAddr(uint32_t pointerSize
) const {
230 return getReadOnlyDataField(ReadOnlyDataField::baseProtocols
, pointerSize
);
232 uint64_t baseMethodsVMAddr(uint32_t pointerSize
) const {
233 return getReadOnlyDataField(ReadOnlyDataField::baseMethods
, pointerSize
);
235 uint64_t basePropertiesVMAddr(uint32_t pointerSize
) const {
236 return getReadOnlyDataField(ReadOnlyDataField::baseProperties
, pointerSize
);
238 uint64_t flags(uint32_t pointerSize
) const {
239 return getReadOnlyDataField(ReadOnlyDataField::flags
, pointerSize
);
242 // These are embedded in the Mach-O itself by the compiler
// Bit values 0x1 and 0x2. NOTE(review): the enum header for these two enumerators is not visible in this view.
244 FAST_IS_SWIFT_LEGACY
= 0x1,
245 FAST_IS_SWIFT_STABLE
= 0x2
248 // These are embedded by the Swift compiler in the swiftClassFlags field
249 enum SwiftClassFlags
{
250 isSwiftPreStableABI
= 0x1
253 // Note this is taken from the objc runtime
// Returns false when isSwiftLegacy is unset. Otherwise returns true exactly when the
// isSwiftPreStableABI bit is clear in swiftClassFlags.
254 bool isUnfixedBackwardDeployingStableSwift() const {
255 // Only classes marked as Swift legacy need apply.
256 if (!isSwiftLegacy
) return false;
258 // Check the true legacy vs stable distinguisher.
259 // The low bit of Swift's ClassFlags is SET for true legacy
260 // and UNSET for stable pretending to be legacy.
261 bool isActuallySwiftLegacy
= (swiftClassFlags
& isSwiftPreStableABI
) != 0;
262 return !isActuallySwiftLegacy
;
// Holds flag and mask constants for a method list's entsize field.
// NOTE(review): the enum header for the enumerators below is not visible in this view.
266 struct ObjCMethodList
{
267 // This matches the bits in the objc runtime
269 methodListIsUniqued
= 0x1,
270 methodListIsSorted
= 0x2,
272 // The size is bits 2 through 16 of the entsize field
273 // The low 2 bits are uniqued/sorted as above. The upper 16-bits
274 // are reserved for other flags
// NOTE(review): 0x0000FFFC selects bits 2..15 (zero-based, inclusive), i.e. it excludes
// the two low flag bits and the upper 16 bits.
275 methodListSizeMask
= 0x0000FFFC
// NOTE(review): the data members of ObjCImageInfo are not visible in this view; only one flag constant is.
279 struct ObjCImageInfo
{
283 // FIXME: Put this somewhere objc can see it.
// Flag constant with value 1 << 7 (0x80).
285 dyldPreoptimized
= 1 << 7
// Three VM addresses describing one method.
// NOTE(review): the enclosing struct header is not visible here; forEachObjCMethod()'s handler
// takes a `const ObjCMethod&`, so these fields presumably belong to ObjCMethod — confirm.
// Unlike ObjCClassInfo's fields, these have no default initializers.
290 uint64_t nameVMAddr
; // & SEL
291 uint64_t typesVMAddr
; // & const char *
292 uint64_t impVMAddr
; // & IMP
294 // We also need to know where the reference to the nameVMAddr was
295 // This is so that we know how to rebind that location
296 uint64_t nameLocationVMAddr
;
// One property: two VM addresses, neither default-initialised.
299 struct ObjCProperty
{
300 uint64_t nameVMAddr
; // & const char *
301 uint64_t attributesVMAddr
; // & const char *
// One category: VM addresses of its method, protocol and property lists.
// NOTE(review): some leading members of this struct are not visible in this view.
304 struct ObjCCategory
{
307 uint64_t instanceMethodsVMAddr
;
308 uint64_t classMethodsVMAddr
;
309 uint64_t protocolsVMAddr
;
310 uint64_t instancePropertiesVMAddr
;
// One protocol: VM addresses of its protocol list and four method lists.
// NOTE(review): some leading members of this struct are not visible in this view.
313 struct ObjCProtocol
{
316 uint64_t protocolsVMAddr
;
317 uint64_t instanceMethodsVMAddr
;
318 uint64_t classMethodsVMAddr
;
319 uint64_t optionalInstanceMethodsVMAddr
;
320 uint64_t optionalClassMethodsVMAddr
;
// The members below are commented out in the source and therefore not part of the struct.
321 //uint64_t instancePropertiesVMAddr;
324 // Fields below this point are not always present on disk.
325 //uint64_t extendedMethodTypesVMAddr;
326 //uint64_t demangledNameVMAddr;
327 //uint64_t classPropertiesVMAddr;
// Result type written by getPrintableString().
// NOTE(review): the enumerator list is not visible in this view.
330 enum class PrintableStringResult
{
// Returns a const char* for a string VM address and writes a PrintableStringResult to `result`.
// sectionCache and sectionHandler are optional and both default to nullptr.
337 const char* getPrintableString(uint64_t stringVMAddr
, PrintableStringResult
& result
,
338 SectionCache
* sectionCache
= nullptr,
339 bool (^sectionHandler
)(const SectionInfo
& sectionInfo
) = nullptr) const;
// Takes a single class VM address. The handler block receives
// (diag, classSuperclassVMAddr, classDataVMAddr, objcClass).
341 void parseObjCClass(Diagnostics
& diag
, const VMAddrConverter
& vmAddrConverter
,
342 uint64_t classVMAddr
,
343 void (^handler
)(Diagnostics
& diag
,
344 uint64_t classSuperclassVMAddr
,
345 uint64_t classDataVMAddr
,
346 const ObjCClassInfo
& objcClass
)) const;
// The handler block receives the same values as parseObjCClass's handler, plus the
// class VM address (second argument) and an isMetaClass flag (last argument).
348 void forEachObjCClass(Diagnostics
& diag
, const VMAddrConverter
& vmAddrConverter
,
349 void (^handler
)(Diagnostics
& diag
, uint64_t classVMAddr
,
350 uint64_t classSuperclassVMAddr
, uint64_t classDataVMAddr
,
351 const ObjCClassInfo
& objcClass
, bool isMetaClass
)) const;
// The handler block receives (diag, categoryVMAddr, objcCategory).
353 void forEachObjCCategory(Diagnostics
& diag
, const VMAddrConverter
& vmAddrConverter
,
354 void (^handler
)(Diagnostics
& diag
, uint64_t categoryVMAddr
,
355 const dyld3::MachOAnalyzer::ObjCCategory
& objcCategory
)) const;
357 // lists all Protocols defined in the image
// The handler block receives (diag, protocolVMAddr, objCProtocol). A second forEachObjCProtocol
// overload, taking a protocol-list VM address, is declared further down.
358 void forEachObjCProtocol(Diagnostics
& diag
, const VMAddrConverter
& vmAddrConverter
,
359 void (^handler
)(Diagnostics
& diag
, uint64_t protocolVMAddr
,
360 const dyld3::MachOAnalyzer::ObjCProtocol
& objCProtocol
)) const;
362 // Walk a method list starting from its vmAddr.
363 // Note, classes, categories, protocols, etc, all share the same method list structure so can all use this.
// The handler block receives (methodVMAddr, method). isRelativeMethodList is an optional
// bool pointer that defaults to nullptr. This member takes no Diagnostics argument.
364 void forEachObjCMethod(uint64_t methodListVMAddr
, const VMAddrConverter
& vmAddrConverter
,
365 void (^handler
)(uint64_t methodVMAddr
, const ObjCMethod
& method
),
366 bool* isRelativeMethodList
= nullptr) const;
// The handler block receives (propertyVMAddr, property).
368 void forEachObjCProperty(uint64_t propertyListVMAddr
, const VMAddrConverter
& vmAddrConverter
,
369 void (^handler
)(uint64_t propertyVMAddr
, const ObjCProperty
& property
)) const;
371 // lists all Protocols on a protocol_list_t
// Overload keyed by a protocol-list VM address; the handler block receives (protocolRefVMAddr, protocol).
372 void forEachObjCProtocol(uint64_t protocolListVMAddr
, const VMAddrConverter
& vmAddrConverter
,
373 void (^handler
)(uint64_t protocolRefVMAddr
, const ObjCProtocol
& protocol
)) const;
// The handler block receives (selRefVMAddr, selRefTargetVMAddr).
375 void forEachObjCSelectorReference(Diagnostics
& diag
, const VMAddrConverter
& vmAddrConverter
,
376 void (^handler
)(uint64_t selRefVMAddr
, uint64_t selRefTargetVMAddr
)) const;
// The handler block receives one method name string. Takes no Diagnostics or VMAddrConverter.
378 void forEachObjCMethodName(void (^handler
)(const char* methodName
)) const;
// Argument-free const bool query.
380 bool hasObjCMessageReferences() const;
// Returns a const ObjCImageInfo pointer.
// NOTE(review): presumably nullptr when the image has no such info — confirm.
382 const ObjCImageInfo
* objcImageInfo() const;
// Second forEachWeakDef overload: the handler receives (symbolName, imageOffset, isFromExportTrie)
// and has no `stop` parameter, unlike the overload declared earlier.
384 void forEachWeakDef(Diagnostics
& diag
, void (^handler
)(const char* symbolName
, uint64_t imageOffset
, bool isFromExportTrie
)) const;
// One-bit bit-fields packed into a uint64_t.
// NOTE(review): the enclosing struct header and the remaining bit-fields are not visible in this view.
392 uint64_t writable
: 1,
394 textRelocsAllowed
: 1, // segment supports text relocs (i386 only)
// Scoped enum with ten enumerators, each naming one kind of malformed layout; used as the
// parameter type of enforceFormat().
398 enum class Malformed
{ linkeditOrder
, linkeditAlignment
, linkeditPermissions
, dyldInfoAndlocalRelocs
, segmentOrder
,
399 textPermissions
, executableData
, writableData
, codeSigAlignment
, sectionsAddrRangeWithinSegment
};
// Const bool query taking one Malformed value (the parameter is unnamed).
400 bool enforceFormat(Malformed
) const;
// Returns a const uint8_t* for a VM address, given a LayoutInfo.
402 const uint8_t* getContentForVMAddr(const LayoutInfo
& info
, uint64_t vmAddr
) const;
// Family of const bool checks. Each takes `diag` by non-const reference and a path.
// NOTE(review): presumably true means the check passed, with failure details in `diag` — confirm.
// validLoadCommands and validSegments additionally take the file length.
403 bool validLoadCommands(Diagnostics
& diag
, const char* path
, size_t fileLen
) const;
// validEmbeddedPaths additionally takes the Platform, and takes it before the path.
404 bool validEmbeddedPaths(Diagnostics
& diag
, Platform platform
, const char* path
) const;
405 bool validSegments(Diagnostics
& diag
, const char* path
, size_t fileLen
) const;
// The remaining checks take only (diag, path).
406 bool validLinkedit(Diagnostics
& diag
, const char* path
) const;
407 bool validLinkeditLayout(Diagnostics
& diag
, const char* path
) const;
408 bool validRebaseInfo(Diagnostics
& diag
, const char* path
) const;
409 bool validBindInfo(Diagnostics
& diag
, const char* path
) const;
410 bool validMain(Diagnostics
& diag
, const char* path
) const;
411 bool validChainedFixupsInfo(Diagnostics
& diag
, const char* path
) const;
412 bool validChainedFixupsInfoOldArm64e(Diagnostics
& diag
, const char* path
) const;
// Const bool check over opcode-level rebase state. Apart from diag/path, its parameters
// mirror the handler of the opcode-level forEachRebase overload (minus `stop`).
// NOTE(review): by its name, true presumably means the state is invalid — confirm.
414 bool invalidRebaseState(Diagnostics
& diag
, const char* opcodeName
, const char* path
, const LinkEditInfo
& leInfo
, const SegmentInfo segments
[],
415 bool segIndexSet
, uint32_t pointerSize
, uint8_t segmentIndex
, uint64_t segmentOffset
, Rebase kind
) const;
// Const bool check over opcode-level bind state. It adds libraryOrdinalSet, dylibCount,
// libOrdinal, type and symbolName, and has no `kind` parameter.
416 bool invalidBindState(Diagnostics
& diag
, const char* opcodeName
, const char* path
, const LinkEditInfo
& leInfo
, const SegmentInfo segments
[],
417 bool segIndexSet
, bool libraryOrdinalSet
, uint32_t dylibCount
, int libOrdinal
, uint32_t pointerSize
,
418 uint8_t segmentIndex
, uint64_t segmentOffset
, uint8_t type
, const char* symbolName
) const;
// Returns bool. Takes an r_address and a `stop` reference; the callback block receives
// (dataSegIndex, dataSegOffset, type, stop).
419 bool doLocalReloc(Diagnostics
& diag
, uint32_t r_address
, bool& stop
, void (^callback
)(uint32_t dataSegIndex
, uint64_t dataSegOffset
, uint8_t type
, bool& stop
)) const;
// Argument-free const accessor returning a uint8_t.
420 uint8_t relocPointerType() const;
// Maps a 16-bit n_desc value to an int.
421 int libOrdinalFromDesc(uint16_t n_desc
) const;
// Returns bool. Takes r_address, r_symbolnum and a non-const LinkEditInfo reference; the
// callback block receives (dataSegIndex, dataSegOffset, type, libOrdinal, addend,
// symbolName, weakImport, lazy, stop).
422 bool doExternalReloc(Diagnostics
& diag
, uint32_t r_address
, uint32_t r_symbolnum
, LinkEditInfo
& leInfo
, bool& stop
,
423 void (^callback
)(uint32_t dataSegIndex
, uint64_t dataSegOffset
, uint8_t type
, int libOrdinal
,
424 uint64_t addend
, const char* symbolName
, bool weakImport
, bool lazy
, bool& stop
)) const;
// Takes a non-const SegmentInfo array (as a parameter, a pointer) with no element count.
// NOTE(review): presumably the caller sizes it from segmentCount() — confirm.
426 void getAllSegmentsInfos(Diagnostics
& diag
, SegmentInfo segments
[]) const;
// Const bool query keyed by segment index.
427 bool segmentHasTextRelocs(uint32_t segIndex
) const;
// Both base-address helpers take a const SegmentInfo array and its count and return a uint64_t.
428 uint64_t localRelocBaseAddress(const SegmentInfo segmentsInfos
[], uint32_t segCount
) const;
429 uint64_t externalRelocBaseAddress(const SegmentInfo segmentsInfos
[], uint32_t segCount
) const;
// Returns bool; segIndex and segOffset are non-const reference parameters.
430 bool segIndexAndOffsetForAddress(uint64_t addr
, const SegmentInfo segmentsInfos
[], uint32_t segCount
, uint32_t& segIndex
, uint64_t& segOffset
) const;
// Takes three blocks:
//   targetCount   receives (totalTargets, stop)
//   addTarget     receives (leInfo, segments, libraryOrdinalSet, dylibCount, libOrdinal,
//                 type, symbolName, addend, weakImport, stop)
//   addChainStart receives (leInfo, segments, segmentIndex, segIndexSet, segmentOffset,
//                 format, stop)
// NOTE(review): the order in which the blocks are invoked, and whether any may be nullptr,
// is not visible here — confirm against the implementation.
431 void parseOrgArm64eChainedFixups(Diagnostics
& diag
, void (^targetCount
)(uint32_t totalTargets
, bool& stop
),
432 void (^addTarget
)(const LinkEditInfo
& leInfo
, const SegmentInfo segments
[], bool libraryOrdinalSet
, uint32_t dylibCount
, int libOrdinal
, uint8_t type
, const char* symbolName
, uint64_t addend
, bool weakImport
, bool& stop
),
433 void (^addChainStart
)(const LinkEditInfo
& leInfo
, const SegmentInfo segments
[], uint8_t segmentIndex
, bool segIndexSet
, uint64_t segmentOffset
, uint16_t format
, bool& stop
)) const;
// Const bool query over a byte pointer.
434 bool contentIsRegularStub(const uint8_t* helperContent
) const;
// Takes the [start, end) bounds as const pointers and a current position `p`.
// cummulativeString is a char array passed by non-const reference, with curStrOffset an int index.
// The callback is an ExportsCallback block.
// NOTE(review): by its name this presumably recurses over an export trie, extending
// cummulativeString at curStrOffset as it descends — confirm.
435 void recurseTrie(Diagnostics
& diag
, const uint8_t* const start
, const uint8_t* p
, const uint8_t* const end
,
436 OverflowSafeArray
<char>& cummulativeString
, int curStrOffset
, bool& stop
, MachOAnalyzer::ExportsCallback callback
) const;
// Returns void; vmSpace and hasZeroFill are non-const reference parameters.
437 void analyzeSegmentsLayout(uint64_t& vmSpace
, bool& hasZeroFill
) const;
444 #endif /* MachOAnalyzer_h */