X-Git-Url: https://git.saurik.com/apple/dyld.git/blobdiff_plain/16b475fcb248267b8b51f759bc62a49ec2afa88d..refs/heads/master:/dyld3/shared-cache/CacheBuilder.cpp diff --git a/dyld3/shared-cache/CacheBuilder.cpp b/dyld3/shared-cache/CacheBuilder.cpp index 2e0d5e1..b625e9f 100644 --- a/dyld3/shared-cache/CacheBuilder.cpp +++ b/dyld3/shared-cache/CacheBuilder.cpp @@ -28,7 +28,7 @@ #include "DyldSharedCache.h" #include "CacheBuilder.h" #include "Diagnostics.h" - +#include "IMPCaches.hpp" CacheBuilder::CacheBuilder(const DyldSharedCache::CreateOptions& options, const dyld3::closure::FileSystem& fileSystem) : _options(options) @@ -39,6 +39,9 @@ CacheBuilder::CacheBuilder(const DyldSharedCache::CreateOptions& options, const { } +CacheBuilder::~CacheBuilder() { +} + std::string CacheBuilder::errorMessage() { @@ -48,8 +51,9 @@ std::string CacheBuilder::errorMessage() void CacheBuilder::copyRawSegments() { const bool log = false; - dispatch_apply(_sortedDylibs.size(), DISPATCH_APPLY_AUTO, ^(size_t index) { - const DylibInfo& dylib = _sortedDylibs[index]; + const bool logCFConstants = false; + + forEachDylibInfo(^(const DylibInfo& dylib, Diagnostics& dylibDiag) { for (const SegmentMappingInfo& info : dylib.cacheLocation) { if (log) fprintf(stderr, "copy %s segment %s (0x%08X bytes) from %p to %p (logical addr 0x%llX) for %s\n", _options.archs->name(), info.segName, info.copySegmentSize, info.srcSegment, info.dstSegment, info.dstCacheUnslidAddress, dylib.input->mappedFile.runtimePath.c_str()); @@ -57,7 +61,7 @@ void CacheBuilder::copyRawSegments() } }); - // Copy the coalesced sections + // Copy the coalesced __TEXT sections const uint64_t numCoalescedSections = sizeof(CacheCoalescedText::SupportedSections) / sizeof(*CacheCoalescedText::SupportedSections); dispatch_apply(numCoalescedSections, DISPATCH_APPLY_AUTO, ^(size_t index) { const CacheCoalescedText::StringSection& cacheStringSection = _coalescedText.getSectionData(CacheCoalescedText::SupportedSections[index]); @@ -67,23 +71,51 @@ void CacheBuilder::copyRawSegments() for (const auto& stringAndOffset : cacheStringSection.stringsToOffsets) ::memcpy(cacheStringSection.bufferAddr + stringAndOffset.second, stringAndOffset.first.data(), stringAndOffset.first.size() + 1); }); + + // Copy the coalesced CF sections + if ( _coalescedText.cfStrings.bufferSize != 0 ) { + uint8_t* dstBuffer = _coalescedText.cfStrings.bufferAddr; + uint64_t dstBufferVMAddr = _coalescedText.cfStrings.bufferVMAddr; + forEachDylibInfo(^(const DylibInfo& dylib, Diagnostics& dylibDiag) { + const char* segmentName = "__OBJC_CONST"; + const char* sectionName = "__cfstring"; + const DylibTextCoalescer::DylibSectionOffsetToCacheSectionOffset& sectionData = dylib.textCoalescer.getSectionCoalescer(segmentName, sectionName); + if ( sectionData.empty() ) + return; + + uint64_t sectionContentSize = 0; + const void* sectionContent = dylib.input->mappedFile.mh->findSectionContent(segmentName, sectionName, sectionContentSize); + assert(sectionContent != nullptr); + assert(sectionContentSize != 0); + for (const auto& dylibOffsetAndCacheOffset : sectionData) { + uint64_t dylibOffset = dylibOffsetAndCacheOffset.first; + uint64_t cacheOffset = dylibOffsetAndCacheOffset.second; + if (logCFConstants) fprintf(stderr, "copy %s %s section %s (0x%08X bytes) to %p (logical addr 0x%llX)\n", + _options.archs->name(), segmentName, sectionName, + (uint32_t)DyldSharedCache::ConstantClasses::cfStringAtomSize, dstBuffer + cacheOffset, dstBufferVMAddr + cacheOffset); + ::memcpy(dstBuffer + cacheOffset, (const uint8_t*)sectionContent + dylibOffset, (size_t)DyldSharedCache::ConstantClasses::cfStringAtomSize); + } + }); + } } -void CacheBuilder::adjustAllImagesForNewSegmentLocations() +void CacheBuilder::adjustAllImagesForNewSegmentLocations(uint64_t cacheBaseAddress, + ASLR_Tracker& aslrTracker, LOH_Tracker* lohTracker, + const CacheBuilder::CacheCoalescedText* coalescedText) { - __block std::vector diags; - diags.resize(_sortedDylibs.size()); - // Note this cannot to be done in parallel because the LOH Tracker and aslr tracker are not thread safe - for (size_t index = 0; index != _sortedDylibs.size(); ++index) { - const DylibInfo& dylib = _sortedDylibs[index]; - adjustDylibSegments(dylib, diags[index]); - } - for (const Diagnostics& diag : diags) { - if ( diag.hasError() ) { - _diagnostics.error("%s", diag.errorMessage().c_str()); - break; - } + __block bool badDylib = false; + forEachDylibInfo(^(const DylibInfo& dylib, Diagnostics& dylibDiag) { + if ( dylibDiag.hasError() ) + return; + adjustDylibSegments(dylib, dylibDiag, cacheBaseAddress, aslrTracker, + lohTracker, coalescedText); + if ( dylibDiag.hasError() ) + badDylib = true; + }); + + if ( badDylib && !_diagnostics.hasError() ) { + _diagnostics.error("One or more binaries has an error which prevented linking. See other errors."); } } @@ -92,6 +124,10 @@ CacheBuilder::ASLR_Tracker::~ASLR_Tracker() { if ( _bitmap != nullptr ) ::free(_bitmap); +#if BUILDING_APP_CACHE_UTIL + if ( _cacheLevels != nullptr ) + ::free(_cacheLevels); +#endif } void CacheBuilder::ASLR_Tracker::setDataRegion(const void* rwRegionStart, size_t rwRegionSize) @@ -99,17 +135,28 @@ void CacheBuilder::ASLR_Tracker::setDataRegion(const void* rwRegionStart, size_t _pageCount = (unsigned)(rwRegionSize+_pageSize-1)/_pageSize; _regionStart = (uint8_t*)rwRegionStart; _regionEnd = (uint8_t*)rwRegionStart + rwRegionSize; - _bitmap = (bool*)calloc(_pageCount*(_pageSize/4)*sizeof(bool), 1); + _bitmap = (bool*)calloc(_pageCount*(_pageSize/kMinimumFixupAlignment)*sizeof(bool), 1); +#if BUILDING_APP_CACHE_UTIL + size_t cacheLevelsSize = (_pageCount*(_pageSize/kMinimumFixupAlignment)*sizeof(uint8_t)); + _cacheLevels = (uint8_t*)malloc(cacheLevelsSize); + memset(_cacheLevels, (int)~0U, cacheLevelsSize); +#endif } -void CacheBuilder::ASLR_Tracker::add(void* loc) +void CacheBuilder::ASLR_Tracker::add(void* loc, uint8_t level) { if (!_enabled) return; uint8_t* p = (uint8_t*)loc; assert(p >= _regionStart); assert(p < _regionEnd); - _bitmap[(p-_regionStart)/4] = true; + _bitmap[(p-_regionStart)/kMinimumFixupAlignment] = true; + +#if BUILDING_APP_CACHE_UTIL + if ( level != (uint8_t)~0U ) { + _cacheLevels[(p-_regionStart)/kMinimumFixupAlignment] = level; + } +#endif } void CacheBuilder::ASLR_Tracker::remove(void* loc) @@ -119,17 +166,28 @@ void CacheBuilder::ASLR_Tracker::remove(void* loc) uint8_t* p = (uint8_t*)loc; assert(p >= _regionStart); assert(p < _regionEnd); - _bitmap[(p-_regionStart)/4] = false; + _bitmap[(p-_regionStart)/kMinimumFixupAlignment] = false; } -bool CacheBuilder::ASLR_Tracker::has(void* loc) +bool CacheBuilder::ASLR_Tracker::has(void* loc, uint8_t* level) const { if (!_enabled) return true; uint8_t* p = (uint8_t*)loc; assert(p >= _regionStart); assert(p < _regionEnd); - return _bitmap[(p-_regionStart)/4]; + + if ( _bitmap[(p-_regionStart)/kMinimumFixupAlignment] ) { +#if BUILDING_APP_CACHE_UTIL + if ( level != nullptr ) { + uint8_t levelValue = _cacheLevels[(p-_regionStart)/kMinimumFixupAlignment]; + if ( levelValue != (uint8_t)~0U ) + *level = levelValue; + } +#endif + return true; + } + return false; } void CacheBuilder::ASLR_Tracker::setHigh8(void* p, uint8_t high8) @@ -152,7 +210,7 @@ void CacheBuilder::ASLR_Tracker::setRebaseTarget64(void*p, uint64_t targetVMAddr _rebaseTarget64[p] = targetVMAddr; } -bool CacheBuilder::ASLR_Tracker::hasHigh8(void* p, uint8_t* highByte) +bool CacheBuilder::ASLR_Tracker::hasHigh8(void* p, uint8_t* highByte) const { auto pos = _high8Map.find(p); if ( pos == _high8Map.end() ) @@ -161,7 +219,7 @@ bool CacheBuilder::ASLR_Tracker::hasHigh8(void* p, uint8_t* highByte) return true; } -bool CacheBuilder::ASLR_Tracker::hasAuthData(void* p, uint16_t* diversity, bool* hasAddrDiv, uint8_t* key) +bool CacheBuilder::ASLR_Tracker::hasAuthData(void* p, uint16_t* diversity, bool* hasAddrDiv, uint8_t* key) const { auto pos = _authDataMap.find(p); if ( pos == _authDataMap.end() ) @@ -172,7 +230,7 @@ bool CacheBuilder::ASLR_Tracker::hasAuthData(void* p, uint16_t* diversity, bool* return true; } -bool CacheBuilder::ASLR_Tracker::hasRebaseTarget32(void* p, uint32_t* vmAddr) +bool CacheBuilder::ASLR_Tracker::hasRebaseTarget32(void* p, uint32_t* vmAddr) const { auto pos = _rebaseTarget32.find(p); if ( pos == _rebaseTarget32.end() ) @@ -181,7 +239,7 @@ bool CacheBuilder::ASLR_Tracker::hasRebaseTarget32(void* p, uint32_t* vmAddr) return true; } -bool CacheBuilder::ASLR_Tracker::hasRebaseTarget64(void* p, uint64_t* vmAddr) +bool CacheBuilder::ASLR_Tracker::hasRebaseTarget64(void* p, uint64_t* vmAddr) const { auto pos = _rebaseTarget64.find(p); if ( pos == _rebaseTarget64.end() ) @@ -190,46 +248,108 @@ bool CacheBuilder::ASLR_Tracker::hasRebaseTarget64(void* p, uint64_t* vmAddr) return true; } +std::vector CacheBuilder::ASLR_Tracker::getRebaseTargets() const { + std::vector targets; + for (const auto& target : _rebaseTarget32) + targets.push_back(target.first); + for (const auto& target : _rebaseTarget64) + targets.push_back(target.first); + return targets; +} + //////////////////////////// DylibTextCoalescer //////////////////////////////////// -bool CacheBuilder::DylibTextCoalescer::sectionWasCoalesced(std::string_view sectionName) const { +bool CacheBuilder::DylibTextCoalescer::segmentWasCoalesced(std::string_view segmentName) const { + if (segmentName.size() > 16) + segmentName = segmentName.substr(0, 16); + + if ( segmentName == "__OBJC_CONST" ) { + return !cfStrings.empty(); + } + + return false; +} + +bool CacheBuilder::DylibTextCoalescer::sectionWasCoalesced(std::string_view segmentName, + std::string_view sectionName) const { + if (segmentName.size() > 16) + segmentName = segmentName.substr(0, 16); if (sectionName.size() > 16) sectionName = sectionName.substr(0, 16); - std::map supportedSections = { - { "__objc_classname", &objcClassNames }, - { "__objc_methname", &objcMethNames }, - { "__objc_methtype", &objcMethTypes } - }; - auto it = supportedSections.find(sectionName); - if (it == supportedSections.end()) - return false; - return !it->second->empty(); + + if ( segmentName == "__TEXT" ) { + std::map supportedSections = { + { "__objc_classname", &objcClassNames }, + { "__objc_methname", &objcMethNames }, + { "__objc_methtype", &objcMethTypes } + }; + auto it = supportedSections.find(sectionName); + if (it == supportedSections.end()) + return false; + return !it->second->empty(); + } + + if ( segmentName == "__OBJC_CONST" ) { + if ( sectionName == "__cfstring" ) { + return !cfStrings.empty(); + } + } + + return false; } -CacheBuilder::DylibTextCoalescer::DylibSectionOffsetToCacheSectionOffset& CacheBuilder::DylibTextCoalescer::getSectionCoalescer(std::string_view sectionName) { +CacheBuilder::DylibTextCoalescer::DylibSectionOffsetToCacheSectionOffset& +CacheBuilder::DylibTextCoalescer::getSectionCoalescer(std::string_view segmentName, std::string_view sectionName) { + if (segmentName.size() > 16) + segmentName = segmentName.substr(0, 16); if (sectionName.size() > 16) sectionName = sectionName.substr(0, 16); - std::map supportedSections = { - { "__objc_classname", &objcClassNames }, - { "__objc_methname", &objcMethNames }, - { "__objc_methtype", &objcMethTypes } - }; - auto it = supportedSections.find(sectionName); - assert(it != supportedSections.end()); - return *it->second; + + if ( segmentName == "__TEXT" ) { + std::map supportedSections = { + { "__objc_classname", &objcClassNames }, + { "__objc_methname", &objcMethNames }, + { "__objc_methtype", &objcMethTypes } + }; + auto it = supportedSections.find(sectionName); + assert(it != supportedSections.end()); + return *it->second; + } + + if ( segmentName == "__OBJC_CONST" ) { + if ( sectionName == "__cfstring" ) { + return cfStrings; + } + } + + assert(false); } -const CacheBuilder::DylibTextCoalescer::DylibSectionOffsetToCacheSectionOffset& CacheBuilder::DylibTextCoalescer::getSectionCoalescer(std::string_view sectionName) const { +const CacheBuilder::DylibTextCoalescer::DylibSectionOffsetToCacheSectionOffset& +CacheBuilder::DylibTextCoalescer::getSectionCoalescer(std::string_view segmentName, std::string_view sectionName) const { + if (segmentName.size() > 16) + segmentName = segmentName.substr(0, 16); if (sectionName.size() > 16) sectionName = sectionName.substr(0, 16); - std::map supportedSections = { - { "__objc_classname", &objcClassNames }, - { "__objc_methname", &objcMethNames }, - { "__objc_methtype", &objcMethTypes } - }; - auto it = supportedSections.find(sectionName); - assert(it != supportedSections.end()); - return *it->second; + + if ( segmentName == "__TEXT" ) { + std::map supportedSections = { + { "__objc_classname", &objcClassNames }, + { "__objc_methname", &objcMethNames }, + { "__objc_methtype", &objcMethTypes } + }; + auto it = supportedSections.find(sectionName); + assert(it != supportedSections.end()); + return *it->second; + } + + if ( segmentName == "__OBJC_CONST" ) { + if ( sectionName == "__cfstring" ) { + return cfStrings; + } + } + + assert(false); } //////////////////////////// CacheCoalescedText //////////////////////////////////// @@ -239,8 +359,11 @@ const char* CacheBuilder::CacheCoalescedText::SupportedSections[] = { "__objc_methtype", }; -void CacheBuilder::CacheCoalescedText::parseCoalescableText(const dyld3::MachOAnalyzer *ma, - DylibTextCoalescer& textCoalescer) { +void CacheBuilder::CacheCoalescedText:: + parseCoalescableText(const dyld3::MachOAnalyzer* ma, + DylibTextCoalescer& textCoalescer, + const IMPCaches::SelectorMap& selectors, + IMPCaches::HoleMap& selectorsHoleMap) { static const bool log = false; // We can only remove sections if we know we have split seg v2 to point to it @@ -266,17 +389,20 @@ void CacheBuilder::CacheCoalescedText::parseCoalescableText(const dyld3::MachOAn const std::set supportedSections(std::begin(SupportedSections), std::end(SupportedSections)); int64_t slide = ma->getSlide(); + bool isSelectorsSection = false; for (auto sectionInfoIt = textSectionInfos.rbegin(); sectionInfoIt != textSectionInfos.rend(); ++sectionInfoIt) { const std::string& sectionName = sectionInfoIt->first; const dyld3::MachOAnalyzer::SectionInfo& sectInfo = sectionInfoIt->second; + isSelectorsSection = (sectionName == "__objc_methname"); + // If we find a section we can't handle then stop here. Hopefully we coalesced some from the end. if (supportedSections.find(sectionName) == supportedSections.end()) break; StringSection& cacheStringSection = getSectionData(sectionName); - DylibTextCoalescer::DylibSectionOffsetToCacheSectionOffset& sectionStringData = textCoalescer.getSectionCoalescer(sectionName); + DylibTextCoalescer::DylibSectionOffsetToCacheSectionOffset& sectionStringData = textCoalescer.getSectionCoalescer("__TEXT", sectionName); // Walk the strings in this section const uint8_t* content = (uint8_t*)(sectInfo.sectAddr + slide); @@ -284,26 +410,313 @@ void CacheBuilder::CacheCoalescedText::parseCoalescableText(const dyld3::MachOAn const char* end = s + sectInfo.sectSize; while ( s < end ) { std::string_view str = s; - auto itAndInserted = cacheStringSection.stringsToOffsets.insert({ str, cacheStringSection.bufferSize }); - if (itAndInserted.second) { - // If we inserted the string then we need to include it in the total - cacheStringSection.bufferSize += str.size() + 1; - if (log) - printf("Selector: %s -> %s\n", ma->installName(), s); - } else { + int cacheSectionOffset = 0; + + auto it = cacheStringSection.stringsToOffsets.find(str); + if (it != cacheStringSection.stringsToOffsets.end()) { // Debugging only. If we didn't include the string then we saved that many bytes cacheStringSection.savedSpace += str.size() + 1; + cacheSectionOffset = it->second; + } else if (isSelectorsSection) { + // If we are in the selectors section, we need to move + // the selectors in the selector map to their correct addresses, + // and fill the holes with the rest + +#if BUILDING_APP_CACHE_UTIL + cacheSectionOffset = cacheStringSection.bufferSize; +#else + const IMPCaches::SelectorMap::UnderlyingMap & map = selectors.map; + IMPCaches::SelectorMap::UnderlyingMap::const_iterator selectorsIterator = map.find(str); + if (selectorsIterator != map.end()) { + cacheSectionOffset = selectorsIterator->second->offset; + } else { + cacheSectionOffset = selectorsHoleMap.addStringOfSize((unsigned)str.size() + 1); + } +#endif + cacheStringSection.stringsToOffsets[str] = cacheSectionOffset; + uint32_t sizeAtLeast = cacheSectionOffset + (uint32_t)str.size() + 1; + if (cacheStringSection.bufferSize < sizeAtLeast) { + cacheStringSection.bufferSize = sizeAtLeast; + } + } else { + auto itAndInserted = cacheStringSection.stringsToOffsets.insert({ str, cacheStringSection.bufferSize }); + cacheSectionOffset = itAndInserted.first->second; + assert(itAndInserted.second); + + cacheStringSection.bufferSize += str.size() + 1; + if (log) { + printf("Selector: %s -> %s\n", ma->installName(), s); + } } // Now keep track of this offset in our source dylib as pointing to this offset uint32_t sourceSectionOffset = (uint32_t)((uint64_t)s - (uint64_t)content); - uint32_t cacheSectionOffset = itAndInserted.first->second; sectionStringData[sourceSectionOffset] = cacheSectionOffset; s += str.size() + 1; } } } +void CacheBuilder::CacheCoalescedText::parseCFConstants(const dyld3::MachOAnalyzer *ma, + DylibTextCoalescer &textCoalescer) { + static const bool log = false; + + // FIXME: Re-enable this once we can correctly patch the shared cache + if ( ma != nullptr ) + return; + + if ( !ma->is64() ) + return; + + // We only support chained fixups as we need to rewrite binds/rebases after applying split seg + // and that is much easier with chained fixups than opcodes + if ( !ma->hasChainedFixupsLoadCommand() ) + return; + + // FIXME: Support DYLD_CHAINED_PTR_ARM64E_USERLAND once ld64 moves to it. + const uint16_t pointerFormat = ma->chainedPointerFormat(); + if ( pointerFormat != DYLD_CHAINED_PTR_ARM64E ) + return; + + // We can only remove sections if we know we have split seg v2 to point to it + // Otherwise, a PC relative load in the __TEXT segment wouldn't know how to point to the new constants + // which are no longer in the same segment + if ( !ma->isSplitSegV2() ) + return; + + // We can only remove sections from the end of a segment, so cache them all and walk backwards. + __block std::vector> dataSectionInfos; + __block uint64_t cstringStartVMAddr = 0; + __block uint64_t cstringEndVMAddr = 0; + ma->forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo §Info, bool malformedSectionRange, bool &stop) { + if ( malformedSectionRange ) + return; + if ( strcmp(sectInfo.segInfo.segName, "__OBJC_CONST") == 0 ) { + dataSectionInfos.push_back({ sectInfo.sectName, sectInfo }); + return; + } + if ( strcmp(sectInfo.segInfo.segName, "__TEXT") == 0 ) { + if ( strcmp(sectInfo.sectName, "__cstring") == 0 ) { + if ( ( (sectInfo.sectFlags & SECTION_TYPE) == S_CSTRING_LITERALS ) ) { + cstringStartVMAddr = sectInfo.sectAddr; + cstringEndVMAddr = cstringStartVMAddr + sectInfo.sectSize; + } + } + } + }); + + // We need to clear the chained pointer fixups for the whole segment, so can only + // process any type of CF object if we can process them all + if ( dataSectionInfos.size() != 1 ) + return; + + if ( dataSectionInfos.front().first != "__cfstring" ) + return; + + if ( cstringStartVMAddr == 0 ) + return; + + const dyld3::MachOAnalyzer::SectionInfo& cfStringsSection = dataSectionInfos.back().second; + + // A CFString is layed out in memory as + // { + // uintptr_t isa; + // uint32_t encoding; + // uint32_t padding; + // uintptr_t cstringData; + // uintptr_t cstringLength; + // } + const uint64_t cstringDataOffset = 16; + const char* className = cfStrings.isaClassName; + if ( cfStringsSection.sectSize % (uint32_t)DyldSharedCache::ConstantClasses::cfStringAtomSize ) { + // We don't support padding or any kind on the section + return; + } + + uint64_t baseAddress = ma->preferredLoadAddress(); + + uint64_t startVMOffset = cfStringsSection.sectAddr - baseAddress; + uint64_t endVMOffset = startVMOffset + cfStringsSection.sectSize; + + __block Diagnostics diag; + + // Make sure no symbols are pointing in to this section + __block bool hasSymbols = false; + ma->forEachGlobalSymbol(diag, ^(const char *symbolName, uint64_t n_value, uint8_t n_type, uint8_t n_sect, uint16_t n_desc, bool &stop) { + uint64_t vmOffset = n_value - baseAddress; + if ( vmOffset < startVMOffset ) + return; + if ( vmOffset >= endVMOffset ) + return; + // In range of our section + hasSymbols = true; + stop = true; + }); + if ( diag.hasError() ) + return; + if ( hasSymbols ) + return; + + ma->forEachLocalSymbol(diag, ^(const char *symbolName, uint64_t n_value, uint8_t n_type, uint8_t n_sect, uint16_t n_desc, bool &stop) { + uint64_t vmOffset = n_value - baseAddress; + if ( vmOffset < startVMOffset ) + return; + if ( vmOffset >= endVMOffset ) + return; + // In range of our section + hasSymbols = true; + stop = true; + }); + if ( diag.hasError() ) + return; + if ( hasSymbols ) + return; + + ma->forEachExportedSymbol(diag, ^(const char *symbolName, uint64_t imageOffset, uint64_t flags, uint64_t other, const char *importName, bool &stop) { + if ( imageOffset < startVMOffset ) + return; + if ( imageOffset >= endVMOffset ) + return; + // In range of our section + hasSymbols = true; + stop = true; + }); + if ( diag.hasError() ) + return; + if ( hasSymbols ) + return; + + __block std::vector dependentPaths; + ma->forEachDependentDylib(^(const char *loadPath, bool isWeak, bool isReExport, + bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool &stop) { + dependentPaths.push_back(loadPath); + }); + + // Find all the binds to the ISA class. These delineate the atoms + // In CoreFoundation itself, we are looking for rebases to the ISA + __block std::vector atomOffsets; + + bool dylibExportsISA = strcmp(ma->installName(), cfStrings.isaInstallName) == 0; + if ( !dylibExportsISA ) { + // This dylib doens't export the class, so look for binds to the ISA + __block std::vector> bindTargetSymbols; + ma->forEachChainedFixupTarget(diag, ^(int libraryOrdinal, const char* symbolName, uint64_t addend, bool weakImport, bool& stop) { + bindTargetSymbols.push_back({ symbolName, libraryOrdinal }); + }); + + __block bool foundBadBind = false; + ma->withChainStarts(diag, ma->chainStartsOffset(), ^(const dyld_chained_starts_in_image* startsInfo) { + if ( foundBadBind ) + return; + ma->forEachFixupInAllChains(diag, startsInfo, false, ^(dyld3::MachOLoaded::ChainedFixupPointerOnDisk* fixupLoc, + const dyld_chained_starts_in_segment* segInfo, bool& stopFixups) { + // Skip anything not in this section + uint64_t vmOffset = (uint8_t*)fixupLoc - (uint8_t*)ma; + if ( vmOffset < startVMOffset ) + return; + if ( vmOffset >= endVMOffset ) + return; + + uint32_t bindOrdinal; + int64_t ptrAddend; + if ( fixupLoc->isBind(pointerFormat, bindOrdinal, ptrAddend) ) { + if ( ptrAddend != 0 ) { + foundBadBind = true; + stopFixups = true; + return; + } + if ( bindOrdinal >= bindTargetSymbols.size() ) { + foundBadBind = true; + stopFixups = true; + return; + } + if ( strcmp(bindTargetSymbols[bindOrdinal].first, className) != 0 ) { + foundBadBind = true; + stopFixups = true; + return; + } + int libOrdinal = bindTargetSymbols[bindOrdinal].second; + if ( libOrdinal <= 0 ) { + foundBadBind = true; + stopFixups = true; + return; + } + int depIndex = libOrdinal - 1; + if ( depIndex >= dependentPaths.size() ) { + foundBadBind = true; + stopFixups = true; + return; + } + const char* depLoadPath = dependentPaths[depIndex]; + // All dylibs must find the ISA in the same place + if ( strcmp(cfStrings.isaInstallName, depLoadPath) != 0 ) { + foundBadBind = true; + stopFixups = true; + return; + } + atomOffsets.push_back(vmOffset); + } + }); + }); + if ( foundBadBind ) + return; + + if ( atomOffsets.empty() ) + return; + } + + if ( diag.hasError() ) + return; + + // Find all the rebases in the atoms, which correpond to pointers strings + __block std::map sectionRebases; + ma->withChainStarts(diag, ma->chainStartsOffset(), ^(const dyld_chained_starts_in_image* startsInfo) { + ma->forEachFixupInAllChains(diag, startsInfo, false, ^(dyld3::MachOLoaded::ChainedFixupPointerOnDisk* fixupLoc, const dyld_chained_starts_in_segment* segInfo, bool& stopFixups) { + // Skip anything not in this section + uint64_t vmOffset = (uint8_t*)fixupLoc - (uint8_t*)ma; + if ( vmOffset < startVMOffset ) + return; + if ( vmOffset >= endVMOffset ) + return; + + uint64_t rebaseTargetRuntimeOffset; + if ( fixupLoc->isRebase(pointerFormat, 0, rebaseTargetRuntimeOffset) ) { + if ( dylibExportsISA && (rebaseTargetRuntimeOffset == cfStrings.isaVMOffset) ) { + atomOffsets.push_back(vmOffset); + } else { + sectionRebases[vmOffset] = rebaseTargetRuntimeOffset; + } + } + }); + }); + if ( diag.hasError() ) + return; + + // Every atom should have a single rebase to a cstring + if ( sectionRebases.size() != atomOffsets.size() ) + return; + + std::sort(atomOffsets.begin(), atomOffsets.end()); + for (uint64_t atomOffset : atomOffsets) { + auto it = sectionRebases.find(atomOffset + cstringDataOffset); + if ( it == sectionRebases.end() ) + return; + } + + CFSection& stringSection = this->cfStrings; + DylibTextCoalescer::DylibSectionOffsetToCacheSectionOffset& sectionData = textCoalescer.getSectionCoalescer("__OBJC_CONST", "__cfstring"); + for (uint64_t atomOffset : atomOffsets) { + if ( log ) + printf("%s: found __cfstring at: 0x%llx\n", ma->installName(), atomOffset); + + // Now keep track of this offset in our source dylib as pointing to this offset + uint32_t sourceSectionOffset = (uint32_t)(atomOffset - startVMOffset); + uint32_t cacheSectionOffset = stringSection.bufferSize; + sectionData[sourceSectionOffset] = cacheSectionOffset; + stringSection.bufferSize += (uint32_t)DyldSharedCache::ConstantClasses::cfStringAtomSize; + } +} + void CacheBuilder::CacheCoalescedText::clear() { *this = CacheBuilder::CacheCoalescedText(); } @@ -335,3 +748,67 @@ const CacheBuilder::CacheCoalescedText::StringSection& CacheBuilder::CacheCoales assert(it != supportedSections.end()); return *it->second; } + +uint64_t CacheBuilder::CacheCoalescedText::getSectionVMAddr(std::string_view segmentName, + std::string_view sectionName) const { + if (segmentName.size() > 16) + segmentName = segmentName.substr(0, 16); + if (sectionName.size() > 16) + sectionName = sectionName.substr(0, 16); + + if ( segmentName == "__TEXT" ) { + return getSectionData(sectionName).bufferVMAddr; + } + + if ( segmentName == "__OBJC_CONST" ) { + if ( sectionName == "__cfstring" ) { + return cfStrings.bufferVMAddr; + } + } + + assert(false); +} + +uint8_t* CacheBuilder::CacheCoalescedText::getSectionBufferAddr(std::string_view segmentName, + std::string_view sectionName) const { + if (segmentName.size() > 16) + segmentName = segmentName.substr(0, 16); + if (sectionName.size() > 16) + sectionName = sectionName.substr(0, 16); + + if ( segmentName == "__TEXT" ) { + return getSectionData(sectionName).bufferAddr; + } + + if ( segmentName == "__OBJC_CONST" ) { + if ( sectionName == "__cfstring" ) { + return cfStrings.bufferAddr; + } + } + + assert(false); +} + +uint64_t CacheBuilder::CacheCoalescedText::getSectionObjcTag(std::string_view segmentName, + std::string_view sectionName) const { + if (segmentName.size() > 16) + segmentName = segmentName.substr(0, 16); + if (sectionName.size() > 16) + sectionName = sectionName.substr(0, 16); + + if ( segmentName == "__TEXT" ) { + // Nothing has a tag in __TEXT + return 0; + } + + if ( segmentName == "__OBJC_CONST" ) { + if ( sectionName == "__cfstring" ) { + // This is defined by objc as the tag we put in the high bits + // FIXME: Get a tag from objc + // return 1ULL << 63; + return 0; + } + } + + assert(false); +}