+// Scans one dylib for constant CFString atoms in __OBJC_CONST,__cfstring and, when every
+// check below passes, records where each atom will live in the cache-wide cfStrings buffer.
+//
+//   ma             the dylib to inspect
+//   textCoalescer  receives, for section (__OBJC_CONST, __cfstring), a map from each atom's
+//                  offset in the source section to its offset in the coalesced buffer
+//
+// On success this->cfStrings.bufferSize grows by cfStringAtomSize for every atom found.
+// Every failed check returns before textCoalescer or cfStrings has been modified.
+//
+// NOTE: the pass is currently switched off (see `enabled` below), so it always returns early.
+void CacheBuilder::CacheCoalescedText::parseCFConstants(const dyld3::MachOAnalyzer *ma,
+                                                        DylibTextCoalescer &textCoalescer) {
+    static const bool log = false;
+
+    // FIXME: Re-enable this once we can correctly patch the shared cache
+    // A static constant (like `log`) keeps the rest of the function compiled while it is off.
+    static const bool enabled = false;
+    if ( !enabled )
+        return;
+
+    // Everything below dereferences ma
+    if ( ma == nullptr )
+        return;
+
+    if ( !ma->is64() )
+        return;
+
+    // We only support chained fixups as we need to rewrite binds/rebases after applying split seg
+    // and that is much easier with chained fixups than opcodes
+    if ( !ma->hasChainedFixupsLoadCommand() )
+        return;
+
+    // FIXME: Support DYLD_CHAINED_PTR_ARM64E_USERLAND once ld64 moves to it.
+    const uint16_t pointerFormat = ma->chainedPointerFormat();
+    if ( pointerFormat != DYLD_CHAINED_PTR_ARM64E )
+        return;
+
+    // We can only remove sections if we know we have split seg v2 to point to it
+    // Otherwise, a PC relative load in the __TEXT segment wouldn't know how to point to the new constants
+    // which are no longer in the same segment
+    if ( !ma->isSplitSegV2() )
+        return;
+
+    // We can only remove sections from the end of a segment, so cache them all and walk backwards.
+    __block std::vector<std::pair<std::string, dyld3::MachOAnalyzer::SectionInfo>> dataSectionInfos;
+    __block uint64_t cstringStartVMAddr = 0;
+    ma->forEachSection(^(const dyld3::MachOAnalyzer::SectionInfo &sectInfo, bool malformedSectionRange, bool &stop) {
+        if ( malformedSectionRange )
+            return;
+        if ( strcmp(sectInfo.segInfo.segName, "__OBJC_CONST") == 0 ) {
+            dataSectionInfos.push_back({ sectInfo.sectName, sectInfo });
+            return;
+        }
+        // Remember where the C string literals live; a zero address below means "not found"
+        if ( strcmp(sectInfo.segInfo.segName, "__TEXT") == 0 ) {
+            if ( strcmp(sectInfo.sectName, "__cstring") == 0 ) {
+                if ( (sectInfo.sectFlags & SECTION_TYPE) == S_CSTRING_LITERALS )
+                    cstringStartVMAddr = sectInfo.sectAddr;
+            }
+        }
+    });
+
+    // We need to clear the chained pointer fixups for the whole segment, so can only
+    // process any type of CF object if we can process them all
+    if ( dataSectionInfos.size() != 1 )
+        return;
+
+    if ( dataSectionInfos.front().first != "__cfstring" )
+        return;
+
+    if ( cstringStartVMAddr == 0 )
+        return;
+
+    const dyld3::MachOAnalyzer::SectionInfo& cfStringsSection = dataSectionInfos.front().second;
+
+    // A CFString is laid out in memory as
+    // {
+    //     uintptr_t isa;
+    //     uint32_t encoding;
+    //     uint32_t padding;
+    //     uintptr_t cstringData;
+    //     uintptr_t cstringLength;
+    // }
+    const uint64_t cstringDataOffset = 16;
+    const char* className = cfStrings.isaClassName;
+    if ( cfStringsSection.sectSize % (uint32_t)DyldSharedCache::ConstantClasses::cfStringAtomSize ) {
+        // We don't support padding of any kind on the section
+        return;
+    }
+
+    uint64_t baseAddress = ma->preferredLoadAddress();
+
+    // [startVMOffset, endVMOffset) is the __cfstring section, relative to the load address
+    uint64_t startVMOffset = cfStringsSection.sectAddr - baseAddress;
+    uint64_t endVMOffset = startVMOffset + cfStringsSection.sectSize;
+
+    __block Diagnostics diag;
+
+    // Make sure no symbols are pointing in to this section
+    __block bool hasSymbols = false;
+    ma->forEachGlobalSymbol(diag, ^(const char *symbolName, uint64_t n_value, uint8_t n_type, uint8_t n_sect, uint16_t n_desc, bool &stop) {
+        uint64_t vmOffset = n_value - baseAddress;
+        if ( vmOffset < startVMOffset )
+            return;
+        if ( vmOffset >= endVMOffset )
+            return;
+        // In range of our section
+        hasSymbols = true;
+        stop = true;
+    });
+    if ( diag.hasError() )
+        return;
+    if ( hasSymbols )
+        return;
+
+    ma->forEachLocalSymbol(diag, ^(const char *symbolName, uint64_t n_value, uint8_t n_type, uint8_t n_sect, uint16_t n_desc, bool &stop) {
+        uint64_t vmOffset = n_value - baseAddress;
+        if ( vmOffset < startVMOffset )
+            return;
+        if ( vmOffset >= endVMOffset )
+            return;
+        // In range of our section
+        hasSymbols = true;
+        stop = true;
+    });
+    if ( diag.hasError() )
+        return;
+    if ( hasSymbols )
+        return;
+
+    ma->forEachExportedSymbol(diag, ^(const char *symbolName, uint64_t imageOffset, uint64_t flags, uint64_t other, const char *importName, bool &stop) {
+        if ( imageOffset < startVMOffset )
+            return;
+        if ( imageOffset >= endVMOffset )
+            return;
+        // In range of our section
+        hasSymbols = true;
+        stop = true;
+    });
+    if ( diag.hasError() )
+        return;
+    if ( hasSymbols )
+        return;
+
+    // Load paths of the dependents, in load command order, so that a bind's
+    // library ordinal N (1-based) can be looked up at index N-1
+    __block std::vector<const char*> dependentPaths;
+    ma->forEachDependentDylib(^(const char *loadPath, bool isWeak, bool isReExport,
+                                bool isUpward, uint32_t compatVersion, uint32_t curVersion, bool &stop) {
+        dependentPaths.push_back(loadPath);
+    });
+
+    // Find all the binds to the ISA class.  These delineate the atoms
+    // In CoreFoundation itself, we are looking for rebases to the ISA
+    __block std::vector<uint64_t> atomOffsets;
+
+    bool dylibExportsISA = strcmp(ma->installName(), cfStrings.isaInstallName) == 0;
+    if ( !dylibExportsISA ) {
+        // This dylib doesn't export the class, so look for binds to the ISA
+        __block std::vector<std::pair<const char*, int>> bindTargetSymbols;
+        ma->forEachChainedFixupTarget(diag, ^(int libraryOrdinal, const char* symbolName, uint64_t addend, bool weakImport, bool& stop) {
+            bindTargetSymbols.push_back({ symbolName, libraryOrdinal });
+        });
+
+        __block bool foundBadBind = false;
+        ma->withChainStarts(diag, ma->chainStartsOffset(), ^(const dyld_chained_starts_in_image* startsInfo) {
+            if ( foundBadBind )
+                return;
+            ma->forEachFixupInAllChains(diag, startsInfo, false, ^(dyld3::MachOLoaded::ChainedFixupPointerOnDisk* fixupLoc,
+                                                                   const dyld_chained_starts_in_segment* segInfo, bool& stopFixups) {
+                // Skip anything not in this section
+                uint64_t vmOffset = (uint8_t*)fixupLoc - (uint8_t*)ma;
+                if ( vmOffset < startVMOffset )
+                    return;
+                if ( vmOffset >= endVMOffset )
+                    return;
+
+                uint32_t bindOrdinal;
+                int64_t  ptrAddend;
+                if ( !fixupLoc->isBind(pointerFormat, bindOrdinal, ptrAddend) )
+                    return;
+
+                // The only bind we accept in this section is: no addend, a known ordinal, the
+                // ISA class symbol, coming from a real dependent whose load path is the ISA's
+                // install name (all dylibs must find the ISA in the same place).
+                bool goodBind = false;
+                if ( (ptrAddend == 0) && (bindOrdinal < bindTargetSymbols.size()) ) {
+                    const std::pair<const char*, int>& bindTarget = bindTargetSymbols[bindOrdinal];
+                    const int libOrdinal = bindTarget.second;
+                    if ( (strcmp(bindTarget.first, className) == 0)
+                        && (libOrdinal > 0)
+                        && ((size_t)libOrdinal <= dependentPaths.size()) ) {
+                        const char* depLoadPath = dependentPaths[libOrdinal - 1];
+                        goodBind = (strcmp(cfStrings.isaInstallName, depLoadPath) == 0);
+                    }
+                }
+                if ( !goodBind ) {
+                    foundBadBind = true;
+                    stopFixups = true;
+                    return;
+                }
+                atomOffsets.push_back(vmOffset);
+            });
+        });
+        if ( foundBadBind )
+            return;
+
+        if ( atomOffsets.empty() )
+            return;
+    }
+
+    if ( diag.hasError() )
+        return;
+
+    // Find all the rebases in the atoms, which correspond to pointers to strings
+    // Keyed by the fixup's offset, valued by the rebase target's runtime offset
+    __block std::map<uint64_t, uint64_t> sectionRebases;
+    ma->withChainStarts(diag, ma->chainStartsOffset(), ^(const dyld_chained_starts_in_image* startsInfo) {
+        ma->forEachFixupInAllChains(diag, startsInfo, false, ^(dyld3::MachOLoaded::ChainedFixupPointerOnDisk* fixupLoc, const dyld_chained_starts_in_segment* segInfo, bool& stopFixups) {
+            // Skip anything not in this section
+            uint64_t vmOffset = (uint8_t*)fixupLoc - (uint8_t*)ma;
+            if ( vmOffset < startVMOffset )
+                return;
+            if ( vmOffset >= endVMOffset )
+                return;
+
+            uint64_t rebaseTargetRuntimeOffset;
+            if ( fixupLoc->isRebase(pointerFormat, 0, rebaseTargetRuntimeOffset) ) {
+                if ( dylibExportsISA && (rebaseTargetRuntimeOffset == cfStrings.isaVMOffset) ) {
+                    // In the dylib which defines the ISA, a rebase to it marks the start of an atom
+                    atomOffsets.push_back(vmOffset);
+                } else {
+                    sectionRebases[vmOffset] = rebaseTargetRuntimeOffset;
+                }
+            }
+        });
+    });
+    if ( diag.hasError() )
+        return;
+
+    // Every atom should have a single rebase to a cstring
+    if ( sectionRebases.size() != atomOffsets.size() )
+        return;
+
+    // ... and that rebase has to sit in the atom's cstringData field
+    std::sort(atomOffsets.begin(), atomOffsets.end());
+    for (uint64_t atomOffset : atomOffsets) {
+        auto it = sectionRebases.find(atomOffset + cstringDataOffset);
+        if ( it == sectionRebases.end() )
+            return;
+    }
+
+    CFSection& stringSection = this->cfStrings;
+    DylibTextCoalescer::DylibSectionOffsetToCacheSectionOffset& sectionData = textCoalescer.getSectionCoalescer("__OBJC_CONST", "__cfstring");
+    for (uint64_t atomOffset : atomOffsets) {
+        if ( log )
+            printf("%s: found __cfstring at: 0x%llx\n", ma->installName(), atomOffset);
+
+        // Now keep track of this offset in our source dylib as pointing to this offset
+        uint32_t sourceSectionOffset = (uint32_t)(atomOffset - startVMOffset);
+        uint32_t cacheSectionOffset = stringSection.bufferSize;
+        sectionData[sourceSectionOffset] = cacheSectionOffset;
+        stringSection.bufferSize += (uint32_t)DyldSharedCache::ConstantClasses::cfStringAtomSize;
+    }
+}
+