From 15c62a5b13cda41eee74f9d224e8a74cf31377b5 Mon Sep 17 00:00:00 2001 From: Kenny Root Date: Fri, 4 Dec 2009 09:38:48 -0800 Subject: [PATCH] Optional use of UTF-8 strings in resource bundles Allows the use of UTF-8 for packing resources instead of the default of UTF-16 for Java. When strings are extracted from the ResStringPool, they are converted to UTF-16 and the result is cached for subsequent calls. When using aapt to package, add in the "-8" switch to pack the resources using UTF-8. This will result in the value, key, and type strings as well as the compiled XML string values taking significantly less space in the final application package in most scenarios. Change-Id: I129483f8b3d3b1c5869dced05cb525e494a6c83a --- Bundle.h | 5 +++- Command.cpp | 1 + Main.cpp | 4 +++ Resource.cpp | 16 ++++++++---- ResourceTable.cpp | 10 +++++--- ResourceTable.h | 1 + StringPool.cpp | 62 ++++++++++++++++++++++++++++++++++++++--------- StringPool.h | 6 ++++- XMLNode.cpp | 3 ++- XMLNode.h | 5 ++++ 10 files changed, 90 insertions(+), 23 deletions(-) diff --git a/Bundle.h b/Bundle.h index 1ac13f2..cf70121 100644 --- a/Bundle.h +++ b/Bundle.h @@ -37,7 +37,7 @@ public: mForce(false), mGrayscaleTolerance(0), mMakePackageDirs(false), mUpdate(false), mExtending(false), mRequireLocalization(false), mPseudolocalize(false), - mValues(false), + mUTF8(false), mValues(false), mCompressionMethod(0), mOutputAPKFile(NULL), mAssetSourceDir(NULL), mProguardFile(NULL), mAndroidManifestFile(NULL), mPublicOutputFile(NULL), @@ -76,6 +76,8 @@ public: void setRequireLocalization(bool val) { mRequireLocalization = val; } bool getPseudolocalize(void) const { return mPseudolocalize; } void setPseudolocalize(bool val) { mPseudolocalize = val; } + bool getUTF8(void) const { return mUTF8; } + void setUTF8(bool val) { mUTF8 = val; } bool getValues(void) const { return mValues; } void setValues(bool val) { mValues = val; } int getCompressionMethod(void) const { return mCompressionMethod; } @@ -161,6 +163,7 @@ private: bool mExtending; bool mRequireLocalization; bool mPseudolocalize; + bool mUTF8; bool mValues; int mCompressionMethod; bool mJunkPath; diff --git a/Command.cpp b/Command.cpp index 1a536d6..ff9cc11 100644 --- a/Command.cpp +++ b/Command.cpp @@ -412,6 +412,7 @@ int doDump(Bundle* bundle) } tree.restart(); printXMLBlock(&tree); + tree.uninit(); delete asset; asset = NULL; } diff --git a/Main.cpp b/Main.cpp index 98286c0..bd03b74 100644 --- a/Main.cpp +++ b/Main.cpp @@ -118,6 +118,7 @@ void usage(void) " -P specify where to output public resource definitions\n" " -S directory in which to find resources. Multiple directories will be scanned" " and the first match found (left to right) will take precedence." + " -8 Encode string resources in UTF-8.\n" " -0 specifies an additional extension for which such files will not\n" " be stored compressed in the .apk. An empty string means to not\n" " compress any files at all.\n" @@ -370,6 +371,9 @@ int main(int argc, char* const argv[]) bundle.setCompressionMethod(ZipEntry::kCompressStored); } break; + case '8': + bundle.setUTF8(true); + break; case '-': if (strcmp(cp, "-min-sdk-version") == 0) { argc--; diff --git a/Resource.cpp b/Resource.cpp index fdcada4..d04a873 100644 --- a/Resource.cpp +++ b/Resource.cpp @@ -613,6 +613,12 @@ status_t buildResources(Bundle* bundle, const sp& assets) NOISY(printf("Found %d included resource packages\n", (int)table.size())); + // Standard flags for compiled XML and optional UTF-8 encoding + int xmlFlags = XML_COMPILE_STANDARD_RESOURCE; + if (bundle->getUTF8()) { + xmlFlags |= XML_COMPILE_UTF8; + } + // -------------------------------------------------------------- // First, gather all resource information. // -------------------------------------------------------------- @@ -763,7 +769,7 @@ status_t buildResources(Bundle* bundle, const sp& assets) ResourceDirIterator it(layouts, String8("layout")); while ((err=it.next()) == NO_ERROR) { String8 src = it.getFile()->getPrintableSource(); - err = compileXmlFile(assets, it.getFile(), &table); + err = compileXmlFile(assets, it.getFile(), &table, xmlFlags); if (err == NO_ERROR) { ResXMLTree block; block.setTo(it.getFile()->getData(), it.getFile()->getSize(), true); @@ -782,7 +788,7 @@ status_t buildResources(Bundle* bundle, const sp& assets) if (anims != NULL) { ResourceDirIterator it(anims, String8("anim")); while ((err=it.next()) == NO_ERROR) { - err = compileXmlFile(assets, it.getFile(), &table); + err = compileXmlFile(assets, it.getFile(), &table, xmlFlags); if (err != NO_ERROR) { hasErrors = true; } @@ -797,7 +803,7 @@ status_t buildResources(Bundle* bundle, const sp& assets) if (xmls != NULL) { ResourceDirIterator it(xmls, String8("xml")); while ((err=it.next()) == NO_ERROR) { - err = compileXmlFile(assets, it.getFile(), &table); + err = compileXmlFile(assets, it.getFile(), &table, xmlFlags); if (err != NO_ERROR) { hasErrors = true; } @@ -819,7 +825,7 @@ status_t buildResources(Bundle* bundle, const sp& assets) if (colors != NULL) { ResourceDirIterator it(colors, String8("color")); while ((err=it.next()) == NO_ERROR) { - err = compileXmlFile(assets, it.getFile(), &table); + err = compileXmlFile(assets, it.getFile(), &table, xmlFlags); if (err != NO_ERROR) { hasErrors = true; } @@ -835,7 +841,7 @@ status_t buildResources(Bundle* bundle, const sp& assets) ResourceDirIterator it(menus, String8("menu")); while ((err=it.next()) == NO_ERROR) { String8 src = it.getFile()->getPrintableSource(); - err = compileXmlFile(assets, it.getFile(), &table); + err = compileXmlFile(assets, it.getFile(), &table, xmlFlags); if (err != NO_ERROR) { hasErrors = true; } diff --git a/ResourceTable.cpp b/ResourceTable.cpp index 19b9b01..a9cbd11 100644 --- a/ResourceTable.cpp +++ b/ResourceTable.cpp @@ -39,6 +39,10 @@ status_t compileXmlFile(const sp& assets, root->removeWhitespace(false, NULL); } + if ((options&XML_COMPILE_UTF8) != 0) { + root->setUTF8(true); + } + bool hasErrors = false; if ((options&XML_COMPILE_ASSIGN_ATTRIBUTE_IDS) != 0) { @@ -2505,7 +2509,7 @@ status_t ResourceTable::flatten(Bundle* bundle, const sp& dest) // Iterate through all data, collecting all values (strings, // references, etc). - StringPool valueStrings; + StringPool valueStrings = StringPool(false, bundle->getUTF8()); for (pi=0; pi p = mOrderedPackages.itemAt(pi); if (p->getTypes().size() == 0) { @@ -2513,8 +2517,8 @@ status_t ResourceTable::flatten(Bundle* bundle, const sp& dest) continue; } - StringPool typeStrings; - StringPool keyStrings; + StringPool typeStrings = StringPool(false, bundle->getUTF8()); + StringPool keyStrings = StringPool(false, bundle->getUTF8()); const size_t N = p->getOrderedTypes().size(); for (size_t ti=0; ti StringPool::createStringBlock() return err == NO_ERROR ? pool : NULL; } +#define ENCODE_LENGTH(str, chrsz, strSize) \ +{ \ + size_t maxMask = 1 << ((chrsz*8)-1); \ + size_t maxSize = maxMask-1; \ + if (strSize > maxSize) { \ + *str++ = maxMask | ((strSize>>(chrsz*8))&maxSize); \ + } \ + *str++ = strSize; \ +} + status_t StringPool::writeStringBlock(const sp& pool) { // Allow appending. Sorry this is a little wacky. @@ -213,28 +223,53 @@ status_t StringPool::writeStringBlock(const sp& pool) return NO_MEMORY; } + const size_t charSize = mUTF8 ? sizeof(uint8_t) : sizeof(char16_t); + size_t strPos = 0; for (i=0; i 0x7fff ? sizeof(uint32_t) : sizeof(uint16_t); - const size_t totalSize = lenSize + ((strSize+1)*sizeof(uint16_t)); + const size_t lenSize = strSize > (size_t)(1<<((charSize*8)-1))-1 ? + charSize*2 : charSize; + + String8 encStr; + if (mUTF8) { + encStr = String8(ent.value); + } + + const size_t encSize = mUTF8 ? encStr.size() : 0; + const size_t encLenSize = mUTF8 ? + (encSize > (size_t)(1<<((charSize*8)-1))-1 ? + charSize*2 : charSize) : 0; ent.offset = strPos; - uint16_t* dat = (uint16_t*)pool->editData(preSize + strPos + totalSize); + + const size_t totalSize = lenSize + encLenSize + + ((mUTF8 ? encSize : strSize)+1)*charSize; + + void* dat = (void*)pool->editData(preSize + strPos + totalSize); if (dat == NULL) { fprintf(stderr, "ERROR: Out of memory for string pool\n"); return NO_MEMORY; } - dat += (preSize+strPos)/sizeof(uint16_t); - if (lenSize > sizeof(uint16_t)) { - *dat = htods(0x8000 | ((strSize>>16)&0x7fff)); - dat++; + dat = (uint8_t*)dat + preSize + strPos; + if (mUTF8) { + uint8_t* strings = (uint8_t*)dat; + + ENCODE_LENGTH(strings, sizeof(uint8_t), strSize) + + ENCODE_LENGTH(strings, sizeof(uint8_t), encSize) + + strncpy((char*)strings, encStr, encSize+1); + } else { + uint16_t* strings = (uint16_t*)dat; + + ENCODE_LENGTH(strings, sizeof(uint16_t), strSize) + + strcpy16_htod(strings, ent.value); } - *dat++ = htods(strSize); - strcpy16_htod(dat, ent.value); - strPos += lenSize + (strSize+1)*sizeof(uint16_t); + strPos += totalSize; } // Pad ending string position up to a uint32_t boundary. @@ -312,6 +347,9 @@ status_t StringPool::writeStringBlock(const sp& pool) if (mSorted) { header->flags |= htodl(ResStringPool_header::SORTED_FLAG); } + if (mUTF8) { + header->flags |= htodl(ResStringPool_header::UTF8_FLAG); + } header->stringsStart = htodl(preSize); header->stylesStart = htodl(STYLES > 0 ? (preSize+strPos) : 0); diff --git a/StringPool.h b/StringPool.h index 9082b37..7275259 100644 --- a/StringPool.h +++ b/StringPool.h @@ -68,8 +68,11 @@ public: * lookup with ResStringPool::indexOfString() (O(log n)), at the expense * of support for styled string entries (which requires the same string * be included multiple times in the pool). + * + * If 'utf8' is true, strings will be encoded with UTF-8 instead of + * left in Java's native UTF-16. */ - explicit StringPool(bool sorted = false); + explicit StringPool(bool sorted = false, bool utf8 = false); /** * Add a new string to the pool. If mergeDuplicates is true, thenif @@ -123,6 +126,7 @@ public: private: const bool mSorted; + const bool mUTF8; // Raw array of unique strings, in some arbitrary order. Vector mEntries; // Array of indices into mEntries, in the order they were diff --git a/XMLNode.cpp b/XMLNode.cpp index d4d2a45..036dde4 100644 --- a/XMLNode.cpp +++ b/XMLNode.cpp @@ -478,6 +478,7 @@ XMLNode::XMLNode(const String8& filename, const String16& s1, const String16& s2 , mFilename(filename) , mStartLineNumber(0) , mEndLineNumber(0) + , mUTF8(false) { if (isNamespace) { mNamespacePrefix = s1; @@ -837,7 +838,7 @@ status_t XMLNode::assignResourceIds(const sp& assets, status_t XMLNode::flatten(const sp& dest, bool stripComments, bool stripRawValues) const { - StringPool strings; + StringPool strings = StringPool(false, mUTF8); Vector resids; // First collect just the strings for attribute names that have a diff --git a/XMLNode.h b/XMLNode.h index a9bea43..dc92fa7 100644 --- a/XMLNode.h +++ b/XMLNode.h @@ -124,6 +124,8 @@ public: void removeWhitespace(bool stripAll=true, const char** cDataTags=NULL); + void setUTF8(bool val) { mUTF8 = val; } + status_t parseValues(const sp& assets, ResourceTable* table); status_t assignResourceIds(const sp& assets, @@ -189,6 +191,9 @@ private: String8 mFilename; int32_t mStartLineNumber; int32_t mEndLineNumber; + + // Encode compiled XML with UTF-8 StringPools? + bool mUTF8; }; #endif -- 2.45.2