[apple/icu.git] / icuSources / common / aaplbfct.cpp

/**
 *******************************************************************************
 * Copyright (C) 2007,2012 International Business Machines Corporation, Apple Inc.,*
 * and others.  All Rights Reserved.                                           *
 *******************************************************************************
 */

#define __STDC_LIMIT_MACROS 1
#include "unicode/utypes.h"

#if !UCONFIG_NO_BREAK_ITERATION && U_PLATFORM_IS_DARWIN_BASED

#include "brkeng.h"
#include "dictbe.h"
#include "aaplbfct.h"
#include "unicode/uscript.h"
#include "unicode/uniset.h"
#include "unicode/ucnv.h"
#include "unicode/uchar.h"
#include <limits.h>
#include <unistd.h>
#include <glob.h>
#include <strings.h>
#include <NSSystemDirectories.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <time.h>
#include <stdio.h>
#include <stdint.h>
// The following is now already included by platform.h (included indirectly by
// utypes.h) if U_PLATFORM_IS_DARWIN_BASED but it doesn't hurt to re-include here
#include <TargetConditionals.h>

U_NAMESPACE_BEGIN

/*
 ******************************************************************
 */

// Construct the factory. No Apple-specific state is set up here: the caller's
// error code is simply handed to the ICULanguageBreakFactory base constructor.
AppleLanguageBreakFactory::AppleLanguageBreakFactory(UErrorCode &status)
	: ICULanguageBreakFactory(status) {}

// Destructor. The body is empty: this class adds no cleanup of its own.
AppleLanguageBreakFactory::~AppleLanguageBreakFactory()
{
}

#if !TARGET_OS_EMBEDDED
#if 0
// need to update loadDictionaryMatcherFor implementation below

// Read the next UChar from a length-delimited buffer so that callers can loop as
// if the buffer were NUL-terminated.
//
// p  cursor into the buffer; advanced by one on a successful read
// l  number of UChars remaining; decremented by one on a successful read
//
// Returns the UChar that was read, or 0 once no UChars remain (in which case
// neither p nor l is modified).
static __attribute__((always_inline)) inline UChar nextUChar(const UChar *&p, ptrdiff_t &l) {
	if (l <= 0) {
		return 0;
	}
	--l;
	return *p++;
}

// Add a file's worth of words to the supplied mutable dictionary.
//
// The file at `path` is mapped read-only. If it begins with a Unicode signature
// it is converted to UTF-16 with the matching ICU converter; otherwise it is
// assumed to already be native-endian UTF-16 with no BOM. The text is then
// processed as one word per line: the leading run of non-whitespace characters
// on each line is handed to to->addWord() and the rest of the line is ignored.
//
// Errors (unreadable or oversized file, conversion failure, addWord failure) are
// not reported to the caller; processing just stops, and any words added before
// the failure remain in `to`.
//
// to    dictionary that receives the words
// path  path of the dictionary text file
static void addDictFile(MutableTrieDictionary *to, const char *path) {
	UErrorCode status = U_ZERO_ERROR;
	off_t fileLength = 0;
	int32_t rawLength = 0;		// fileLength, once it is known to fit in int32_t
	const char *dictRawData = (const char *) MAP_FAILED;
	const UChar *dictData = NULL;
	ptrdiff_t dictDataLength = 0;
	UChar *dictBuffer = NULL;
	const char *encoding = NULL;
	int32_t		signatureLength = 0;
	
	// Open the dictionary file
	int dictFile = open(path, O_RDONLY, 0);
	if (dictFile == -1) {
		status = U_FILE_ACCESS_ERROR;
	}
	
	// Determine its length. The ICU converter calls below take int32_t byte
	// counts, so a file larger than INT32_MAX is rejected here instead of having
	// its length silently truncated at those calls.
	if (U_SUCCESS(status)) {
		fileLength = lseek(dictFile, 0, SEEK_END);
		(void) lseek(dictFile, 0, SEEK_SET);
		if (fileLength < 0 || fileLength > INT32_MAX) {
			status = U_FILE_ACCESS_ERROR;
		}
		else {
			rawLength = (int32_t) fileLength;
		}
	}
	
	// Map it
	if (U_SUCCESS(status)) {
		dictRawData = (const char *) mmap(0, (size_t) fileLength, PROT_READ, MAP_SHARED, dictFile, 0);
		if (dictRawData == (const char *) MAP_FAILED) {
			status = U_FILE_ACCESS_ERROR;
		}
	}
	
	// No longer need the file descriptor open
	if (dictFile != -1) {
		(void) close(dictFile);
	}
	
	// Look for a Unicode signature
	if (U_SUCCESS(status)) {
		encoding = ucnv_detectUnicodeSignature(dictRawData, rawLength, &signatureLength, &status);
	}
	
	// If necessary, convert the data to UChars
	if (U_SUCCESS(status) && encoding != NULL) {
		UConverter *conv = ucnv_open(encoding, &status);
		// Preflight to get buffer size; the expected "overflow" result is not an error
		int32_t destCap = ucnv_toUChars(conv, NULL, 0, dictRawData, rawLength, &status);
		if (status == U_BUFFER_OVERFLOW_ERROR) {
			status = U_ZERO_ERROR;
		}
		if (U_SUCCESS(status)) {
			// One extra UChar so the converter can NUL-terminate the output
			dictBuffer = new UChar[destCap+1];
			(void) ucnv_toUChars(conv, dictBuffer, destCap+1, dictRawData, rawLength, &status);
			dictData = dictBuffer;
			dictDataLength = destCap;
			if (U_SUCCESS(status) && dictData[0] == 0xFEFF) {	// BOM? Skip it
				dictData += 1;
				dictDataLength -= 1;
			}
		}
		
		ucnv_close(conv);
	}
	
	// If it didn't need converting, just assume it's native-endian UTF-16, no BOM
	if (U_SUCCESS(status) && dictData == NULL) {
		dictData = (const UChar *) dictRawData;
		dictDataLength = fileLength/sizeof(UChar);
	}
	
	// OK, we now have a pointer to native-endian UTF-16. Process it as one word per line,
	// stopping at the first space.
	if (U_SUCCESS(status)) {
		UnicodeSet breaks(UNICODE_STRING_SIMPLE("[[:lb=BK:][:lb=CR:][:lb=LF:][:lb=NL:]]"), status);
		const UChar *candidate = dictData;
		int32_t length = 0;
		UChar uc = nextUChar(dictData, dictDataLength);
		while (U_SUCCESS(status) && uc) {
			// Measure the word: everything up to the first whitespace character
			while (uc && !u_isspace(uc)) {
				length += 1;
				uc = nextUChar(dictData, dictDataLength);
			}
			
			if (length > 0) {
				to->addWord(candidate, length, status);
			}
			
			// Find beginning of next line
			// 1. Skip non-line-break characters
			while (uc && !breaks.contains(uc)) {
				uc = nextUChar(dictData, dictDataLength);
			}
			// 2. Skip line break characters
			while (uc && breaks.contains(uc)) {
				uc = nextUChar(dictData, dictDataLength);
			}
			
			// Prepare for next line. nextUChar() has already stepped past uc, so the
			// new line starts one UChar behind the cursor.
			candidate = dictData-1;
			length = 0;
		}
	}

	// Unmap the file if we mapped it
	if (dictRawData != (const char *) MAP_FAILED) {
		(void) munmap((void *)dictRawData, (size_t) fileLength);
	}
	
	// Delete any temporary buffer
	delete [] dictBuffer;
}

// Byte-order tag that is inserted into the user dictionary cache file name:
// ".le" for little-endian builds, empty for big-endian builds.
#if !U_IS_BIG_ENDIAN
static const char sArchType[] = ".le";
#else
static const char sArchType[] = "";
#endif

#endif
#endif

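/*
In ICU 50, ICULanguageBreakFactory changed from
  virtual const CompactTrieDictionary *loadDictionaryFor(UScriptCode script, int32_t breakType);
to
  virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script, int32_t breakType);
and CompactTrieDictionary no longer exists. The user-dictionary code in this file
is therefore disabled with #if 0; a new implementation still needs to be worked
out below.
NOTE(review): the live call below passes only `script` to the base-class method,
so the signature quoted above may itself be out of date -- confirm against brkeng.h.
*/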

// Load the dictionary matcher for `script`.
//
// As currently built, this returns exactly what the ICULanguageBreakFactory
// implementation returns for `script`; `breakType` is not consulted.
//
// The section disabled with #if 0 is the earlier user-dictionary support. It is
// written against CompactTrieDictionary/MutableTrieDictionary and has to be
// ported before it can be re-enabled. What it does:
//   1. collects the Library/Dictionaries directories of the user, local and
//      network domains;
//   2. globs "*-<script name>.txt" files in them;
//   3. if the per-user cache file is missing, has the wrong type/permissions,
//      or is older than any of those directories or files, rebuilds it from the
//      ICU dictionary plus the user files and atomically renames it into place;
//   4. maps the cache file and returns a dictionary built on it in place of the
//      ICU one.
// On any failure it falls back to the ICU dictionary.
//
// script     script whose dictionary is wanted
// breakType  currently unused
//
// Returns the matcher obtained from the base class.
DictionaryMatcher *
AppleLanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script, int32_t breakType) {
	DictionaryMatcher *icuDictMatcher = ICULanguageBreakFactory::loadDictionaryMatcherFor(script);
#if !TARGET_OS_EMBEDDED
#if 0
// need to update loadDictionaryMatcherFor implementation below
	// We only look for a user dictionary if there is actually an ICU dictionary
	if (icuDictMatcher != NULL) {
		UErrorCode status = U_ZERO_ERROR;
		const char *scriptName = uscript_getName(script);
		// NSGetNextSearchPathEnumeration() fills a caller-supplied buffer that must be
		// PATH_MAX bytes long, so all path buffers are sized to PATH_MAX.
		char path[PATH_MAX];
		char cachePath[PATH_MAX];
		char cacheTargetPath[PATH_MAX];
		// Zero-initialized so that gl_pathc and globfree() are well defined even when
		// glob() is never called or never succeeds.
		glob_t dirGlob = {};
		glob_t fileGlob = {};
		struct stat cacheStat;
		struct stat dictStat;
		bool cacheGood = true;
		int globFlags = (GLOB_NOESCAPE|GLOB_NOSORT|GLOB_TILDE);
		const CompactTrieDictionary *cacheDict = NULL;
		
		// Iterate the dictionary directories and accumulate in dirGlob
		NSSearchPathEnumerationState state = NSStartSearchPathEnumeration(NSLibraryDirectory, (NSSearchPathDomainMask) (NSUserDomainMask|NSLocalDomainMask|NSNetworkDomainMask));
		while ((state = NSGetNextSearchPathEnumeration(state, path)) != 0) {
			// First get the directory itself. We should never overflow, but use strlcat anyway
			// to avoid a crash if we do.
			strlcat(path, "/Dictionaries", sizeof(path));
			if (!glob(path, globFlags, NULL, &dirGlob)) {
				globFlags |= GLOB_APPEND;	// later matches are added to the same list
			}
		}
		
		// If there are no Dictionaries directories, ignore any cache file and return the ICU
		// standard dictionary
		// TODO: Delete the cache?
		if (dirGlob.gl_pathc == 0) {
			globfree(&dirGlob);
			return icuDictMatcher;
		}
		
		// See if there is a cache file already; get its mod time
		// TODO: should we be using geteuid() here instead of getuid()?
		state = NSStartSearchPathEnumeration(NSCachesDirectory, NSLocalDomainMask);
		state = NSGetNextSearchPathEnumeration(state, cachePath);	// Just use first one
		if (state == 0) {
			// No caches directory was produced, so cachePath was not filled in
			globfree(&dirGlob);
			return icuDictMatcher;
		}
		// Create the cache file name. We should never overflow, but use snprintf to avoid a crash
		// if we do, and give up on the cache rather than work with a truncated name.
		int cacheTargetLength = snprintf(cacheTargetPath, sizeof(cacheTargetPath), "%s/com.apple.ICUUserDictionaryCache%s.%s.%d", cachePath, sArchType, scriptName, getuid());
		if (cacheTargetLength < 0 || (size_t) cacheTargetLength >= sizeof(cacheTargetPath)) {
			globfree(&dirGlob);
			return icuDictMatcher;
		}
		if (stat(cacheTargetPath, &cacheStat) || cacheStat.st_mode != (S_IFREG|S_IRUSR|S_IWUSR)) {
			cacheGood = false;		// No file or bad permissions or type
		}
		
		// Stat the dictionary folders, and glob the dictionary files
		globFlags &= ~GLOB_APPEND;
		char **pathsp = dirGlob.gl_pathv;
		const char *dictpath;
		while ((dictpath = *pathsp++) != NULL) {
			// Stat the directory -- ignore if stat failure
			if (!stat(dictpath, &dictStat)) {
				// Glob the dictionaries in the directory
				snprintf(path, sizeof(path), "%s/*-%s.txt", dictpath, scriptName);
				if (!glob(path, globFlags, NULL, &fileGlob)) {
					globFlags |= GLOB_APPEND;
				}
				// If the directory has been modified after the cache file, we need to rebuild;
				// a dictionary might have been deleted.
				if (cacheGood && (dictStat.st_mtimespec.tv_sec > cacheStat.st_mtimespec.tv_sec || (dictStat.st_mtimespec.tv_sec == cacheStat.st_mtimespec.tv_sec && dictStat.st_mtimespec.tv_nsec > cacheStat.st_mtimespec.tv_nsec))) {
					cacheGood = false;
				}
			}
		}
		
		// No longer need the directory glob
		globfree(&dirGlob);
		
		// If there are no dictionaries, ignore the cache file and return the ICU dictionary
		// TODO: Delete the cache?
		if (fileGlob.gl_pathc == 0) {
			globfree(&fileGlob);
			return icuDictMatcher;
		}
		
		// Now compare the last modified stamp for the cache against all the dictionaries
		pathsp = fileGlob.gl_pathv;
		while (cacheGood && (dictpath = *pathsp++)) {
			// Stat the dictionary -- ignore if stat failure
			if (!stat(dictpath, &dictStat) && (dictStat.st_mtimespec.tv_sec > cacheStat.st_mtimespec.tv_sec || (dictStat.st_mtimespec.tv_sec == cacheStat.st_mtimespec.tv_sec && dictStat.st_mtimespec.tv_nsec > cacheStat.st_mtimespec.tv_nsec))) {
				cacheGood = false;
			}
		}
		
		// Do we need to build the dictionary cache?
		if (!cacheGood) {
			// Create a mutable dictionary from the ICU dictionary
			// NOTE(review): if cloneMutable() can return NULL on failure, *sum below is
			// dereferenced before status is checked -- confirm when porting.
			MutableTrieDictionary *sum = icuDictMatcher->cloneMutable(status);
			pathsp = fileGlob.gl_pathv;
			while (U_SUCCESS(status) && (dictpath = *pathsp++)) {
				// Add the contents of a file to the sum
				addDictFile(sum, dictpath);
			}
			
			// Create a compact (read-only) dictionary
			CompactTrieDictionary compact(*sum, status);
			delete sum;
			
			if (U_SUCCESS(status)) {
				// Open a temp file to write out the cache
				strlcat(cachePath, "/temp.XXXXXXXXXX", sizeof(cachePath));
				int temp = mkstemp(cachePath);
				if (temp == -1) {
					status = U_FILE_ACCESS_ERROR;
				}
				size_t dictSize = compact.dataSize();
				if (U_SUCCESS(status) && write(temp, compact.data(), dictSize) != (ssize_t) dictSize) {
					status = U_FILE_ACCESS_ERROR;
				}
				// Rename the temp file to the cache. Note that race conditions here are
				// fine, as the file system operations are atomic. If an outdated version wins
				// over a newer version, it will get rebuilt at the next app launch due to the
				// modification time checks above. We don't care that any given app launch gets
				// the most up-to-date cache (impossible since we can't lock all the Dictionaries
				// directories), only that the cache (eventually) reflects the current state of
				// any user dictionaries. That will happen on the next app launch after changes
				// to the user dictionaries quiesce.
				if (U_SUCCESS(status) && rename(cachePath, cacheTargetPath)) {
					status = U_FILE_ACCESS_ERROR;
				}
				if (temp != -1) {
					close(temp);
					// The temp file exists but did not become the cache (short write or
					// failed rename): remove it so it is not left behind.
					if (U_FAILURE(status)) {
						(void) unlink(cachePath);
					}
				}
			}
		}

		// Done with dictionary paths; release memory allocated by glob()
		globfree(&fileGlob);
		
		// Map the cache and build the dictionary
		if (U_SUCCESS(status)) {
			int cache = open(cacheTargetPath, O_RDONLY, 0);
			off_t length = 0;
			const void *cacheData = MAP_FAILED;
			if (cache == -1) {
				status = U_FILE_ACCESS_ERROR;
			}
			if (U_SUCCESS(status)) {
				length = lseek(cache, 0, SEEK_END);
				(void) lseek(cache, 0, SEEK_SET);
				if (length < 0 || length > PTRDIFF_MAX) {
					status = U_FILE_ACCESS_ERROR;
				}
			}
			
			// Map the cache. Note: it is left mapped until process exit. This is the normal
			// behavior anyway, so it shouldn't be an issue.
			if (U_SUCCESS(status)) {
				cacheData = mmap(0, (size_t) length, PROT_READ, MAP_SHARED, cache, 0);
				if (cacheData == MAP_FAILED) {
					status = U_FILE_ACCESS_ERROR;
				}
			}
			// We can close the cache file now that it's mapped (or not)
			if (cache != -1) {
				(void) close(cache);
			}
			// If all was successful, try to create the dictionary. The constructor will
			// check the magic number for us.
			if (U_SUCCESS(status)) {
				cacheDict = new CompactTrieDictionary(cacheData, status);
			}
			if (U_FAILURE(status) && cacheData != MAP_FAILED) {
				// Clean up the mmap
				(void) munmap((void *)cacheData, (size_t) length);
			}
		}
		
		// If we were successful, free the ICU dictionary and return ours
		if (U_SUCCESS(status)) {
			delete icuDictMatcher;
			return cacheDict;
		}
		else {
			delete cacheDict;
		}
	}
#endif
#endif
	return icuDictMatcher;
}

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_BREAK_ITERATION && U_PLATFORM_IS_DARWIN_BASED */
Commit	Line	Data
73c04bcf A	1	/**
73c04bcf A	2	*******************************************************************************
51004dcb	3	* Copyright (C) 2007,2012 International Business Machines Corporation, Apple Inc.,*
73c04bcf A	4	* and others. All Rights Reserved. *
	5	*******************************************************************************
	6	*/
	7
	8	#define __STDC_LIMIT_MACROS 1
	9	#include "unicode/utypes.h"
	10
4388f060	11	#if !UCONFIG_NO_BREAK_ITERATION && U_PLATFORM_IS_DARWIN_BASED
73c04bcf A	12
	13	#include "brkeng.h"
	14	#include "dictbe.h"
73c04bcf A	15	#include "aaplbfct.h"
	16	#include "unicode/uscript.h"
	17	#include "unicode/uniset.h"
	18	#include "unicode/ucnv.h"
	19	#include "unicode/uchar.h"
	20	#include <limits.h>
	21	#include <unistd.h>
	22	#include <glob.h>
	23	#include <strings.h>
	24	#include <NSSystemDirectories.h>
	25	#include <sys/types.h>
	26	#include <sys/stat.h>
	27	#include <sys/mman.h>
	28	#include <fcntl.h>
	29	#include <time.h>
	30	#include <stdio.h>
	31	#include <stdint.h>
4388f060 A	32	// The following is now already included by platform.h (included indirectly by
4388f060 A	33	// utypes.h) if U_PLATFORM_IS_DARWIN_BASED but it doesn't hurt to re-include here
729e4ab9	34	#include <TargetConditionals.h>
73c04bcf A	35
	36	U_NAMESPACE_BEGIN
	37
	38	/*
	39	******************************************************************
	40	*/
	41
	42	AppleLanguageBreakFactory::AppleLanguageBreakFactory(UErrorCode &status)
	43	: ICULanguageBreakFactory(status)
	44	{
	45	}
	46
	47	AppleLanguageBreakFactory::~AppleLanguageBreakFactory() {
	48	}
	49
729e4ab9	50	#if !TARGET_OS_EMBEDDED
51004dcb A	51	#if 0
51004dcb A	52	// need to update loadDictionaryMatcherFor implementation below
729e4ab9	53
73c04bcf A	54	// Helper function that makes a length-delimited buffer look NUL-terminated
	55	static __attribute__((always_inline)) inline UChar nextUChar(const UChar *&p, ptrdiff_t &l) {
	56	if (l > 0) {
	57	l -= 1;
	58	return *p++;
	59	}
	60	else {
	61	return 0;
	62	}
	63	}
	64
	65	// Add a file's worth of words to the supplied mutable dictionary
	66	static void addDictFile(MutableTrieDictionary to, const char path) {
	67	UErrorCode status = U_ZERO_ERROR;
	68	off_t fileLength;
	69	const char dictRawData = (const char ) -1;
	70	const UChar *dictData = NULL;
	71	ptrdiff_t dictDataLength = 0;
	72	UChar *dictBuffer = NULL;
	73	const char *encoding = NULL;
	74	int32_t signatureLength = 0;
	75
	76	// Open the dictionary file
	77	int dictFile = open(path, O_RDONLY, 0);
	78	if (dictFile == -1) {
	79	status = U_FILE_ACCESS_ERROR;
	80	}
	81
	82	// Determine its length
	83	if (U_SUCCESS(status)) {
	84	fileLength = lseek(dictFile, 0, SEEK_END);
	85	(void) lseek(dictFile, 0, SEEK_SET);
	86	if (fileLength < 0 \|\| fileLength > PTRDIFF_MAX) {
	87	status = U_FILE_ACCESS_ERROR;
	88	}
	89	}
	90
	91	// Map it
	92	if (U_SUCCESS(status)) {
	93	dictRawData = (const char *) mmap(0, (size_t) fileLength, PROT_READ, MAP_SHARED, dictFile, 0);
	94	if ((intptr_t)dictRawData == -1) {
	95	status = U_FILE_ACCESS_ERROR;
	96	}
	97	}
	98
	99	// No longer need the file descriptor open
	100	if (dictFile != -1) {
	101	(void) close(dictFile);
	102	}
	103
	104	// Look for a Unicode signature
	105	if (U_SUCCESS(status)) {
	106	encoding = ucnv_detectUnicodeSignature(dictRawData, fileLength, &signatureLength, &status);
	107	}
	108
	109	// If necessary, convert the data to UChars
	110	if (U_SUCCESS(status) && encoding != NULL) {
	111	UConverter *conv = ucnv_open(encoding, &status);
	112	// Preflight to get buffer size
	113	uint32_t destCap = ucnv_toUChars(conv, NULL, 0, dictRawData, fileLength, &status);
	114	if (status == U_BUFFER_OVERFLOW_ERROR) {
	115	status = U_ZERO_ERROR;
	116	}
	117	if (U_SUCCESS(status)) {
118	dictBuffer = new UChar[destCap+1];
119	}
120	(void) ucnv_toUChars(conv, dictBuffer, destCap+1, dictRawData, fileLength, &status);
121	dictData = dictBuffer;
122	dictDataLength = destCap;
123	if (U_SUCCESS(status) && dictData[0] == 0xFEFF) { // BOM? Skip it
124	dictData += 1;
125	dictDataLength -= 1;
126	}
127
128	ucnv_close(conv);
129	}
130
131	// If it didn't need converting, just assume it's native-endian UTF-16, no BOM
132	if (U_SUCCESS(status) && dictData == NULL) {
133	dictData = (const UChar *) dictRawData;
134	dictDataLength = fileLength/sizeof(UChar);
135	}
136
137	// OK, we now have a pointer to native-endian UTF-16. Process it as one word per line,
138	// stopping at the first space.
139	if (U_SUCCESS(status)) {
140	UnicodeSet breaks(UNICODE_STRING_SIMPLE("[[:lb=BK:][:lb=CR:][:lb=LF:][:lb=NL:]]"), status);
141	const UChar *candidate = dictData;
142	int32_t length = 0;
143	UChar uc = nextUChar(dictData, dictDataLength);
144	while (U_SUCCESS(status) && uc) {
145	while (uc && !u_isspace(uc)) {
146	length += 1;
147	uc = nextUChar(dictData, dictDataLength);
148	}
149
150	if (length > 0) {
151	to->addWord(candidate, length, status);
152	}
153
154	// Find beginning of next line
155	// 1. Skip non-line-break characters
156	while (uc && !breaks.contains(uc)) {
157	uc = nextUChar(dictData, dictDataLength);
158	}
159	// 2. Skip line break characters
160	while (uc && breaks.contains(uc)) {
161	uc = nextUChar(dictData, dictDataLength);
162	}
163
164	// Prepare for next line
165	candidate = dictData-1;
166	length = 0;
167	}
168	}
169
170	// Unmap the file if we mapped it
171	if ((intptr_t) dictRawData != -1) {
172	(void) munmap((void *)dictRawData, (size_t) fileLength);
173	}
174
175	// Delete any temporary buffer
176	delete [] dictBuffer;
177	}
178
179	#if U_IS_BIG_ENDIAN
180	static const char sArchType[] = "";
181	#else
182	static const char sArchType[] = ".le"; // little endian
183	#endif
184
729e4ab9	185	#endif
51004dcb A	186	#endif
	187
	188	/*
	189	In ICU50,
	190	ICULanguageBreakFactory changes from
	191	virtual const CompactTrieDictionary *loadDictionaryFor(UScriptCode script, int32_t breakType);
	192	to
	193	virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script, int32_t breakType);
	194	and CompactTrieDictionary no longer exists. Need to work out new implementation below.
	195	*/
729e4ab9	196
51004dcb A	197	DictionaryMatcher *
51004dcb A	198	AppleLanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script, int32_t breakType) {
0f5d89e8	199	DictionaryMatcher *icuDictMatcher = ICULanguageBreakFactory::loadDictionaryMatcherFor(script);
729e4ab9	200	#if !TARGET_OS_EMBEDDED
51004dcb A	201	#if 0
51004dcb A	202	// need to update loadDictionaryMatcherFor implementation below
73c04bcf	203	// We only look for a user dictionary if there is actually an ICU dictionary
51004dcb	204	if (icuDictMatcher != NULL) {
73c04bcf A	205	UErrorCode status = U_ZERO_ERROR;
	206	const char *scriptName = uscript_getName(script);
	207	char path[256]; // PATH_MAX is overkill in this case
	208	char cachePath[128];
	209	char cacheTargetPath[256];
	210	glob_t dirGlob;
	211	glob_t fileGlob;
	212	struct stat cacheStat;
	213	struct stat dictStat;
	214	bool cacheGood = true;
	215	int globFlags = (GLOB_NOESCAPE\|GLOB_NOSORT\|GLOB_TILDE);
	216	const CompactTrieDictionary *cacheDict = NULL;
	217
	218	// Iterate the dictionary directories and accumulate in dirGlob
	219	NSSearchPathEnumerationState state = NSStartSearchPathEnumeration(NSLibraryDirectory, (NSSearchPathDomainMask) (NSUserDomainMask\|NSLocalDomainMask\|NSNetworkDomainMask));
4388f060	220	while ((state = NSGetNextSearchPathEnumeration(state, path)) != 0) {
73c04bcf A	221	// First get the directory itself. We should never overflow, but use strlcat anyway
	222	// to avoid a crash if we do.
	223	strlcat(path, "/Dictionaries", sizeof(path));
	224	if (!glob(path, globFlags, NULL, &dirGlob)) {
	225	globFlags \|= GLOB_APPEND;
	226	}
	227	}
	228
	229	// If there are no Dictionaries directories, ignore any cache file and return the ICU
	230	// standard dictionary
	231	// TODO: Delete the cache?
	232	if (dirGlob.gl_pathc == 0) {
	233	globfree(&dirGlob);
51004dcb	234	return icuDictMatcher;
73c04bcf A	235	}
	236
	237	// See if there is a cache file already; get its mod time
	238	// TODO: should we be using geteuid() here instead of getuid()?
	239	state = NSStartSearchPathEnumeration(NSCachesDirectory, NSLocalDomainMask);
	240	state = NSGetNextSearchPathEnumeration(state, cachePath); // Just use first one
	241	// Create the cache file name. We should never overflow, but use snprintf to avoid a crash
	242	// if we do.
	243	snprintf(cacheTargetPath, sizeof(cacheTargetPath), "%s/com.apple.ICUUserDictionaryCache%s.%s.%d", cachePath, sArchType, scriptName, getuid());
	244	if (stat(cacheTargetPath, &cacheStat) \|\| cacheStat.st_mode != (S_IFREG\|S_IRUSR\|S_IWUSR)) {
	245	cacheGood = false; // No file or bad permissions or type
	246	}
	247
	248	// Stat the dictionary folders, and glob the dictionary files
	249	globFlags &= ~GLOB_APPEND;
	250	char **pathsp = dirGlob.gl_pathv;
	251	const char *dictpath;
4388f060	252	while ((dictpath = *pathsp++) != NULL) {
73c04bcf A	253	// Stat the directory -- ignore if stat failure
	254	if (!stat(dictpath, &dictStat)) {
	255	// Glob the dictionaries in the directory
	256	snprintf(path, sizeof(path), "%s/*-%s.txt", dictpath, scriptName);
	257	if (!glob(path, globFlags, NULL, &fileGlob)) {
	258	globFlags \|= GLOB_APPEND;
	259	}
	260	// If the directory has been modified after the cache file, we need to rebuild;
	261	// a dictionary might have been deleted.
	262	if (cacheGood && (dictStat.st_mtimespec.tv_sec > cacheStat.st_mtimespec.tv_sec \|\| (dictStat.st_mtimespec.tv_sec == cacheStat.st_mtimespec.tv_sec && dictStat.st_mtimespec.tv_nsec > cacheStat.st_mtimespec.tv_nsec))) {
	263	cacheGood = false;
	264	}
	265	}
	266	}
	267
	268	// No longer need the directory glob
	269	globfree(&dirGlob);
	270
	271	// If there are no dictionaries, ignore the cache file and return the ICU dictionary
	272	// TODO: Delete the cache?
	273	if (fileGlob.gl_pathc == 0) {
	274	globfree(&fileGlob);
51004dcb	275	return icuDictMatcher;
73c04bcf A	276	}
	277
	278	// Now compare the last modified stamp for the cache against all the dictionaries
	279	pathsp = fileGlob.gl_pathv;
	280	while (cacheGood && (dictpath = *pathsp++)) {
	281	// Stat the dictionary -- ignore if stat failure
	282	if (!stat(dictpath, &dictStat) && (dictStat.st_mtimespec.tv_sec > cacheStat.st_mtimespec.tv_sec \|\| (dictStat.st_mtimespec.tv_sec == cacheStat.st_mtimespec.tv_sec && dictStat.st_mtimespec.tv_nsec > cacheStat.st_mtimespec.tv_nsec))) {
	283	cacheGood = false;
	284	}
	285	}
	286
	287	// Do we need to build the dictionary cache?
	288	if (!cacheGood) {
	289	// Create a mutable dictionary from the ICU dictionary
51004dcb	290	MutableTrieDictionary *sum = icuDictMatcher->cloneMutable(status);
73c04bcf A	291	pathsp = fileGlob.gl_pathv;
	292	while (U_SUCCESS(status) && (dictpath = *pathsp++)) {
	293	// Add the contents of a file to the sum
	294	addDictFile(sum, dictpath);
	295	}
	296
	297	// Create a compact (read-only) dictionary
	298	CompactTrieDictionary compact(*sum, status);
	299	delete sum;
	300
	301	if (U_SUCCESS(status)) {
	302	// Open a temp file to write out the cache
	303	strlcat(cachePath, "/temp.XXXXXXXXXX", sizeof(cachePath));
	304	int temp = mkstemp(cachePath);
	305	if (temp == -1) {
	306	status = U_FILE_ACCESS_ERROR;
	307	}
	308	size_t dictSize = compact.dataSize();
	309	if (U_SUCCESS(status) && write(temp, compact.data(), dictSize) != dictSize) {
	310	status = U_FILE_ACCESS_ERROR;
	311	}
	312	// Rename the temp file to the cache. Note that race conditions here are
	313	// fine, as the file system operations are atomic. If an outdated version wins
	314	// over a newer version, it will get rebuilt at the next app launch due to the
	315	// modification time checks above. We don't care that any given app launch gets
	316	// the most up-to-date cache (impossible since we can't lock all the Dictionaries
	317	// directories), only that the cache (eventually) reflects the current state of
	318	// any user dictionaries. That will happen on the next app launch after changes
	319	// to the user dictionaries quiesce.
	320	if (U_SUCCESS(status)) {
	321	if (rename(cachePath, cacheTargetPath)) {
	322	status = U_FILE_ACCESS_ERROR;
	323	(void) unlink(cachePath); // Clean up the temp file
	324	}
	325	}
	326	if (temp != -1) {
	327	close(temp);
	328	}
	329	}
	330	}
	331
	332	// Done with dictionary paths; release memory allocated by glob()
	333	globfree(&fileGlob);
	334
	335	// Map the cache and build the dictionary
	336	if (U_SUCCESS(status)) {
	337	int cache = open(cacheTargetPath, O_RDONLY, 0);
	338	off_t length;
	339	const void cacheData = (const void ) -1;
	340	if (cache == -1) {
	341	status = U_FILE_ACCESS_ERROR;
	342	}
	343	if (U_SUCCESS(status)) {
	344	length = lseek(cache, 0, SEEK_END);
	345	(void) lseek(cache, 0, SEEK_SET);
	346	if (length < 0 \|\| length > PTRDIFF_MAX) {
	347	status = U_FILE_ACCESS_ERROR;
	348	}
	349	}
	350
	351	// Map the cache. Note: it is left mapped until process exit. This is the normal
	352	// behavior anyway, so it shouldn't be an issue.
	353	if (U_SUCCESS(status)) {
	354	cacheData = mmap(0, (size_t) length, PROT_READ, MAP_SHARED, cache, 0);
355	if ((intptr_t)cacheData == -1) {
356	status = U_FILE_ACCESS_ERROR;
357	}
358	}
359	// We can close the cache file now that it's mapped (or not)
360	if (cache != -1) {
361	(void) close(cache);
362	}
363	// If all was successful, try to create the dictionary. The constructor will
364	// check the magic number for us.
365	if (U_SUCCESS(status)) {
366	cacheDict = new CompactTrieDictionary(cacheData, status);
367	}
368	if (U_FAILURE(status) && (intptr_t)cacheData != -1) {
369	// Clean up the mmap
370	(void) munmap((void *)cacheData, (size_t) length);
371	}
372	}
373
374	// If we were successful, free the ICU dictionary and return ours
375	if (U_SUCCESS(status)) {
51004dcb	376	delete icuDictMatcher;
73c04bcf A	377	return cacheDict;
	378	}
	379	else {
	380	delete cacheDict;
	381	}
	382	}
729e4ab9	383	#endif
51004dcb A	384	#endif
51004dcb A	385	return icuDictMatcher;
73c04bcf A	386	}
	387
	388	U_NAMESPACE_END
	389
4388f060	390	#endif /* #if !UCONFIG_NO_BREAK_ITERATION && U_PLATFORM_IS_DARWIN_BASED */