]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/uspoof_conf.h
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ******************************************************************************
6 * Copyright (C) 2008-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 ******************************************************************************
10 * file name: uspoof_conf.h
12 * tab size: 8 (not used)
15 * created on: 2009Jan05
16 * created by: Andy Heninger
18 * Internal classes for compiling confusable data into its binary (runtime) form.
21 #ifndef __USPOOF_BUILDCONF_H__
22 #define __USPOOF_BUILDCONF_H__
24 #include "unicode/utypes.h"
26 #if !UCONFIG_NO_NORMALIZATION
28 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
30 #include "unicode/uregex.h"
32 #include "uspoof_impl.h"
37 // Holds a string that is the result of one of the mappings defined
38 // by the confusable mapping data (confusables.txt from Unicode.org)
39 // Instances of SPUString exist during the compilation process only.
// One mapping-result string together with the index it will have in the final runtime data.
41 struct SPUString
: public UMemory
{
42 UnicodeString
*fStr
; // The actual string.
43 int32_t fCharOrStrTableIndex
; // Index into the final runtime data for this
44 // string (or, for length 1, the single string char
45 // itself, there being no string table entry for it.)
// Construct an entry from a pointer to a UnicodeString.
// NOTE(review): presumably s is stored in fStr; whether this object takes ownership of s
// is not visible in this header -- confirm against the implementation.
46 SPUString(UnicodeString
*s
);
// NOTE(review): no closing brace for this struct is present in this copy, and the embedded
// line numbers skip after 46, so trailing lines appear to be missing -- compare with upstream.
51 // String Pool: a utility class for holding the strings that are the result of
52 // the spoof mappings. These strings will ultimately end up in the
53 // run-time String Table.
54 // This is sort of like a sorted set of strings, except that ICU's anemic
55 // built-in collections don't support those, so it is implemented with a
56 // combination of a uhash and a UVector.
// Collection of SPUString entries, held in a vector (fVec) and a hash table (fHash).
59 class SPUStringPool
: public UMemory
{
// NOTE(review): no access specifier is visible in this copy and the embedded line numbers
// skip (59 -> 61); a line such as "public:" may have been dropped -- compare with upstream.
// Construct a pool; status is a UErrorCode passed by reference.
// NOTE(review): presumably set on failure, per the usual ICU error-code convention -- confirm.
61 SPUStringPool(UErrorCode
&status
);
64 // Add a string. Return the string from the table.
65 // If the input parameter string is already in the table, delete the
66 // input parameter and return the existing string.
// As described above, the src pointer may be deleted by this call, so callers should
// use the returned SPUString rather than src afterwards.
67 SPUString
*addString(UnicodeString
*src
, UErrorCode
&status
);
70 // Get the n-th string in the collection.
// NOTE(review): behavior for an out-of-range i is not visible in this header.
71 SPUString
*getByIndex(int32_t i
);
73 // Sort the contents; affects the ordering of getByIndex().
// NOTE(review): the sort order is defined in the implementation, not in this header.
74 void sort(UErrorCode
&status
);
// NOTE(review): embedded line numbers skip here (74 -> 79); lines between the methods and
// the data members (possibly an access specifier) appear to be missing from this copy.
79 UVector
*fVec
; // Elements are SPUString *
80 UHashtable
*fHash
; // Key: UnicodeString Value: SPUString
// NOTE(review): no closing brace for this class is present in this copy.
84 // class ConfusabledataBuilder
85 // An instance of this class exists while the confusable data is being built from source.
86 // It encapsulates the intermediate data structures that are used for building.
87 // It exports one static function, to do a confusable data build.
// Builder that holds the intermediate state used while compiling confusable data.
// NOTE(review): no access specifiers are visible in this copy and the embedded line numbers
// skip in several places, so lines appear to be missing -- compare with upstream.
89 class ConfusabledataBuilder
: public UMemory
{
// NOTE(review): presumably the SpoofImpl that receives the built data -- confirm in the implementation.
91 SpoofImpl
*fSpoofImpl
;
94 UnicodeSet
*fKeySet
; // A set of all keys (UChar32s) that go into the four mapping tables.
96 // The binary data is first assembled into the following four collections, then
97 // copied to its final raw-memory destination.
// NOTE(review): only fStringTable follows in this copy; the embedded numbering skips
// (97 -> 100 -> 102), so the other collections appear to have been dropped.
100 UnicodeString
*fStringTable
;
// Pool of SPUString entries (see SPUStringPool).
102 SPUStringPool
*stringPool
;
// NOTE(review): presumably the regular expressions used to parse one line of the
// confusables source and a hexadecimal number within it -- confirm in the implementation.
103 URegularExpression
*fParseLine
;
104 URegularExpression
*fParseHexNum
;
// Construct a builder for the given SpoofImpl; status is a UErrorCode passed by reference.
107 ConfusabledataBuilder(SpoofImpl
*spImpl
, UErrorCode
&status
);
108 ~ConfusabledataBuilder();
// Run a build from the confusables text of length confusablesLen.
// NOTE(review): the units of confusablesLen and the expected encoding of the text are
// not stated in this header -- confirm against the caller.
109 void build(const char * confusables
, int32_t confusablesLen
, UErrorCode
&status
);
111 // Add an entry to the key and value tables being built
112 // input: data from SLTable, MATable, etc.
113 // output: entry added to fKeyVec and fValueVec
114 void addKeyEntry(UChar32 keyChar
, // The key character
115 UHashtable
*table
, // The table, one of SATable, MATable, etc.
116 int32_t tableFlag
, // One of USPOOF_SA_TABLE_FLAG, etc.
// NOTE(review): the addKeyEntry parameter list is not closed in this copy (embedded
// numbering skips 116 -> 119); the remaining parameter(s) and ");" appear to be missing.
119 // From an index into fKeyVec & fValueVec
120 // get a UnicodeString with the corresponding mapping.
121 UnicodeString
getMapping(int32_t index
);
123 // Populate the final binary output data array with the compiled data.
124 void outputData(UErrorCode
&status
);
// Static entry point for a confusable data build.
// NOTE(review): errorType and pe are presumably out-parameters describing a parse failure;
// their exact semantics are not visible in this header -- confirm in the implementation.
127 static void buildConfusableData(SpoofImpl
*spImpl
, const char * confusables
,
128 int32_t confusablesLen
, int32_t *errorType
, UParseError
*pe
, UErrorCode
&status
);
// NOTE(review): no closing brace for this class is present in this copy.
133 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
134 #endif // __USPOOF_BUILDCONF_H__