]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/uspoof_conf.h
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / uspoof_conf.h
1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 2008-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 * file name: uspoof_conf.h
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2009Jan05
14 * created by: Andy Heninger
15 *
16 * Internal classes for compiling confusable data into its binary (runtime) form.
17 */
18
19 #ifndef __USPOOF_BUILDCONF_H__
20 #define __USPOOF_BUILDCONF_H__
21
22 #if !UCONFIG_NO_NORMALIZATION
23
24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
25
26 #include "unicode/uregex.h"
27 #include "uhash.h"
28 #include "uspoof_impl.h"
29
30 U_NAMESPACE_BEGIN
31
32 // SPUString
33 // Holds a string that is the result of one of the mappings defined
34 // by the confusable mapping data (confusables.txt from Unicode.org).
35 // Instances of SPUString exist during the compilation process only.
36
37 struct SPUString : public UMemory {
38 UnicodeString *fStr; // The actual string.
39 int32_t fStrTableIndex; // Index into the final runtime data for this string
40 // (or, for length 1, the single string char itself,
41 // there being no string table entry for it).
42 SPUString(UnicodeString *s); // NOTE(review): presumably stores s in fStr and takes ownership of it -- confirm in the implementation file.
43 ~SPUString(); // NOTE(review): presumably releases fStr -- confirm in the implementation file.
44 };
45
46
47 // SPUStringPool: a utility class for holding the strings that are the result of
48 // the spoof mappings. These strings will ultimately end up in the
49 // run-time String Table.
50 // This is sort of like a sorted set of strings, except that ICU's anemic
51 // built-in collections don't support those, so it is implemented with a
52 // combination of a uhash (lookup by string) and a UVector (ordered access).
53
54
55 class SPUStringPool : public UMemory {
56 public:
57 SPUStringPool(UErrorCode &status); // NOTE(review): presumably reports failure to create fVec/fHash through status -- confirm.
58 ~SPUStringPool();
59
60 // Add a string. Return the string from the table.
61 // If the input parameter string is already in the table, delete the
62 // input parameter and return the existing string.
63 // Callers must therefore not use src after this call; use the returned SPUString instead.
64 SPUString *addString(UnicodeString *src, UErrorCode &status);
65
66
67 // Get the n-th string in the collection (ordering changes after sort()).
68 SPUString *getByIndex(int32_t i);
69
70 // Sort the contents; affects the ordering of getByIndex().
71 void sort(UErrorCode &status);
72
73 int32_t size(); // NOTE(review): presumably the number of strings in the pool -- inferred from the name, confirm.
74
75 private:
76 UVector *fVec; // Elements are SPUString *
77 UHashtable *fHash; // Key: UnicodeString Value: SPUString (NOTE(review): presumably stored as pointers -- confirm)
78 };
78
79
80 // class ConfusabledataBuilder
81 // An instance of this class exists while the confusable data is being built from source.
82 // It encapsulates the intermediate data structures that are used for building.
83 // It exports one static function, buildConfusableData(), to do a confusable data build.
84
85 class ConfusabledataBuilder : public UMemory {
86 private:
87 SpoofImpl *fSpoofImpl; // The spoof implementation object passed to the constructor / buildConfusableData().
88 UChar *fInput; // NOTE(review): presumably the confusables source converted to UTF-16 -- confirm in the implementation file.
89 UHashtable *fSLTable; // One of the four mapping tables (see fKeySet and addKeyEntry()).
90 UHashtable *fSATable; // One of the four mapping tables.
91 UHashtable *fMLTable; // One of the four mapping tables.
92 UHashtable *fMATable; // One of the four mapping tables.
93 UnicodeSet *fKeySet; // A set of all keys (UChar32s) that go into the four mapping tables.
94
95 // The binary data is first assembled into the following four collections, then
96 // copied to its final raw-memory destination.
97 UVector *fKeyVec; // Keys; indexed in parallel with fValueVec (see getMapping()).
98 UVector *fValueVec; // Values; indexed in parallel with fKeyVec.
99 UnicodeString *fStringTable;
100 UVector *fStringLengthsTable;
101
102 SPUStringPool *stringPool; // Pool of mapping result strings (see SPUStringPool).
103 URegularExpression *fParseLine; // NOTE(review): presumably matches one line of the confusables source -- confirm.
104 URegularExpression *fParseHexNum; // NOTE(review): presumably matches a hex code point within a line -- confirm.
105 int32_t fLineNum; // NOTE(review): presumably the current source line number, for error reporting -- confirm.
106
107 ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status); // Private: instances are only created from within the class (see buildConfusableData()).
108 ~ConfusabledataBuilder();
109 void build(const char * confusables, int32_t confusablesLen, UErrorCode &status); // Build from the confusables source text of length confusablesLen.
110
111 // Add an entry to the key and value tables being built
112 // input: data from SLTable, MATable, etc.
113 // output: entry added to fKeyVec and fValueVec
114 void addKeyEntry(UChar32 keyChar, // The key character
115 UHashtable *table, // The table, one of SATable, MATable, etc.
116 int32_t tableFlag, // One of USPOOF_SA_TABLE_FLAG, etc.
117 UErrorCode &status);
118
119 // From an index into fKeyVec & fValueVec
120 // get a UnicodeString with the corresponding mapping.
121 UnicodeString getMapping(int32_t index);
122
123 // Populate the final binary output data array with the compiled data.
124 void outputData(UErrorCode &status);
125
126 public: // The single exported entry point for a confusable data build.
127 static void buildConfusableData(SpoofImpl *spImpl, const char * confusables,
128 int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status); // NOTE(review): errorType and pe presumably receive parse-error details -- confirm.
129 };
130 U_NAMESPACE_END
131
132 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
133 #endif // !UCONFIG_NO_NORMALIZATION
134 #endif // __USPOOF_BUILDCONF_H__