]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/uspoof_conf.h
ICU-59180.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / uspoof_conf.h
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 * Copyright (C) 2008-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 ******************************************************************************
10 * file name: uspoof_conf.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2009Jan05
16 * created by: Andy Heninger
17 *
18 * Internal classes for compiling confusable data into its binary (runtime) form.
19 */
20
21 #ifndef __USPOOF_BUILDCONF_H__
22 #define __USPOOF_BUILDCONF_H__
23
24 #if !UCONFIG_NO_NORMALIZATION
25
26 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
27
28 #include "unicode/uregex.h"
29 #include "uhash.h"
30 #include "uspoof_impl.h"
31
32 U_NAMESPACE_BEGIN
33
34 // SPUString
35 // Holds a string that is the result of one of the mappings defined
36 // by the confusable mapping data (confusables.txt from Unicode.org)
37 // Instances of SPUString exist during the compilation process only.
38
39 struct SPUString : public UMemory {
40 UnicodeString *fStr; // The actual string. NOTE(review): presumably owned by this object and released in ~SPUString(); confirm in uspoof_conf.cpp.
41 int32_t fCharOrStrTableIndex; // Index into the final runtime data for this
42 // string (or, for length 1, the single string char
43 // itself, there being no string table entry for it.)
44 SPUString(UnicodeString *s); // Wraps the mapping-result string s. NOTE(review): presumably stores s in fStr and adopts it; the initial value of fCharOrStrTableIndex is not visible from this header.
45 ~SPUString(); // NOTE(review): presumably deletes fStr; confirm against the implementation.
46 };
47
48
49 // String Pool A utility class for holding the strings that are the result of
50 // the spoof mappings. These strings will ultimately end up in the
51 // run-time String Table.
52 // This is sort of like a sorted set of strings, except that ICU's anemic
53 // built-in collections don't support those, so it is implemented with a
54 // combination of a uhash and a UVector.
55
56
57 class SPUStringPool : public UMemory {
58 public:
59 SPUStringPool(UErrorCode &status); // status: in/out ICU error code. NOTE(review): presumably set on failure to create fVec/fHash; confirm.
60 ~SPUStringPool();
61
62 // Add a string. Return the string from the table.
63 // If the input parameter string is already in the table, delete the
64 // input parameter and return the existing string.
65 SPUString *addString(UnicodeString *src, UErrorCode &status); // The caller must not use src after this call, since it may have been deleted (see above). NOTE(review): when src is new it is presumably adopted by the pool; confirm.
66
67
68 // Get the n-th string in the collection.
69 SPUString *getByIndex(int32_t i); // NOTE(review): valid range is presumably 0 <= i < size(); out-of-range behavior is not visible from this header.
70
71 // Sort the contents; affects the ordering of getByIndex().
72 void sort(UErrorCode &status); // NOTE(review): the sort key/order is defined in the implementation, not here.
73
74 int32_t size(); // NOTE(review): presumably the number of strings currently held in the pool.
75
76 private:
77 UVector *fVec; // Elements are SPUString *
78 UHashtable *fHash; // Key: UnicodeString Value: SPUString
79 };
80
81
82 // class ConfusabledataBuilder
83 // An instance of this class exists while the confusable data is being built from source.
84 // It encapsulates the intermediate data structures that are used for building.
85 // It exports one static function, to do a confusable data build.
86
87 class ConfusabledataBuilder : public UMemory {
88 private:
89 SpoofImpl *fSpoofImpl; // NOTE(review): presumably the SpoofImpl that receives the compiled data; ownership not visible here.
90 UChar *fInput; // NOTE(review): presumably the confusables source text converted to UTF-16; confirm in build().
91 UHashtable *fTable; // NOTE(review): key/value types are not documented in this header; see the implementation.
92 UnicodeSet *fKeySet; // A set of all keys (UChar32s) that go into the four mapping tables.
93
94 // The binary data is first assembled into the following four collections, then
95 // copied to its final raw-memory destination. NOTE(review): only three collections are declared below, so the count of four may be stale.
96 UVector *fKeyVec;
97 UVector *fValueVec;
98 UnicodeString *fStringTable;
99
100 SPUStringPool *stringPool; // Pool of mapping-result strings. NOTE(review): unlike the other data members, this name lacks the 'f' prefix.
101 URegularExpression *fParseLine; // NOTE(review): presumably the compiled regex matching one line of the confusables source.
102 URegularExpression *fParseHexNum; // NOTE(review): presumably the compiled regex extracting hex code point values.
103 int32_t fLineNum; // NOTE(review): presumably the current source line number, used for error reporting.
104
105 ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status); // Private: instances are only created inside this class; buildConfusableData() is the public entry point.
106 ~ConfusabledataBuilder();
107 void build(const char * confusables, int32_t confusablesLen, UErrorCode &status); // NOTE(review): confusables is presumably the confusables.txt content and confusablesLen its length in bytes; confirm encoding in the implementation.
108
109 // Add an entry to the key and value tables being built
110 // input: data from SLTable, MATable, etc.
111 // output: entry added to fKeyVec and fValueVec
112 void addKeyEntry(UChar32 keyChar, // The key character
113 UHashtable *table, // The table, one of SATable, MATable, etc.
114 int32_t tableFlag, // One of USPOOF_SA_TABLE_FLAG, etc.
115 UErrorCode &status);
116
117 // From an index into fKeyVec & fValueVec
118 // get a UnicodeString with the corresponding mapping.
119 UnicodeString getMapping(int32_t index);
120
121 // Populate the final binary output data array with the compiled data.
122 void outputData(UErrorCode &status);
123
124 public:
125 static void buildConfusableData(SpoofImpl *spImpl, const char * confusables, // The one exported (static) function: performs a confusable data build for spImpl.
126 int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status); // NOTE(review): errorType and pe are presumably output parameters describing a parse failure; confirm which may be NULL.
127 };
128 U_NAMESPACE_END
129
130 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
131 #endif // !UCONFIG_NO_NORMALIZATION
132 #endif // __USPOOF_BUILDCONF_H__