]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/i18n/uspoof_conf.h
ICU-551.51.4.tar.gz
[apple/icu.git] / icuSources / i18n / uspoof_conf.h
... / ...
CommitLineData
1/*
2******************************************************************************
3*
4* Copyright (C) 2008-2011, International Business Machines
5* Corporation and others. All Rights Reserved.
6*
7******************************************************************************
8* file name: uspoof_conf.h
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2009Jan05
14* created by: Andy Heninger
15*
16* Internal classes for compiling confusable data into its binary (runtime) form.
17*/
18
19#ifndef __USPOOF_BUILDCONF_H__
20#define __USPOOF_BUILDCONF_H__
21
22#if !UCONFIG_NO_NORMALIZATION
23
24#if !UCONFIG_NO_REGULAR_EXPRESSIONS
25
26#include "uspoof_impl.h"
27
28U_NAMESPACE_BEGIN
29
30// SPUString
31// Holds a string that is the result of one of the mappings defined
32// by the confusable mapping data (confusables.txt from Unicode.org)
33// Instances of SPUString exist during the compilation process only.
34
// Pairs one mapping-result string with its position in the runtime string table.
// Declared as a struct, so both data members are public.
35struct SPUString : public UMemory {
36 UnicodeString *fStr; // The actual string.
37 int32_t fStrTableIndex; // Index into the final runtime data for this string.
38 // (or, for length 1, the single string char itself,
39 // there being no string table entry for it.)
// NOTE(review): a destructor is declared and fStr is a raw pointer, so this
// presumably adopts (takes ownership of) s -- confirm in the implementation file.
40 SPUString(UnicodeString *s);
41 ~SPUString();
// NOTE(review): no copy constructor or assignment operator is declared here;
// if fStr is owned, copying an SPUString would be unsafe -- confirm.
42};
43
44
45// String Pool A utility class for holding the strings that are the result of
46// the spoof mappings. These strings will ultimately end up in the
47// run-time String Table.
48// This is sort of like a sorted set of strings, except that ICU's anemic
49// built-in collections don't support those, so it is implemented with a
50// combination of a uhash and a UVector.
51
52
// Collection of SPUString objects, held in a UVector (for indexed access)
// and a UHashtable (keyed by UnicodeString), per the member declarations below.
53class SPUStringPool : public UMemory {
54 public:
// status: ICU error code reference; presumably set on allocation failure -- confirm.
55 SPUStringPool(UErrorCode &status);
56 ~SPUStringPool();
57
58 // Add a string. Return the string from the table.
59 // If the input parameter string is already in the table, delete the
60 // input parameter and return the existing string.
// Because the input may be deleted, callers should not use src after this call.
// NOTE(review): presumably the pool also owns src when it is newly added -- confirm.
61 SPUString *addString(UnicodeString *src, UErrorCode &status);
62
63
64 // Get the n-th string in the collection.
// NOTE(review): behavior for an out-of-range i is not specified here -- confirm.
65 SPUString *getByIndex(int32_t i);
66
67 // Sort the contents; affects the ordering of getByIndex().
// NOTE(review): the sort order is defined in the implementation file, not here.
68 void sort(UErrorCode &status);
69
// Presumably the number of strings currently in the pool -- confirm.
70 int32_t size();
71
72 private:
73 UVector *fVec; // Elements are SPUString *
74 UHashtable *fHash; // Key: UnicodeString Value: SPUString
75};
76
77
78// class ConfusabledataBuilder
79// An instance of this class exists while the confusable data is being built from source.
80// It encapsulates the intermediate data structures that are used for building.
81// It exports one static function, to do a confusable data build.
82
// Builder for the binary confusable data. The constructor, destructor and all
// helpers are private; the static buildConfusableData() is the only public member.
83class ConfusabledataBuilder : public UMemory {
84 private:
85 SpoofImpl *fSpoofImpl;
86 UChar *fInput;
// Four mapping tables. NOTE(review): the SL/SA/ML/MA names presumably correspond
// to the four confusable table types in confusables.txt -- confirm.
87 UHashtable *fSLTable;
88 UHashtable *fSATable;
89 UHashtable *fMLTable;
90 UHashtable *fMATable;
91 UnicodeSet *fKeySet; // A set of all keys (UChar32s) that go into the four mapping tables.
92
93 // The binary data is first assembled into the following four collections, then
94 // copied to its final raw-memory destination.
// (fKeyVec, fValueVec, fStringTable, fStringLengthsTable)
95 UVector *fKeyVec;
96 UVector *fValueVec;
97 UnicodeString *fStringTable;
98 UVector *fStringLengthsTable;
99
// NOTE(review): stringPool lacks the 'f' prefix used by every other data member.
100 SPUStringPool *stringPool;
// NOTE(review): presumably the compiled patterns used to parse input lines and
// hex code point fields -- confirm in the implementation file.
101 URegularExpression *fParseLine;
102 URegularExpression *fParseHexNum;
103 int32_t fLineNum;
104
// Private: instances can only be created and destroyed by members of this class.
105 ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status);
106 ~ConfusabledataBuilder();
// confusables / confusablesLen: the source text and its length.
// NOTE(review): presumably the contents of confusables.txt as char data -- confirm encoding.
107 void build(const char * confusables, int32_t confusablesLen, UErrorCode &status);
108
109 // Add an entry to the key and value tables being built
110 // input: data from SLTable, MATable, etc.
111 // output: entry added to fKeyVec and fValueVec
112 void addKeyEntry(UChar32 keyChar, // The key character
113 UHashtable *table, // The table, one of SATable, MATable, etc.
114 int32_t tableFlag, // One of USPOOF_SA_TABLE_FLAG, etc.
115 UErrorCode &status);
116
117 // From an index into fKeyVec & fValueVec
118 // get a UnicodeString with the corresponding mapping.
// Returned by value.
119 UnicodeString getMapping(int32_t index);
120
121 // Populate the final binary output data array with the compiled data.
122 void outputData(UErrorCode &status);
123
124 public:
// The single public entry point: performs a confusable data build for spImpl
// from the given source text.
// NOTE(review): errorType and pe are presumably output parameters describing a
// parse failure -- confirm against the implementation and callers.
125 static void buildConfusableData(SpoofImpl *spImpl, const char * confusables,
126 int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status);
127};
128U_NAMESPACE_END
129
130#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
131#endif // !UCONFIG_NO_NORMALIZATION
132#endif // __USPOOF_BUILDCONF_H__