]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/gennorm2/n2builder.h
ICU-62108.0.1.tar.gz
[apple/icu.git] / icuSources / tools / gennorm2 / n2builder.h
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2009-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: n2builder.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2009nov25
16 * created by: Markus W. Scherer
17 */
18
19 #ifndef __N2BUILDER_H__
20 #define __N2BUILDER_H__
21
22 #include "unicode/utypes.h"
23
24 #if !UCONFIG_NO_NORMALIZATION
25
26 #include "unicode/errorcode.h"
27 #include "unicode/unistr.h"
28 #include "normalizer2impl.h" // for IX_COUNT
29 #include "toolutil.h"
30 #include "utrie2.h"
31 #include "norms.h"
32
33 U_NAMESPACE_BEGIN
34
35 extern UBool beVerbose, haveCopyright;
36
37 class Normalizer2DataBuilder {
38 public:
39 Normalizer2DataBuilder(UErrorCode &errorCode);
40 ~Normalizer2DataBuilder();
41
42 enum OverrideHandling {
43 OVERRIDE_NONE,
44 OVERRIDE_ANY,
45 OVERRIDE_PREVIOUS
46 };
47
48 void setOverrideHandling(OverrideHandling oh);
49
50 enum Optimization {
51 OPTIMIZE_NORMAL,
52 OPTIMIZE_FAST
53 };
54
55 void setOptimization(Optimization opt) { optimization=opt; }
56
57 void setCC(UChar32 c, uint8_t cc);
58 void setOneWayMapping(UChar32 c, const UnicodeString &m);
59 void setRoundTripMapping(UChar32 c, const UnicodeString &m);
60 void removeMapping(UChar32 c);
61
62 void setUnicodeVersion(const char *v);
63
64 void writeBinaryFile(const char *filename);
65 void writeCSourceFile(const char *filename);
66 void writeDataFile(const char *filename, bool writeRemoved) const;
67
68 static void computeDiff(const Normalizer2DataBuilder &b1,
69 const Normalizer2DataBuilder &b2,
70 Normalizer2DataBuilder &diff);
71
72 private:
73 friend class Norm16Writer;
74
75 Normalizer2DataBuilder(const Normalizer2DataBuilder &other) = delete;
76 Normalizer2DataBuilder &operator=(const Normalizer2DataBuilder &other) = delete;
77
78 Norm *checkNormForMapping(Norm *p, UChar32 c); // check for permitted overrides
79
80 /**
81 * A starter character with a mapping does not have a composition boundary after it
82 * if the character itself combines-forward (which is tested by the caller of this function),
83 * or it is deleted (mapped to the empty string),
84 * or its mapping contains no starter,
85 * or the last starter combines-forward.
86 */
87 UBool mappingHasCompBoundaryAfter(const BuilderReorderingBuffer &buffer,
88 Norm::MappingType mappingType) const;
89 /** Returns TRUE if the mapping by itself recomposes, that is, it is not comp-normalized. */
90 UBool mappingRecomposes(const BuilderReorderingBuffer &buffer) const;
91 void postProcess(Norm &norm);
92
93 void setSmallFCD(UChar32 c);
94 int32_t getMinNoNoDelta() const {
95 return indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]-
96 ((2*Normalizer2Impl::MAX_DELTA+1)<<Normalizer2Impl::DELTA_SHIFT);
97 }
98 void writeNorm16(UChar32 start, UChar32 end, Norm &norm);
99 void setHangulData();
100 void processData();
101
102 Norms norms;
103
104 int32_t phase;
105 OverrideHandling overrideHandling;
106
107 Optimization optimization;
108
109 int32_t indexes[Normalizer2Impl::IX_COUNT];
110 UTrie2 *norm16Trie;
111 int32_t norm16TrieLength;
112 UnicodeString extraData;
113 uint8_t smallFCD[0x100];
114
115 UVersionInfo unicodeVersion;
116 };
117
118 U_NAMESPACE_END
119
120 #endif // #if !UCONFIG_NO_NORMALIZATION
121
122 #endif // __N2BUILDER_H__