]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
4388f060 A |
3 | /* |
4 | ******************************************************************************* | |
57a6839d | 5 | * Copyright (C) 2011-2013, International Business Machines |
4388f060 A |
6 | * Corporation and others. All Rights Reserved. |
7 | ******************************************************************************* | |
8 | * file name: ppucd.h | |
f3c0d7a5 | 9 | * encoding: UTF-8 |
4388f060 A |
10 | * tab size: 8 (not used) |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2011dec11 | |
14 | * created by: Markus W. Scherer | |
15 | */ | |
16 | ||
17 | #ifndef __PPUCD_H__ | |
18 | #define __PPUCD_H__ | |
19 | ||
20 | #include "unicode/utypes.h" | |
21 | #include "unicode/uniset.h" | |
22 | #include "unicode/unistr.h" | |
23 | ||
24 | #include <stdio.h> | |
25 | ||
26 | /** Additions to the uchar.h enum UProperty (codes defined by ppucd.h, not by uchar.h). */ | |
27 | enum { | |
28 | /** Name_Alias. Takes the value UCHAR_STRING_LIMIT, so the codes in this enum continue from that constant. */ | |
29 | PPUCD_NAME_ALIAS=UCHAR_STRING_LIMIT, | |
30 | PPUCD_CONDITIONAL_CASE_MAPPINGS, /* implicitly PPUCD_NAME_ALIAS+1; exact semantics are not visible in this header */ | |
31 | PPUCD_TURKIC_CASE_FOLDING /* implicitly PPUCD_NAME_ALIAS+2; exact semantics are not visible in this header */ | |
32 | }; | |
33 | ||
34 | U_NAMESPACE_BEGIN | |
35 | ||
36 | class U_TOOLUTIL_API PropertyNames { /* Name-to-enum lookup for properties and property values. All members are virtual so that a non-standard implementation can be handed to PreparsedUCD::setPropertyNames(). */ | |
37 | public: | |
38 | virtual ~PropertyNames(); | |
39 | virtual int32_t getPropertyEnum(const char *name) const; /* maps a property name to an integer property code; the result for an unknown name is not visible here -- TODO confirm in the implementation */ | |
40 | virtual int32_t getPropertyValueEnum(int32_t property, const char *name) const; /* maps a value name of the given property to an integer value code; the result for an unknown name is not visible here -- TODO confirm */ | |
41 | }; | |
42 | ||
43 | struct U_TOOLUTIL_API UniProps { /* Property values for the code point range start..end; PreparsedUCD::getProps() returns a pointer to a filled-in instance. */ | |
44 | UniProps(); | |
45 | ~UniProps(); | |
46 | /** Returns intProps[prop-UCHAR_INT_START]. prop is not range-checked, so the caller must pass a code in UCHAR_INT_START..UCHAR_INT_LIMIT-1. */ ||
47 | int32_t getIntProp(int32_t prop) const { return intProps[prop-UCHAR_INT_START]; } | |
48 | ||
49 | UChar32 start, end; /* code point range that these values describe */ | |
50 | UBool binProps[UCHAR_BINARY_LIMIT]; /* presumably indexed directly by binary property code -- TODO confirm */ | |
51 | int32_t intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]; /* the slot for property code p is p-UCHAR_INT_START, see getIntProp() */ | |
52 | UVersionInfo age; | |
57a6839d | 53 | UChar32 bmg, bpb; /* presumably Bidi_Mirroring_Glyph and Bidi_Paired_Bracket (standard short aliases) -- TODO confirm */ |
4388f060 A |
54 | UChar32 scf, slc, stc, suc; /* presumably the simple case folding, lowercase, titlecase and uppercase mappings -- TODO confirm */ |
55 | int32_t digitValue; | |
56 | const char *numericValue; /* NOTE(review): ownership of the three const char* fields is not visible here; PreparsedUCD keeps several line buffers so that pointers into them stay valid -- confirm in ppucd.cpp */ | |
57 | const char *name; | |
58 | const char *nameAlias; | |
59 | UnicodeString cf, lc, tc, uc; /* presumably the full (string) case folding, lowercase, titlecase and uppercase mappings -- TODO confirm */ | |
60 | UnicodeSet scx; /* presumably Script_Extensions; compare PreparsedUCD::parseScriptExtensions() */ | |
61 | }; | |
62 | ||
63 | class U_TOOLUTIL_API PreparsedUCD { /* Reader for a preparsed UCD file: readLine() fetches and classifies one line at a time, then nextField(), getProps() or getRangeForAlgNames() extract its content. */ | |
64 | public: | |
65 | enum LineType { /* Kind of line returned by readLine(); the comment on each value shows a sample line of that kind. */ | |
66 | /** No line, end of file. */ | |
67 | NO_LINE, | |
68 | /** Empty line. (Might contain a comment.) */ | |
69 | EMPTY_LINE, | |
70 | ||
71 | /** ucd;6.1.0 */ | |
72 | UNICODE_VERSION_LINE, | |
73 | ||
74 | /** property;Binary;Alpha;Alphabetic */ | |
75 | PROPERTY_LINE, | |
76 | /** binary;N;No;F;False */ | |
77 | BINARY_LINE, | |
78 | /** value;gc;Zs;Space_Separator */ | |
79 | VALUE_LINE, | |
80 | ||
81 | /** defaults;0000..10FFFF;age=NA;bc=L;... */ | |
82 | DEFAULTS_LINE, | |
83 | /** block;0000..007F;age=1.1;blk=ASCII;ea=Na;... */ | |
84 | BLOCK_LINE, | |
85 | /** cp;0030;AHex;bc=EN;gc=Nd;na=DIGIT ZERO;... */ | |
86 | CP_LINE, | |
6be67b06 A |
87 | /** unassigned;E01F0..E0FFF;bc=BN;CWKCF;DI;GCB=CN;NFKC_CF= */ |
88 | UNASSIGNED_LINE, | |
4388f060 A |
89 |
90 | /** algnamesrange;4E00..9FCC;han;CJK UNIFIED IDEOGRAPH- */ | |
91 | ALG_NAMES_RANGE_LINE, | |
92 | ||
93 | LINE_TYPE_COUNT /* last enumerator, so its value equals the number of line types above */ | |
94 | }; | |
95 | ||
96 | /** | |
97 | * Constructor. | |
98 | * Prepares this object for reading the preparsed UCD file named by filename; failures are presumably reported via errorCode (the implementation is not in this header). | |
99 | */ | |
100 | PreparsedUCD(const char *filename, UErrorCode &errorCode); | |
101 | ||
102 | /** Destructor. */ | |
103 | ~PreparsedUCD(); | |
104 | ||
105 | /** Sets (aliases) a non-standard PropertyNames implementation. Caller retains ownership. */ | |
106 | void setPropertyNames(const PropertyNames *pn) { pnames=pn; } | |
107 | ||
108 | /** | |
109 | * Reads a line from the preparsed UCD file. | |
110 | * Splits the line by replacing each ';' with a NUL. | |
111 | */ | |
112 | LineType readLine(UErrorCode &errorCode); | |
113 | ||
114 | /** Returns the number of the line read by readLine(). */ | |
115 | int32_t getLineNumber() const { return lineNumber; } | |
116 | ||
117 | /** Returns the line's next field, or NULL. */ | |
118 | const char *nextField(); | |
119 | ||
120 | /** Returns the Unicode version when or after the UNICODE_VERSION_LINE has been read. */ | |
121 | const UVersionInfo &getUnicodeVersion() const { return ucdVersion; } | |
122 | ||
123 | /** Returns TRUE if the current line has property values. */ | |
6be67b06 A |
124 | UBool lineHasPropertyValues() const { /* range test: relies on DEFAULTS_LINE, BLOCK_LINE, CP_LINE and UNASSIGNED_LINE being consecutive in LineType */ |
125 | return DEFAULTS_LINE<=lineType && lineType<=UNASSIGNED_LINE; | |
126 | } | |
4388f060 A |
127 |
128 | /** | |
129 | * Parses properties from the current line. | |
130 | * Clears newValues and sets UProperty codes for property values mentioned | |
131 | * on the current line (as opposed to being inherited). | |
132 | * Returns a pointer to the filled-in UniProps, or NULL if something went wrong. | |
133 | * The returned UniProps are usable until the next line of the same type is read. | |
134 | */ | |
135 | const UniProps *getProps(UnicodeSet &newValues, UErrorCode &errorCode); | |
136 | ||
137 | /** | |
138 | * Returns the code point range for the current algnamesrange line. | |
139 | * Calls & parses nextField(). | |
140 | * Further nextField() calls will yield the range's type & prefix string. | |
141 | * Returns U_SUCCESS(errorCode). | |
142 | */ | |
143 | UBool getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode); | |
144 | ||
145 | private: | |
146 | UBool isLineBufferAvailable(int32_t i) { /* true when line buffer i is referenced by neither defaultLineIndex nor blockLineIndex */ | |
147 | return defaultLineIndex!=i && blockLineIndex!=i; | |
148 | } | |
149 | ||
150 | /** Resets the field iterator and returns the line's first field (the line type field). */ | |
151 | const char *firstField(); | |
152 | ||
153 | UBool parseProperty(UniProps &props, const char *field, UnicodeSet &newValues, | |
154 | UErrorCode &errorCode); | |
155 | UChar32 parseCodePoint(const char *s, UErrorCode &errorCode); | |
156 | UBool parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode); | |
157 | void parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode); | |
158 | void parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode); | |
159 | ||
160 | static const int32_t kNumLineBuffers=3; /* presumably one buffer each for the retained defaults and block lines plus one for the current line; see isLineBufferAvailable() -- TODO confirm */ | |
161 | ||
162 | PropertyNames *icuPnames; // owned | |
163 | const PropertyNames *pnames; // aliased | |
164 | FILE *file; | |
165 | int32_t defaultLineIndex, blockLineIndex, lineIndex; | |
166 | int32_t lineNumber; | |
167 | LineType lineType; | |
168 | char *fieldLimit; | |
169 | char *lineLimit; | |
170 | ||
171 | UVersionInfo ucdVersion; | |
172 | UniProps defaultProps, blockProps, cpProps; | |
6be67b06 | 173 | UnicodeSet blockValues; |
4388f060 A |
174 | // Multiple lines so that default and block properties can maintain pointers |
175 | // into their line buffers. | |
176 | char lines[kNumLineBuffers][4096]; | |
177 | }; | |
178 | ||
179 | U_NAMESPACE_END | |
180 | ||
181 | #endif // __PPUCD_H__ |