]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | ******************************************************************************* | |
3 | * Copyright (C) 2011-2012, International Business Machines | |
4 | * Corporation and others. All Rights Reserved. | |
5 | ******************************************************************************* | |
6 | * file name: ppucd.h | |
7 | * encoding: US-ASCII | |
8 | * tab size: 8 (not used) | |
9 | * indentation:4 | |
10 | * | |
11 | * created on: 2011dec11 | |
12 | * created by: Markus W. Scherer | |
13 | */ | |
14 | ||
15 | #ifndef __PPUCD_H__ | |
16 | #define __PPUCD_H__ | |
17 | ||
18 | #include "unicode/utypes.h" | |
19 | #include "unicode/uniset.h" | |
20 | #include "unicode/unistr.h" | |
21 | ||
22 | #include <stdio.h> | |
23 | ||
24 | /** Additions to the uchar.h enum UProperty. */ | |
25 | enum { | |
26 | /** Name_Alias */ | |
27 | PPUCD_NAME_ALIAS=UCHAR_STRING_LIMIT, | |
28 | PPUCD_CONDITIONAL_CASE_MAPPINGS, | |
29 | PPUCD_TURKIC_CASE_FOLDING | |
30 | }; | |
31 | ||
32 | U_NAMESPACE_BEGIN | |
33 | ||
34 | class U_TOOLUTIL_API PropertyNames { | |
35 | public: | |
36 | virtual ~PropertyNames(); | |
37 | virtual int32_t getPropertyEnum(const char *name) const; | |
38 | virtual int32_t getPropertyValueEnum(int32_t property, const char *name) const; | |
39 | }; | |
40 | ||
41 | struct U_TOOLUTIL_API UniProps { | |
42 | UniProps(); | |
43 | ~UniProps(); | |
44 | ||
45 | int32_t getIntProp(int32_t prop) const { return intProps[prop-UCHAR_INT_START]; } | |
46 | ||
47 | UChar32 start, end; | |
48 | UBool binProps[UCHAR_BINARY_LIMIT]; | |
49 | int32_t intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]; | |
50 | UVersionInfo age; | |
51 | UChar32 bmg; | |
52 | UChar32 scf, slc, stc, suc; | |
53 | int32_t digitValue; | |
54 | const char *numericValue; | |
55 | const char *name; | |
56 | const char *nameAlias; | |
57 | UnicodeString cf, lc, tc, uc; | |
58 | UnicodeSet scx; | |
59 | }; | |
60 | ||
61 | class U_TOOLUTIL_API PreparsedUCD { | |
62 | public: | |
63 | enum LineType { | |
64 | /** No line, end of file. */ | |
65 | NO_LINE, | |
66 | /** Empty line. (Might contain a comment.) */ | |
67 | EMPTY_LINE, | |
68 | ||
69 | /** ucd;6.1.0 */ | |
70 | UNICODE_VERSION_LINE, | |
71 | ||
72 | /** property;Binary;Alpha;Alphabetic */ | |
73 | PROPERTY_LINE, | |
74 | /** binary;N;No;F;False */ | |
75 | BINARY_LINE, | |
76 | /** value;gc;Zs;Space_Separator */ | |
77 | VALUE_LINE, | |
78 | ||
79 | /** defaults;0000..10FFFF;age=NA;bc=L;... */ | |
80 | DEFAULTS_LINE, | |
81 | /** block;0000..007F;age=1.1;blk=ASCII;ea=Na;... */ | |
82 | BLOCK_LINE, | |
83 | /** cp;0030;AHex;bc=EN;gc=Nd;na=DIGIT ZERO;... */ | |
84 | CP_LINE, | |
85 | ||
86 | /** algnamesrange;4E00..9FCC;han;CJK UNIFIED IDEOGRAPH- */ | |
87 | ALG_NAMES_RANGE_LINE, | |
88 | ||
89 | LINE_TYPE_COUNT | |
90 | }; | |
91 | ||
92 | /** | |
93 | * Constructor. | |
94 | * Prepare this object for a new, empty package. | |
95 | */ | |
96 | PreparsedUCD(const char *filename, UErrorCode &errorCode); | |
97 | ||
98 | /** Destructor. */ | |
99 | ~PreparsedUCD(); | |
100 | ||
101 | /** Sets (aliases) a non-standard PropertyNames implementation. Caller retains ownership. */ | |
102 | void setPropertyNames(const PropertyNames *pn) { pnames=pn; } | |
103 | ||
104 | /** | |
105 | * Reads a line from the preparsed UCD file. | |
106 | * Splits the line by replacing each ';' with a NUL. | |
107 | */ | |
108 | LineType readLine(UErrorCode &errorCode); | |
109 | ||
110 | /** Returns the number of the line read by readLine(). */ | |
111 | int32_t getLineNumber() const { return lineNumber; } | |
112 | ||
113 | /** Returns the line's next field, or NULL. */ | |
114 | const char *nextField(); | |
115 | ||
116 | /** Returns the Unicode version when or after the UNICODE_VERSION_LINE has been read. */ | |
117 | const UVersionInfo &getUnicodeVersion() const { return ucdVersion; } | |
118 | ||
119 | /** Returns TRUE if the current line has property values. */ | |
120 | UBool lineHasPropertyValues() const { return DEFAULTS_LINE<=lineType && lineType<=CP_LINE; } | |
121 | ||
122 | /** | |
123 | * Parses properties from the current line. | |
124 | * Clears newValues and sets UProperty codes for property values mentioned | |
125 | * on the current line (as opposed to being inherited). | |
126 | * Returns a pointer to the filled-in UniProps, or NULL if something went wrong. | |
127 | * The returned UniProps are usable until the next line of the same type is read. | |
128 | */ | |
129 | const UniProps *getProps(UnicodeSet &newValues, UErrorCode &errorCode); | |
130 | ||
131 | /** | |
132 | * Returns the code point range for the current algnamesrange line. | |
133 | * Calls & parses nextField(). | |
134 | * Further nextField() calls will yield the range's type & prefix string. | |
135 | * Returns U_SUCCESS(errorCode). | |
136 | */ | |
137 | UBool getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode); | |
138 | ||
139 | private: | |
140 | UBool isLineBufferAvailable(int32_t i) { | |
141 | return defaultLineIndex!=i && blockLineIndex!=i; | |
142 | } | |
143 | ||
144 | /** Resets the field iterator and returns the line's first field (the line type field). */ | |
145 | const char *firstField(); | |
146 | ||
147 | UBool parseProperty(UniProps &props, const char *field, UnicodeSet &newValues, | |
148 | UErrorCode &errorCode); | |
149 | UChar32 parseCodePoint(const char *s, UErrorCode &errorCode); | |
150 | UBool parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode); | |
151 | void parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode); | |
152 | void parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode); | |
153 | ||
154 | static const int32_t kNumLineBuffers=3; | |
155 | ||
156 | PropertyNames *icuPnames; // owned | |
157 | const PropertyNames *pnames; // aliased | |
158 | FILE *file; | |
159 | int32_t defaultLineIndex, blockLineIndex, lineIndex; | |
160 | int32_t lineNumber; | |
161 | LineType lineType; | |
162 | char *fieldLimit; | |
163 | char *lineLimit; | |
164 | ||
165 | UVersionInfo ucdVersion; | |
166 | UniProps defaultProps, blockProps, cpProps; | |
167 | // Multiple lines so that default and block properties can maintain pointers | |
168 | // into their line buffers. | |
169 | char lines[kNumLineBuffers][4096]; | |
170 | }; | |
171 | ||
172 | U_NAMESPACE_END | |
173 | ||
174 | #endif // __PPUCD_H__ |