1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 2011-2013, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 2011dec11
14 * created by: Markus W. Scherer
20 #include "unicode/utypes.h"
21 #include "unicode/uniset.h"
22 #include "unicode/unistr.h"
26 /** Additions to the uchar.h enum UProperty. */
29 PPUCD_NAME_ALIAS
=UCHAR_STRING_LIMIT
,
30 PPUCD_CONDITIONAL_CASE_MAPPINGS
,
31 PPUCD_TURKIC_CASE_FOLDING
36 class U_TOOLUTIL_API PropertyNames
{
38 virtual ~PropertyNames();
39 virtual int32_t getPropertyEnum(const char *name
) const;
40 virtual int32_t getPropertyValueEnum(int32_t property
, const char *name
) const;
43 struct U_TOOLUTIL_API UniProps
{
47 int32_t getIntProp(int32_t prop
) const { return intProps
[prop
-UCHAR_INT_START
]; }
50 UBool binProps
[UCHAR_BINARY_LIMIT
];
51 int32_t intProps
[UCHAR_INT_LIMIT
-UCHAR_INT_START
];
54 UChar32 scf
, slc
, stc
, suc
;
56 const char *numericValue
;
58 const char *nameAlias
;
59 UnicodeString cf
, lc
, tc
, uc
;
63 class U_TOOLUTIL_API PreparsedUCD
{
66 /** No line, end of file. */
68 /** Empty line. (Might contain a comment.) */
74 /** property;Binary;Alpha;Alphabetic */
76 /** binary;N;No;F;False */
78 /** value;gc;Zs;Space_Separator */
81 /** defaults;0000..10FFFF;age=NA;bc=L;... */
83 /** block;0000..007F;age=1.1;blk=ASCII;ea=Na;... */
85 /** cp;0030;AHex;bc=EN;gc=Nd;na=DIGIT ZERO;... */
87 /** unassigned;E01F0..E0FFF;bc=BN;CWKCF;DI;GCB=CN;NFKC_CF= */
90 /** algnamesrange;4E00..9FCC;han;CJK UNIFIED IDEOGRAPH- */
98 * Prepare this object for a new, empty package.
100 PreparsedUCD(const char *filename
, UErrorCode
&errorCode
);
105 /** Sets (aliases) a non-standard PropertyNames implementation. Caller retains ownership. */
106 void setPropertyNames(const PropertyNames
*pn
) { pnames
=pn
; }
109 * Reads a line from the preparsed UCD file.
110 * Splits the line by replacing each ';' with a NUL.
112 LineType
readLine(UErrorCode
&errorCode
);
114 /** Returns the number of the line read by readLine(). */
115 int32_t getLineNumber() const { return lineNumber
; }
/**
 * Returns the line's next field, or NULL.
 * (Fields are the ';'-separated pieces that readLine() NUL-terminates.)
 */
const char *nextField();
120 /** Returns the Unicode version when or after the UNICODE_VERSION_LINE has been read. */
121 const UVersionInfo
&getUnicodeVersion() const { return ucdVersion
; }
123 /** Returns TRUE if the current line has property values. */
124 UBool
lineHasPropertyValues() const {
125 return DEFAULTS_LINE
<=lineType
&& lineType
<=UNASSIGNED_LINE
;
129 * Parses properties from the current line.
130 * Clears newValues and sets UProperty codes for property values mentioned
131 * on the current line (as opposed to being inherited).
132 * Returns a pointer to the filled-in UniProps, or NULL if something went wrong.
133 * The returned UniProps are usable until the next line of the same type is read.
135 const UniProps
*getProps(UnicodeSet
&newValues
, UErrorCode
&errorCode
);
138 * Returns the code point range for the current algnamesrange line.
139 * Calls & parses nextField().
140 * Further nextField() calls will yield the range's type & prefix string.
141 * Returns U_SUCCESS(errorCode).
143 UBool
getRangeForAlgNames(UChar32
&start
, UChar32
&end
, UErrorCode
&errorCode
);
146 UBool
isLineBufferAvailable(int32_t i
) {
147 return defaultLineIndex
!=i
&& blockLineIndex
!=i
;
/**
 * Resets the field iterator and returns the line's first field
 * (the line type field).
 */
const char *firstField();
153 UBool
parseProperty(UniProps
&props
, const char *field
, UnicodeSet
&newValues
,
154 UErrorCode
&errorCode
);
155 UChar32
parseCodePoint(const char *s
, UErrorCode
&errorCode
);
156 UBool
parseCodePointRange(const char *s
, UChar32
&start
, UChar32
&end
, UErrorCode
&errorCode
);
157 void parseString(const char *s
, UnicodeString
&uni
, UErrorCode
&errorCode
);
158 void parseScriptExtensions(const char *s
, UnicodeSet
&scx
, UErrorCode
&errorCode
);
160 static const int32_t kNumLineBuffers
=3;
162 PropertyNames
*icuPnames
; // owned
163 const PropertyNames
*pnames
; // aliased
165 int32_t defaultLineIndex
, blockLineIndex
, lineIndex
;
171 UVersionInfo ucdVersion
;
172 UniProps defaultProps
, blockProps
, cpProps
;
173 UnicodeSet blockValues
;
174 // Multiple lines so that default and block properties can maintain pointers
175 // into their line buffers.
176 char lines
[kNumLineBuffers
][4096];
181 #endif // __PPUCD_H__