]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/unicode/ucol.h
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / i18n / unicode / ucol.h
1 /*
2 *******************************************************************************
3 * Copyright (c) 1996-2003, International Business Machines Corporation and others.
4 * All Rights Reserved.
5 *******************************************************************************
6 */
7
8 #ifndef UCOL_H
9 #define UCOL_H
10
11 #include "unicode/utypes.h"
12
13 #if !UCONFIG_NO_COLLATION
14
15 #include "unicode/unorm.h"
16 #include "unicode/parseerr.h"
17 #include "unicode/uloc.h"
18 #include "unicode/uset.h"
19
20 /**
21 * \file
22 * \brief C API: Collator
23 *
24 * <h2> Collator C API </h2>
25 *
26 * The C API for Collator performs locale-sensitive
27 * string comparison. You use this service to build
28 * searching and sorting routines for natural language text.
29 * <em>Important: </em>The ICU collation service has been reimplemented
30 * in order to achieve better performance and UCA compliance.
31 * For details, see the
32 * <a href="http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/collation/ICU_collation_design.htm">
33 * collation design document</a>.
34 * <p>
35 * For more information about the collation service see
36 * <a href="http://oss.software.ibm.com/icu/userguide/Collate_Intro.html">the users guide</a>.
37 * <p>
38 * Collation service provides correct sorting orders for most locales supported in ICU.
39 * If specific data for a locale is not available, the order eventually falls back
40 * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>.
41 * <p>
42 * Sort ordering may be customized by providing your own set of rules. For more on
43 * this subject see the
44 * <a href="http://oss.software.ibm.com/icu/userguide/Collate_Customization.html">
45 * Collation customization</a> section of the users guide.
46 * <p>
47 * @see UCollationResult
48 * @see UNormalizationMode
49 * @see UCollationStrength
50 * @see UCollationElements
51 */
52
53 /** A collation element iterator.
54 * For usage in C programs. Only forward-declared here; its members are not defined in this header.
55 */
56 struct collIterate;
57 /** Structure representing a collation element iterator instance.
58 * @stable ICU 2.0
59 */
60 typedef struct collIterate collIterate;
61
62 /** A collator.
63 * For usage in C programs. Only forward-declared here; its members are not defined in this header.
64 */
65 struct UCollator;
66 /** Structure representing a collator object instance.
67 * @stable ICU 2.0
68 */
69 typedef struct UCollator UCollator;
70
71
72 /**
73 * UCOL_LESS is returned if source string is compared to be less than target
74 * string in the ucol_strcoll() method.
75 * UCOL_EQUAL is returned if source string is compared to be equal to target
76 * string in the ucol_strcoll() method.
77 * UCOL_GREATER is returned if source string is compared to be greater than
78 * target string in the ucol_strcoll() method.
79 * @see ucol_strcoll()
80 * <p>
81 * Possible values for a comparison result
82 * @stable ICU 2.0
83 */
84 typedef enum {
85 /** string a == string b */
86 UCOL_EQUAL = 0,
87 /** string a > string b */
88 UCOL_GREATER = 1,
89 /** string a < string b */
90 UCOL_LESS = -1
91 } UCollationResult ;
92
93
94 /** Enum containing attribute values for controlling collation behavior.
95 * Here are all the allowable values. Not every attribute can take every value. The only
96 * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined
97 * value for that locale.
98 * @stable ICU 2.0
99 */
100 typedef enum {
101 /** accepted by most attributes */
102 UCOL_DEFAULT = -1,
103
104 /** Primary collation strength */
105 UCOL_PRIMARY = 0,
106 /** Secondary collation strength */
107 UCOL_SECONDARY = 1,
108 /** Tertiary collation strength */
109 UCOL_TERTIARY = 2,
110 /** Default collation strength */
111 UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY,
112 UCOL_CE_STRENGTH_LIMIT, /**< Implicitly UCOL_DEFAULT_STRENGTH + 1, i.e. 3 - the same value as UCOL_QUATERNARY. Presumably an exclusive limit for strength values - TODO confirm. */
113 /** Quaternary collation strength */
114 UCOL_QUATERNARY=3,
115 /** Identical collation strength */
116 UCOL_IDENTICAL=15,
117 UCOL_STRENGTH_LIMIT, /**< Implicitly UCOL_IDENTICAL + 1, i.e. 16 - the same value as UCOL_OFF. */
118
119 /** Turn the feature off - works for UCOL_FRENCH_COLLATION,
120 UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
121 & UCOL_DECOMPOSITION_MODE*/
122 UCOL_OFF = 16,
123 /** Turn the feature on - works for UCOL_FRENCH_COLLATION,
124 UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
125 & UCOL_DECOMPOSITION_MODE*/
126 UCOL_ON = 17,
127
128 /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */
129 UCOL_SHIFTED = 20,
130 /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */
131 UCOL_NON_IGNORABLE = 21,
132
133 /** Valid for UCOL_CASE_FIRST -
134 lower case sorts before upper case */
135 UCOL_LOWER_FIRST = 24,
136 /** upper case sorts before lower case */
137 UCOL_UPPER_FIRST = 25,
138
139 UCOL_ATTRIBUTE_VALUE_COUNT /**< Implicitly UCOL_UPPER_FIRST + 1, i.e. 26. The values above are sparse, so this is one past the largest value, not the number of enumerators. */
140
141 } UColAttributeValue;
142
143 /**
144 * Base letter represents a primary difference. Set comparison
145 * level to UCOL_PRIMARY to ignore secondary and tertiary differences.
146 * Use this to set the strength of a Collator object.
147 * Example of primary difference, "abc" &lt; "abd"
148 *
149 * Diacritical differences on the same base letter represent a secondary
150 * difference. Set comparison level to UCOL_SECONDARY to ignore tertiary
151 * differences. Use this to set the strength of a Collator object.
152 * Example of secondary difference, "ä" >> "a".
153 *
154 * Uppercase and lowercase versions of the same character represent a
155 * tertiary difference. Set comparison level to UCOL_TERTIARY to include
156 * all comparison differences. Use this to set the strength of a Collator
157 * object.
158 * Example of tertiary difference, "abc" &lt;&lt;&lt; "ABC".
159 *
160 * Two characters are considered "identical" when they have the same
161 * unicode spellings. UCOL_IDENTICAL.
162 * For example, "ä" == "ä".
163 *
164 * UCollationStrength is also used to determine the strength of sort keys
165 * generated from UCollator objects.
166 * These values can now be found in the UColAttributeValue enum.
167 * @stable ICU 2.0
168 **/
169 typedef UColAttributeValue UCollationStrength;
170
171 /** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT
172 * value, as well as the values specific to each one.
173 * @stable ICU 2.0
174 */
175 typedef enum {
176 /** Attribute for direction of secondary weights - used in French.\
177 * Acceptable values are UCOL_ON, which results in secondary weights
178 * being considered backwards and UCOL_OFF which treats secondary
179 * weights in the order they appear.*/
180 UCOL_FRENCH_COLLATION,
181 /** Attribute for handling variable elements.\
182 * Acceptable values are UCOL_NON_IGNORABLE (default)
183 * which treats all the codepoints with non-ignorable
184 * primary weights in the same way,
185 * and UCOL_SHIFTED which causes codepoints with primary
186 * weights that are equal or below the variable top value
187 * to be ignored on primary level and moved to the quaternary
188 * level.*/
189 UCOL_ALTERNATE_HANDLING,
190 /** Controls the ordering of upper and lower case letters.\
191 * Acceptable values are UCOL_OFF (default), which orders
192 * upper and lower case letters in accordance to their tertiary
193 * weights, UCOL_UPPER_FIRST which forces upper case letters to
194 * sort before lower case letters, and UCOL_LOWER_FIRST which does
195 * the opposite. */
196 UCOL_CASE_FIRST,
197 /** Controls whether an extra case level (positioned before the third
198 * level) is generated or not.\ Acceptable values are UCOL_OFF (default),
199 * when case level is not generated, and UCOL_ON which causes the case
200 * level to be generated.\ Contents of the case level are affected by
201 * the value of UCOL_CASE_FIRST attribute.\ A simple way to ignore
202 * accent differences in a string is to set the strength to UCOL_PRIMARY
203 * and enable case level. */
204 UCOL_CASE_LEVEL,
205 /** Controls whether the normalization check and necessary normalizations
206 * are performed.\ When set to UCOL_OFF (default) no normalization check
207 * is performed.\ The correctness of the result is guaranteed only if the
208 * input data is in so-called FCD form (see users manual for more info).\
209 * When set to UCOL_ON, an incremental check is performed to see whether the input data
210 * is in the FCD form.\ If the data is not in the FCD form, incremental
211 * NFD normalization is performed. */
212 UCOL_NORMALIZATION_MODE,
213 /** An alias for UCOL_NORMALIZATION_MODE attribute */
214 UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE,
215 /** The strength attribute.\ Can be either UCOL_PRIMARY, UCOL_SECONDARY,
216 * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL.\ The usual strength
217 * for most locales (except Japanese) is tertiary.\ Quaternary strength
218 * is useful when combined with shifted setting for alternate handling
219 * attribute and for JIS x 4061 collation, when it is used to distinguish
220 * between Katakana and Hiragana (this is achieved by setting the
221 * UCOL_HIRAGANA_QUATERNARY_MODE attribute to on).\ Otherwise, quaternary level
222 * is affected only by the number of non ignorable code points in
223 * the string.\ Identical strength is rarely useful, as it amounts
224 * to codepoints of the NFD form of the string. */
225 UCOL_STRENGTH,
226 /** when turned on, this attribute
227 * positions Hiragana before all
228 * non-ignorables on quaternary level
229 * This is a sneaky way to produce JIS
230 * sort order */
231 UCOL_HIRAGANA_QUATERNARY_MODE,
232 /** when turned on, this attribute
233 * generates a collation key
234 * for the numeric value of substrings
235 * of digits. This is a way to get '100'
236 * to sort AFTER '2'.*/
237 UCOL_NUMERIC_COLLATION,
238 UCOL_ATTRIBUTE_COUNT /**< Implicitly 8: one past UCOL_NUMERIC_COLLATION. UCOL_DECOMPOSITION_MODE is an alias and does not take a value of its own. */
239 } UColAttribute;
240
241 /** Options for retrieving the rule string; passed as the delta argument of ucol_getRulesEx.
242 * @stable ICU 2.0
243 */
244 typedef enum {
245 /** Retrieve tailoring only */
246 UCOL_TAILORING_ONLY,
247 /** Retrieve UCA rules and tailoring */
248 UCOL_FULL_RULES
249 } UColRuleOption ;
250
251 /**
252 * Open a UCollator for comparing strings.
253 * The UCollator pointer is used in all the calls to the Collation
254 * service. When finished, the collator must be disposed of by calling
255 * \Ref{ucol_close}.
256 * @param loc The locale containing the required collation rules.
257 * Special values for locales can be passed in -
258 * if NULL is passed for the locale, the default locale
259 * collation rules will be used. If an empty string ("") or
260 * "root" is passed, UCA rules will be used.
261 * @param status A pointer to an UErrorCode to receive any errors
262 * @return A pointer to a UCollator, or 0 if an error occurred.
263 * @see ucol_openRules
264 * @see ucol_safeClone
265 * @see ucol_close
266 * @stable ICU 2.0
267 */
268 U_CAPI UCollator* U_EXPORT2
269 ucol_open(const char *loc, UErrorCode *status);
270
271 /**
272 * Produce an UCollator instance according to the rules supplied.
273 * The rules are used to change the default ordering, defined in the
274 * UCA in a process called tailoring. The resulting UCollator pointer
275 * can be used in the same way as the one obtained by \Ref{ucol_open}.
276 * @param rules A string describing the collation rules. For the syntax
277 * of the rules please see users guide.
278 * @param rulesLength The length of rules, or -1 if null-terminated.
279 * @param normalizationMode The normalization mode: One of
280 * UCOL_OFF (expect the text to not need normalization),
281 * UCOL_ON (normalize), or
282 * UCOL_DEFAULT (set the mode according to the rules)
283 * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
284 * UCOL_TERTIARY, UCOL_IDENTICAL,UCOL_DEFAULT_STRENGTH - can be also set in the rules.
285 * @param parseError A pointer to UParseError to receive information about errors
286 * occurred during parsing. This argument can currently be set
287 * to NULL, but at users own risk. Please provide a real structure.
288 * @param status A pointer to an UErrorCode to receive any errors
289 * @return A pointer to a UCollator.\ It is not guaranteed that NULL be returned in case
290 * of error - please use status argument to check for errors.
291 * @see ucol_open
292 * @see ucol_safeClone
293 * @see ucol_close
294 * @stable ICU 2.0
295 */
296 U_CAPI UCollator* U_EXPORT2
297 ucol_openRules( const UChar *rules,
298 int32_t rulesLength,
299 UColAttributeValue normalizationMode,
300 UCollationStrength strength,
301 UParseError *parseError,
302 UErrorCode *status);
303
304 /**
305 * Close a UCollator.
306 * Once closed, a UCollator must not be used again.\ Every open collator should
307 * be closed.\ Otherwise, a memory leak will result.
308 * @param coll The UCollator to close.
309 * @see ucol_open
310 * @see ucol_openRules
311 * @see ucol_safeClone
312 * @stable ICU 2.0
313 */
314 U_CAPI void U_EXPORT2
315 ucol_close(UCollator *coll);
316
317 /**
318 * Compare two strings.
319 * The strings will be compared using the options already specified.
320 * @param coll The UCollator containing the comparison rules.
321 * @param source The source string.
322 * @param sourceLength The length of source, or -1 if null-terminated.
323 * @param target The target string.
324 * @param targetLength The length of target, or -1 if null-terminated.
325 * @return The result of comparing the strings; one of UCOL_EQUAL,
326 * UCOL_GREATER, UCOL_LESS (source equal to, greater than, or less than target)
327 * @see ucol_greater
328 * @see ucol_greaterOrEqual
329 * @see ucol_equal
330 * @stable ICU 2.0
331 */
332 U_CAPI UCollationResult U_EXPORT2
333 ucol_strcoll( const UCollator *coll,
334 const UChar *source,
335 int32_t sourceLength,
336 const UChar *target,
337 int32_t targetLength);
338
339 /**
340 * Determine if one string is greater than another.
341 * This function is equivalent to testing \Ref{ucol_strcoll} == UCOL_GREATER.
342 * @param coll The UCollator containing the comparison rules.
343 * @param source The source string.
344 * @param sourceLength The length of source, or -1 if null-terminated.
345 * @param target The target string.
346 * @param targetLength The length of target, or -1 if null-terminated.
347 * @return TRUE if source is greater than target, FALSE otherwise.
348 * @see ucol_strcoll
349 * @see ucol_greaterOrEqual
350 * @see ucol_equal
351 * @stable ICU 2.0
352 */
353 U_CAPI UBool U_EXPORT2
354 ucol_greater(const UCollator *coll,
355 const UChar *source, int32_t sourceLength,
356 const UChar *target, int32_t targetLength);
357
358 /**
359 * Determine if one string is greater than or equal to another.
360 * This function is equivalent to testing \Ref{ucol_strcoll} != UCOL_LESS.
361 * @param coll The UCollator containing the comparison rules.
362 * @param source The source string.
363 * @param sourceLength The length of source, or -1 if null-terminated.
364 * @param target The target string.
365 * @param targetLength The length of target, or -1 if null-terminated.
366 * @return TRUE if source is greater than or equal to target, FALSE otherwise.
367 * @see ucol_strcoll
368 * @see ucol_greater
369 * @see ucol_equal
370 * @stable ICU 2.0
371 */
372 U_CAPI UBool U_EXPORT2
373 ucol_greaterOrEqual(const UCollator *coll,
374 const UChar *source, int32_t sourceLength,
375 const UChar *target, int32_t targetLength);
376
377 /**
378 * Compare two strings for equality.
379 * This function is equivalent to testing \Ref{ucol_strcoll} == UCOL_EQUAL.
380 * @param coll The UCollator containing the comparison rules.
381 * @param source The source string.
382 * @param sourceLength The length of source, or -1 if null-terminated.
383 * @param target The target string.
384 * @param targetLength The length of target, or -1 if null-terminated.
385 * @return TRUE if source is equal to target, FALSE otherwise.
386 * @see ucol_strcoll
387 * @see ucol_greater
388 * @see ucol_greaterOrEqual
389 * @stable ICU 2.0
390 */
391 U_CAPI UBool U_EXPORT2
392 ucol_equal(const UCollator *coll,
393 const UChar *source, int32_t sourceLength,
394 const UChar *target, int32_t targetLength);
395
396 /**
397 * Compare two UTF-8 encoded strings, each accessed through a UCharIterator.
398 * The strings will be compared using the options already specified.
399 * @param coll The UCollator containing the comparison rules.
400 * @param sIter The source string iterator.
401 * @param tIter The target string iterator.
402 * @return The result of comparing the strings; one of UCOL_EQUAL,
403 * UCOL_GREATER, UCOL_LESS
404 * @param status A pointer to an UErrorCode to receive any errors
405 * @see ucol_strcoll
406 * @draft ICU 2.6
407 */
408 U_CAPI UCollationResult U_EXPORT2
409 ucol_strcollIter( const UCollator *coll,
410 UCharIterator *sIter,
411 UCharIterator *tIter,
412 UErrorCode *status);
413
414 /**
415 * Get the collation strength used in a UCollator.
416 * The strength influences how strings are compared (see UColAttributeValue for the values).
417 * @param coll The UCollator to query.
418 * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
419 * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL
420 * @see ucol_setStrength
421 * @stable ICU 2.0
422 */
423 U_CAPI UCollationStrength U_EXPORT2
424 ucol_getStrength(const UCollator *coll);
425
426 /**
427 * Set the collation strength used in a UCollator.
428 * The strength influences how strings are compared (see UColAttributeValue for the values).
429 * @param coll The UCollator to set.
430 * @param strength The desired collation strength; one of UCOL_PRIMARY,
431 * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT
432 * @see ucol_getStrength
433 * @stable ICU 2.0
434 */
435 U_CAPI void U_EXPORT2
436 ucol_setStrength(UCollator *coll,
437 UCollationStrength strength);
438
439 /**
440 * Get the display name for a UCollator.
441 * The display name is suitable for presentation to a user.
442 * @param objLoc The locale of the collator in question.
443 * @param dispLoc The locale for display.
444 * @param result A pointer to a buffer to receive the display name.
445 * @param resultLength The maximum size of result.
446 * @param status A pointer to an UErrorCode to receive any errors
447 * @return The total buffer size needed; if greater than resultLength,
448 * the output was truncated.
449 * @stable ICU 2.0
450 */
451 U_CAPI int32_t U_EXPORT2
452 ucol_getDisplayName( const char *objLoc,
453 const char *dispLoc,
454 UChar *result,
455 int32_t resultLength,
456 UErrorCode *status);
457
458 /**
459 * Get a locale for which collation rules are available.
460 * A UCollator in a locale returned by this function will perform the correct
461 * collation for the locale.
462 * @param index The index of the desired locale; ucol_countAvailable gives the number of locales.
463 * @return A locale for which collation rules are available, or 0 if none.
464 * @see ucol_countAvailable
465 * @stable ICU 2.0
466 */
467 U_CAPI const char* U_EXPORT2
468 ucol_getAvailable(int32_t index);
469
470 /**
471 * Determine how many locales have collation rules available.
472 * This function is most useful for determining the loop ending condition for
473 * calls to \Ref{ucol_getAvailable}.
474 * @return The number of locales for which collation rules are available.
475 * @see ucol_getAvailable
476 * @stable ICU 2.0
477 */
478 U_CAPI int32_t U_EXPORT2
479 ucol_countAvailable(void);
480
481 /**
482 * Get the collation rules from a UCollator.
483 * The rules will follow the rule syntax.
484 * @param coll The UCollator to query.
485 * @param length Output parameter; presumably receives the length of the returned rules - TODO confirm against the implementation.
486 * @return The collation rules.
487 * @stable ICU 2.0
488 */
489 U_CAPI const UChar* U_EXPORT2
490 ucol_getRules( const UCollator *coll,
491 int32_t *length);
492
493 /**
494 * Get a sort key for a string from a UCollator.
495 * Sort keys may be compared using <TT>strcmp</TT>.
496 * @param coll The UCollator containing the collation rules.
497 * @param source The string to transform.
498 * @param sourceLength The length of source, or -1 if null-terminated.
499 * @param result A pointer to a buffer to receive the sort key.
500 * @param resultLength The maximum size of result.
501 * @return The size needed to fully store the sort key.
502 * @see ucol_keyHashCode
503 * @stable ICU 2.0
504 */
505 U_CAPI int32_t U_EXPORT2
506 ucol_getSortKey(const UCollator *coll,
507 const UChar *source,
508 int32_t sourceLength,
509 uint8_t *result,
510 int32_t resultLength);
511
512
513 /** Gets the next count bytes of a sort key. Caller needs
514 * to preserve state array between calls and to provide
515 * the same type of UCharIterator set with the same string.
516 * The destination buffer provided must be big enough to store
517 * the number of requested bytes. Generated sortkey is not
518 * compatible with sortkeys generated using ucol_getSortKey
519 * API, since we don't do any compression. If uncompressed
520 * sortkeys are required, this API can be used.
521 * @param coll The UCollator containing the collation rules.
522 * @param iter UCharIterator containing the string we need
523 * the sort key to be calculated for.
524 * @param state Opaque state of sortkey iteration (a two-element array).
525 * @param dest Buffer to hold the resulting sortkey part
526 * @param count number of sort key bytes required.
527 * @param status error code indicator.
528 * @return the actual number of bytes of a sortkey. It can be
529 * smaller than count if we have reached the end of
530 * the sort key.
531 * @draft ICU 2.6
532 */
533 U_CAPI int32_t U_EXPORT2
534 ucol_nextSortKeyPart(const UCollator *coll,
535 UCharIterator *iter,
536 uint32_t state[2],
537 uint8_t *dest, int32_t count,
538 UErrorCode *status);
539
540 /** Enum that is taken by the ucol_getBound API.
541 * See below for explanation.
542 * Do not change the values assigned to the
543 * members of this enum. Underlying code
544 * depends on them having these numbers.
545 * @stable ICU 2.0
546 */
547 typedef enum {
548 /** lower bound */
549 UCOL_BOUND_LOWER = 0,
550 /** upper bound that will match strings of exact size */
551 UCOL_BOUND_UPPER = 1,
552 /** upper bound that will match all the strings that have the same initial substring as the given string */
553 UCOL_BOUND_UPPER_LONG = 2,
554 UCOL_BOUND_VALUE_COUNT /**< Implicitly 3: the number of bound modes above. */
555 } UColBoundMode;
556
557 /**
558 * Produce a bound for a given sortkey and a number of levels.
559 * The return value is always the number of bytes needed, regardless of
560 * whether the result buffer was big enough or even valid.<br>
561 * Resulting bounds can be used to produce a range of strings that are
562 * between upper and lower bounds. For example, if bounds are produced
563 * for a sortkey of string "smith", strings between upper and lower
564 * bounds with one level would include "Smith", "SMITH", "sMiTh".<br>
565 * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
566 * is produced, strings matched would be as above. However, if bound
567 * produced using UCOL_BOUND_UPPER_LONG is used, the above example will
568 * also match "Smithsonian" and similar.<br>
569 * For more on usage, see example in cintltst/capitst.c in procedure
570 * TestBounds.
571 * Sort keys may be compared using <TT>strcmp</TT>.
572 * @param source The source sortkey.
573 * @param sourceLength The length of source, or -1 if null-terminated.
574 * (If an unmodified sortkey is passed, it is always null
575 * terminated).
576 * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which
577 * produces a lower inclusive bound, UCOL_BOUND_UPPER, that
578 * produces upper bound that matches strings of the same length
579 * or UCOL_BOUND_UPPER_LONG that matches strings that have the
580 * same starting substring as the source string.
581 * @param noOfLevels Number of levels required in the resulting bound (for most
582 * uses, the recommended value is 1). See users guide for
583 * explanation on number of levels a sortkey can have.
584 * @param result A pointer to a buffer to receive the resulting sortkey.
585 * @param resultLength The maximum size of result.
586 * @param status Used for returning error code if something went wrong. If the
587 * number of levels requested is higher than the number of levels
588 * in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is
589 * issued.
590 * @return The size needed to fully store the bound.
591 * @see ucol_keyHashCode
592 * @stable ICU 2.1
593 */
594 U_CAPI int32_t U_EXPORT2
595 ucol_getBound(const uint8_t *source,
596 int32_t sourceLength,
597 UColBoundMode boundType,
598 uint32_t noOfLevels,
599 uint8_t *result,
600 int32_t resultLength,
601 UErrorCode *status);
602
603 /**
604 * Gets the version information for a Collator. Version is currently
605 * an opaque 32-bit number which depends, among other things, on major
606 * versions of the collator tailoring and UCA.
607 * @param coll The UCollator to query.
608 * @param info Output parameter that is filled in with the version information.
609 * @stable ICU 2.0
610 */
611 U_CAPI void U_EXPORT2
612 ucol_getVersion(const UCollator* coll, UVersionInfo info);
613
614
615 /**
616 * Merge two sort keys. The levels are merged with their corresponding counterparts
617 * (primaries with primaries, secondaries with secondaries etc.). Between the values
618 * from the same level a separator is inserted.
619 * example (uncompressed):
620 * 191B1D 01 050505 01 910505 00 and 1F2123 01 050505 01 910505 00
621 * will be merged as
622 * 191B1D 02 1F212301 050505 02 050505 01 910505 02 910505 00
623 * This allows for concatenating of first and last names for sorting, among other things.
624 * If the destination buffer is not big enough, the results are undefined.
625 * If either source length is zero or either source pointer is NULL/undefined,
626 * the result is of size zero.
627 * @param src1 pointer to the first sortkey
628 * @param src1Length length of the first sortkey
629 * @param src2 pointer to the second sortkey
630 * @param src2Length length of the second sortkey
631 * @param dest buffer to hold the result
632 * @param destCapacity size of the buffer for the result
633 * @return size of the result. If the buffer is big enough, the size is always
634 * src1Length+src2Length-1
635 * @stable ICU 2.0
636 */
637 U_CAPI int32_t U_EXPORT2
638 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
639 const uint8_t *src2, int32_t src2Length,
640 uint8_t *dest, int32_t destCapacity);
641
642 /**
643 * Universal attribute setter
644 * @param coll collator whose attributes are to be changed
645 * @param attr attribute type
646 * @param value attribute value
647 * @param status to indicate whether the operation went on smoothly or there were errors
648 * @see UColAttribute
649 * @see UColAttributeValue
650 * @see ucol_getAttribute
651 * @stable ICU 2.0
652 */
653 U_CAPI void U_EXPORT2
654 ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status);
655
656 /**
657 * Universal attribute getter
658 * @param coll collator whose attributes are to be queried
659 * @param attr attribute type
660 * @return attribute value
661 * @param status to indicate whether the operation went on smoothly or there were errors
662 * @see UColAttribute
663 * @see UColAttributeValue
664 * @see ucol_setAttribute
665 * @stable ICU 2.0
666 */
667 U_CAPI UColAttributeValue U_EXPORT2
668 ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status);
669
670 /** Variable top
671 * is a two byte primary value which causes all the codepoints with primary values that
672 * are less than or equal to the variable top to be shifted when alternate handling is set
673 * to UCOL_SHIFTED.
674 * Sets the variable top to a collation element value of a string supplied.
675 * @param coll collator whose variable top needs to be changed
676 * @param varTop one or more (if contraction) UChars to which the variable top should be set
677 * @param len length of variable top string. If -1 it is considered to be zero terminated.
678 * @param status error code. If error code is set, the return value is undefined.
679 * Errors set by this function are: <br>
680 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such
681 * contraction<br>
682 * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
683 * @return a 32 bit value containing the value of the variable top in upper 16 bits.
684 * Lower 16 bits are undefined
685 * @see ucol_getVariableTop
686 * @see ucol_restoreVariableTop
687 * @stable ICU 2.0
688 */
689 U_CAPI uint32_t U_EXPORT2
690 ucol_setVariableTop(UCollator *coll,
691 const UChar *varTop, int32_t len,
692 UErrorCode *status);
693
694 /**
695 * Gets the variable top value of a Collator.
696 * Lower 16 bits are undefined and should be ignored.
697 * @param coll collator whose variable top needs to be retrieved
698 * @param status error code (not changed by function). If error code is set,
699 * the return value is undefined.
700 * @return the variable top value of a Collator.
701 * @see ucol_setVariableTop
702 * @see ucol_restoreVariableTop
703 * @stable ICU 2.0
704 */
705 U_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status);
706
707 /**
708 * Sets the variable top to a collation element value supplied. Variable top is
709 * set to the upper 16 bits.
710 * Lower 16 bits are ignored.
711 * @param coll collator whose variable top needs to be changed
712 * @param varTop CE value, as returned by ucol_setVariableTop or ucol_getVariableTop
713 * @param status error code (not changed by function)
714 * @see ucol_getVariableTop
715 * @see ucol_setVariableTop
716 * @stable ICU 2.0
717 */
718 U_CAPI void U_EXPORT2
719 ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status);
720
721 /**
722 * Thread safe cloning operation. The result is a clone of a given collator.
723 * @param coll collator to be cloned
724 * @param stackBuffer user allocated space for the new clone.
725 * If NULL new memory will be allocated.
726 * If buffer is not large enough, new memory will be allocated.
727 * Clients can use U_COL_SAFECLONE_BUFFERSIZE as the buffer size.
728 * This will probably be enough to avoid memory allocations.
729 * @param pBufferSize pointer to size of allocated space.
730 * If *pBufferSize == 0, a sufficient size for use in cloning will
731 * be returned ('pre-flighting')
732 * If *pBufferSize is not enough for a stack-based safe clone,
733 * new memory will be allocated.
734 * @param status to indicate whether the operation went on smoothly or there were errors
735 * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any
736 * allocations were necessary.
737 * @return pointer to the new clone
738 * @see ucol_open
739 * @see ucol_openRules
740 * @see ucol_close
741 * @stable ICU 2.0
742 */
743 U_CAPI UCollator* U_EXPORT2
744 ucol_safeClone(const UCollator *coll,
745 void *stackBuffer,
746 int32_t *pBufferSize,
747 UErrorCode *status);
748
749 /** Default memory size for the new clone, for use as the stack buffer size given to ucol_safeClone. It needs to be this large for os/400 large pointers
750 * @stable ICU 2.0
751 */
752 #define U_COL_SAFECLONE_BUFFERSIZE 512
753
754 /**
755 * Returns current rules. Delta defines whether full rules are returned or just the tailoring.
756 * Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough
757 * to store rules, will store up to available space.
758 * @param coll collator to get the rules from
759 * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
760 * @param buffer buffer to store the result in. If NULL, you'll get no rules.
761 * @param bufferLen length of buffer to store rules in. If less than needed you'll get only the part that fits in.
762 * @return number of UChars needed to store the rules
763 * @stable ICU 2.0
764 */
765 U_CAPI int32_t U_EXPORT2
766 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen);
767
768 /**
769 * Gets the locale name of the collator. If the collator
770 * was instantiated from rules, then this function returns
771 * NULL.
772 * @param coll The UCollator for which the locale is needed
773 * @param type You can choose between requested, valid and actual
774 * locale. For description see the definition of
775 * ULocDataLocaleType in uloc.h
776 * @param status error code of the operation
777 * @return real locale name from which the collation data comes.
778 * If the collator was instantiated from rules, returns
779 * NULL.
780 * @stable ICU 2.1
781 */
782 U_CAPI const char * U_EXPORT2
783 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
784
785
786 /**
787 * Get a Unicode set that contains all the characters and sequences tailored in
788 * this collator. The result must be disposed of by using uset_close.
789 * @param coll The UCollator for which we want to get tailored chars
790 * @param status error code of the operation
791 * @return a pointer to a newly created USet. Must be disposed of by using uset_close
792 * @see ucol_openRules
793 * @see uset_close
794 * @draft ICU 2.4
795 */
796 U_CAPI USet * U_EXPORT2
797 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status);
798
799 #endif /* #if !UCONFIG_NO_COLLATION */
800
801 #endif