git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/common/unicode/ustring.h

... / ...

Commit	Line	Data
	1	/*
	2	**********************************************************************
	3	* Copyright (C) 1998-2008, International Business Machines
	4	* Corporation and others. All Rights Reserved.
	5	**********************************************************************
	6	*
	7	* File ustring.h
	8	*
	9	* Modification History:
	10	*
	11	* Date Name Description
	12	* 12/07/98 bertrand Creation.
	13	******************************************************************************
	14	*/
	15
	16	#ifndef USTRING_H
	17	#define USTRING_H
	18
	19	#include "unicode/utypes.h"
	20	#include "unicode/putil.h"
	21	#include "unicode/uiter.h"
	22
	23	/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/
	24	#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
	25	# define UBRK_TYPEDEF_UBREAK_ITERATOR
	26	typedef void UBreakIterator;
	27	#endif
	28
	29	/**
	30	* \file
	31	* \brief C API: Unicode string handling functions
	32	*
	33	* These C API functions provide general Unicode string handling.
	34	*
	35	* Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h>
	36	* functions. (For example, they do not check for bad arguments like NULL string pointers.)
	37	* In some cases, only the thread-safe variant of such a function is implemented here
	38	* (see u_strtok_r()).
	39	*
	40	* Other functions provide more Unicode-specific functionality like locale-specific
	41	* upper/lower-casing and string comparison in code point order.
	42	*
	43	* ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units.
	44	* UTF-16 encodes each Unicode code point with either one or two UChar code units.
	45	* (This is the default form of Unicode, and a forward-compatible extension of the original,
	46	* fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0
	47	* in 1996.)
	48	*
	49	* Some APIs accept a 32-bit UChar32 value for a single code point.
	50	*
	51	* ICU also handles 16-bit Unicode text with unpaired surrogates.
	52	* Such text is not well-formed UTF-16.
	53	* Code-point-related functions treat unpaired surrogates as surrogate code points,
	54	* i.e., as separate units.
	55	*
	56	* Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings),
	57	* it is much more efficient even for random access because the code unit values
	58	* for single-unit characters vs. lead units vs. trail units are completely disjoint.
	59	* This means that it is easy to determine character (code point) boundaries from
	60	* random offsets in the string.
	61	*
	62	* Unicode (UTF-16) string processing is optimized for the single-unit case.
	63	* Although it is important to support supplementary characters
	64	* (which use pairs of lead/trail code units called "surrogates"),
	65	* their occurrence is rare. Almost all characters in modern use require only
	66	* a single UChar code unit (i.e., their code point values are <=0xffff).
	67	*
	68	* For more details see the User Guide Strings chapter (http://icu-project.org/userguide/strings.html).
	69	* For a discussion of the handling of unpaired surrogates see also
	70	* Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18.
	71	*/
	72
	73	/**
	74	* \defgroup ustring_ustrlen String Length
	75	* \ingroup ustring_strlen
	76	*/
	77	/*@{*/
	78	/**
	79	* Determine the length of an array of UChar.
	80	*
	81	* @param s The array of UChars, NULL (U+0000) terminated.
	82	* @return The number of UChars in <code>s</code>, minus the terminator.
	83	* @stable ICU 2.0
	84	*/
	85	U_STABLE int32_t U_EXPORT2
	86	u_strlen(const UChar *s);
	87	/*@}*/
	88
	89	/**
	90	* Count Unicode code points in the length UChar code units of the string.
	91	* A code point may occupy either one or two UChar code units.
	92	* Counting code points involves reading all code units.
	93	*
	94	* This function is basically the inverse of the U16_FWD_N() macro (see utf.h).
	95	*
	96	* @param s The input string.
	97	* @param length The number of UChar code units to be checked, or -1 to count all
	98	* code points before the first NUL (U+0000).
	99	* @return The number of code points in the specified code units.
	100	* @stable ICU 2.0
	101	*/
	102	U_STABLE int32_t U_EXPORT2
	103	u_countChar32(const UChar *s, int32_t length);
	104
	105	/**
	106	* Check if the string contains more Unicode code points than a certain number.
	107	* This is more efficient than counting all code points in the entire string
	108	* and comparing that number with a threshold.
	109	* This function may not need to scan the string at all if the length is known
	110	* (not -1 for NUL-termination) and falls within a certain range, and
	111	* never needs to count more than 'number+1' code points.
	112	* Logically equivalent to (u_countChar32(s, length)>number).
	113	* A Unicode code point may occupy either one or two UChar code units.
	114	*
	115	* @param s The input string.
	116	* @param length The length of the string, or -1 if it is NUL-terminated.
	117	* @param number The number of code points in the string is compared against
	118	* the 'number' parameter.
	119	* @return Boolean value for whether the string contains more Unicode code points
	120	* than 'number'. Same as (u_countChar32(s, length)>number).
	121	* @stable ICU 2.4
	122	*/
	123	U_STABLE UBool U_EXPORT2
	124	u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
	125
	126	/**
	127	* Concatenate two ustrings. Appends a copy of <code>src</code>,
	128	* including the null terminator, to <code>dst</code>. The initial copied
	129	* character from <code>src</code> overwrites the null terminator in <code>dst</code>.
	130	*
	131	* @param dst The destination string.
	132	* @param src The source string.
	133	* @return A pointer to <code>dst</code>.
	134	* @stable ICU 2.0
	135	*/
	136	U_STABLE UChar* U_EXPORT2
	137	u_strcat(UChar *dst,
	138	const UChar *src);
	139
	140	/**
	141	* Concatenate two ustrings.
	142	* Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>.
	143	* Adds a terminating NUL.
	144	* If src is too long, then only <code>n-1</code> characters will be copied
	145	* before the terminating NUL.
	146	* If <code>n<=0</code> then dst is not modified.
	147	*
	148	* @param dst The destination string.
	149	* @param src The source string.
	150	* @param n The maximum number of characters to append.
	151	* @return A pointer to <code>dst</code>.
	152	* @stable ICU 2.0
	153	*/
	154	U_STABLE UChar* U_EXPORT2
	155	u_strncat(UChar *dst,
	156	const UChar *src,
	157	int32_t n);
	158
	159	/**
	160	* Find the first occurrence of a substring in a string.
	161	* The substring is found at code point boundaries.
	162	* That means that if the substring begins with
	163	* a trail surrogate or ends with a lead surrogate,
	164	* then it is found only if these surrogates stand alone in the text.
	165	* Otherwise, the substring edge units would be matched against
	166	* halves of surrogate pairs.
	167	*
	168	* @param s The string to search (NUL-terminated).
	169	* @param substring The substring to find (NUL-terminated).
	170	* @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
	171	* or <code>s</code> itself if the <code>substring</code> is empty,
	172	* or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
	173	* @stable ICU 2.0
	174	*
	175	* @see u_strrstr
	176	* @see u_strFindFirst
	177	* @see u_strFindLast
	178	*/
	179	U_STABLE UChar * U_EXPORT2
	180	u_strstr(const UChar *s, const UChar *substring);
	181
	182	/**
	183	* Find the first occurrence of a substring in a string.
	184	* The substring is found at code point boundaries.
	185	* That means that if the substring begins with
	186	* a trail surrogate or ends with a lead surrogate,
	187	* then it is found only if these surrogates stand alone in the text.
	188	* Otherwise, the substring edge units would be matched against
	189	* halves of surrogate pairs.
	190	*
	191	* @param s The string to search.
	192	* @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
	193	* @param substring The substring to find (NUL-terminated).
	194	* @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
	195	* @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
	196	* or <code>s</code> itself if the <code>substring</code> is empty,
	197	* or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
	198	* @stable ICU 2.4
	199	*
	200	* @see u_strstr
	201	* @see u_strFindLast
	202	*/
	203	U_STABLE UChar * U_EXPORT2
	204	u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
	205
	206	/**
	207	* Find the first occurrence of a BMP code point in a string.
	208	* A surrogate code point is found only if its match in the text is not
	209	* part of a surrogate pair.
	210	* A NUL character is found at the string terminator.
	211	*
	212	* @param s The string to search (NUL-terminated).
	213	* @param c The BMP code point to find.
	214	* @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
	215	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
	216	* @stable ICU 2.0
	217	*
	218	* @see u_strchr32
	219	* @see u_memchr
	220	* @see u_strstr
	221	* @see u_strFindFirst
	222	*/
	223	U_STABLE UChar * U_EXPORT2
	224	u_strchr(const UChar *s, UChar c);
	225
	226	/**
	227	* Find the first occurrence of a code point in a string.
	228	* A surrogate code point is found only if its match in the text is not
	229	* part of a surrogate pair.
	230	* A NUL character is found at the string terminator.
	231	*
	232	* @param s The string to search (NUL-terminated).
	233	* @param c The code point to find.
	234	* @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
	235	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
	236	* @stable ICU 2.0
	237	*
	238	* @see u_strchr
	239	* @see u_memchr32
	240	* @see u_strstr
	241	* @see u_strFindFirst
	242	*/
	243	U_STABLE UChar * U_EXPORT2
	244	u_strchr32(const UChar *s, UChar32 c);
	245
	246	/**
	247	* Find the last occurrence of a substring in a string.
	248	* The substring is found at code point boundaries.
	249	* That means that if the substring begins with
	250	* a trail surrogate or ends with a lead surrogate,
	251	* then it is found only if these surrogates stand alone in the text.
	252	* Otherwise, the substring edge units would be matched against
	253	* halves of surrogate pairs.
	254	*
	255	* @param s The string to search (NUL-terminated).
	256	* @param substring The substring to find (NUL-terminated).
	257	* @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
	258	* or <code>s</code> itself if the <code>substring</code> is empty,
	259	* or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
	260	* @stable ICU 2.4
	261	*
	262	* @see u_strstr
	263	* @see u_strFindFirst
	264	* @see u_strFindLast
	265	*/
	266	U_STABLE UChar * U_EXPORT2
	267	u_strrstr(const UChar *s, const UChar *substring);
	268
	269	/**
	270	* Find the last occurrence of a substring in a string.
	271	* The substring is found at code point boundaries.
	272	* That means that if the substring begins with
	273	* a trail surrogate or ends with a lead surrogate,
	274	* then it is found only if these surrogates stand alone in the text.
	275	* Otherwise, the substring edge units would be matched against
	276	* halves of surrogate pairs.
	277	*
	278	* @param s The string to search.
	279	* @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
	280	* @param substring The substring to find (NUL-terminated).
	281	* @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
	282	* @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
	283	* or <code>s</code> itself if the <code>substring</code> is empty,
	284	* or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
	285	* @stable ICU 2.4
	286	*
	287	* @see u_strstr
	288	* @see u_strFindLast
	289	*/
	290	U_STABLE UChar * U_EXPORT2
	291	u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
	292
	293	/**
	294	* Find the last occurrence of a BMP code point in a string.
	295	* A surrogate code point is found only if its match in the text is not
	296	* part of a surrogate pair.
	297	* A NUL character is found at the string terminator.
	298	*
	299	* @param s The string to search (NUL-terminated).
	300	* @param c The BMP code point to find.
	301	* @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
	302	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
	303	* @stable ICU 2.4
	304	*
	305	* @see u_strrchr32
	306	* @see u_memrchr
	307	* @see u_strrstr
	308	* @see u_strFindLast
	309	*/
	310	U_STABLE UChar * U_EXPORT2
	311	u_strrchr(const UChar *s, UChar c);
	312
	313	/**
	314	* Find the last occurrence of a code point in a string.
	315	* A surrogate code point is found only if its match in the text is not
	316	* part of a surrogate pair.
	317	* A NUL character is found at the string terminator.
	318	*
	319	* @param s The string to search (NUL-terminated).
	320	* @param c The code point to find.
	321	* @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
	322	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
	323	* @stable ICU 2.4
	324	*
	325	* @see u_strrchr
	326	* @see u_memchr32
	327	* @see u_strrstr
	328	* @see u_strFindLast
	329	*/
	330	U_STABLE UChar * U_EXPORT2
	331	u_strrchr32(const UChar *s, UChar32 c);
	332
	333	/**
	334	* Locates the first occurrence in the string <code>string</code> of any of the characters
	335	* in the string <code>matchSet</code>.
	336	* Works just like C's strpbrk but with Unicode.
	337	*
	338	* @param string The string in which to search, NUL-terminated.
	339	* @param matchSet A NUL-terminated string defining a set of code points
	340	* for which to search in the text string.
	341	* @return A pointer to the character in <code>string</code> that matches one of the
	342	* characters in <code>matchSet</code>, or NULL if no such character is found.
	343	* @stable ICU 2.0
	344	*/
	345	U_STABLE UChar * U_EXPORT2
	346	u_strpbrk(const UChar *string, const UChar *matchSet);
	347
	348	/**
	349	* Returns the number of consecutive characters in <code>string</code>,
	350	* beginning with the first, that do not occur somewhere in <code>matchSet</code>.
	351	* Works just like C's strcspn but with Unicode.
	352	*
	353	* @param string The string in which to search, NUL-terminated.
	354	* @param matchSet A NUL-terminated string defining a set of code points
	355	* for which to search in the text string.
	356	* @return The number of initial characters in <code>string</code> that do not
	357	* occur in <code>matchSet</code>.
	358	* @see u_strspn
	359	* @stable ICU 2.0
	360	*/
	361	U_STABLE int32_t U_EXPORT2
	362	u_strcspn(const UChar *string, const UChar *matchSet);
	363
	364	/**
	365	* Returns the number of consecutive characters in <code>string</code>,
	366	* beginning with the first, that occur somewhere in <code>matchSet</code>.
	367	* Works just like C's strspn but with Unicode.
	368	*
	369	* @param string The string in which to search, NUL-terminated.
	370	* @param matchSet A NUL-terminated string defining a set of code points
	371	* for which to search in the text string.
	372	* @return The number of initial characters in <code>string</code> that do
	373	* occur in <code>matchSet</code>.
	374	* @see u_strcspn
	375	* @stable ICU 2.0
	376	*/
	377	U_STABLE int32_t U_EXPORT2
	378	u_strspn(const UChar *string, const UChar *matchSet);
	379
	380	/**
	381	* The string tokenizer API allows an application to break a string into
	382	* tokens. Unlike strtok(), the saveState (the current pointer within the
	383	* original string) is maintained in saveState. In the first call, the
	384	* argument src is a pointer to the string. In subsequent calls to
	385	* return successive tokens of that string, src must be specified as
	386	* NULL. The value saveState is set by this function to maintain the
	387	* function's position within the string, and on each subsequent call
	388	* you must give this argument the same variable. This function does
	389	* handle surrogate pairs. This function is similar to the strtok_r()
	390	* function of the POSIX Threads Extension (1003.1c-1995).
	391	*
	392	* @param src String containing token(s). This string will be modified.
	393	* After the first call to u_strtok_r(), this argument must
	394	* be NULL to get to the next token.
	395	* @param delim Set of delimiter characters (Unicode code points).
	396	* @param saveState The current pointer within the original string,
	397	* which is set by this function. The saveState
	398	* parameter should be the address of a local variable of type
	399	* UChar *. (i.e. define "UChar *myLocalSaveState" and use
	400	* &myLocalSaveState for this parameter).
	401	* @return A pointer to the next token found in src, or NULL
	402	* when there are no more tokens.
	403	* @stable ICU 2.0
	404	*/
	405	U_STABLE UChar * U_EXPORT2
	406	u_strtok_r(UChar *src,
	407	const UChar *delim,
	408	UChar **saveState);
	409
	410	/**
	411	* Compare two Unicode strings for bitwise equality (code unit order).
	412	*
	413	* @param s1 A string to compare.
	414	* @param s2 A string to compare.
	415	* @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
	416	* value if <code>s1</code> is bitwise less than <code>s2</code>; a positive
	417	* value if <code>s1</code> is bitwise greater than <code>s2</code>.
	418	* @stable ICU 2.0
	419	*/
	420	U_STABLE int32_t U_EXPORT2
	421	u_strcmp(const UChar *s1,
	422	const UChar *s2);
	423
	424	/**
	425	* Compare two Unicode strings in code point order.
	426	* See u_strCompare for details.
	427	*
	428	* @param s1 A string to compare.
	429	* @param s2 A string to compare.
	430	* @return a negative/zero/positive integer corresponding to whether
	431	* the first string is less than/equal to/greater than the second one
	432	* in code point order
	433	* @stable ICU 2.0
	434	*/
	435	U_STABLE int32_t U_EXPORT2
	436	u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
	437
	438	/**
	439	* Compare two Unicode strings (binary order).
	440	*
	441	* The comparison can be done in code unit order or in code point order.
	442	* They differ only in UTF-16 when
	443	* comparing supplementary code points (U+10000..U+10ffff)
	444	* to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
	445	* In code unit order, high BMP code points sort after supplementary code points
	446	* because they are stored as pairs of surrogates which are at U+d800..U+dfff.
	447	*
	448	* This function works with strings of different explicitly specified lengths
	449	* unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
	450	* NUL-terminated strings are possible with length arguments of -1.
	451	*
	452	* @param s1 First source string.
	453	* @param length1 Length of first source string, or -1 if NUL-terminated.
	454	*
	455	* @param s2 Second source string.
	456	* @param length2 Length of second source string, or -1 if NUL-terminated.
	457	*
	458	* @param codePointOrder Choose between code unit order (FALSE)
	459	* and code point order (TRUE).
	460	*
	461	* @return <0 or 0 or >0 as usual for string comparisons
	462	*
	463	* @stable ICU 2.2
	464	*/
	465	U_STABLE int32_t U_EXPORT2
	466	u_strCompare(const UChar *s1, int32_t length1,
	467	const UChar *s2, int32_t length2,
	468	UBool codePointOrder);
	469
	470	/**
	471	* Compare two Unicode strings (binary order)
	472	* as presented by UCharIterator objects.
	473	* Works otherwise just like u_strCompare().
	474	*
	475	* Both iterators are reset to their start positions.
	476	* When the function returns, it is undefined where the iterators
	477	* have stopped.
	478	*
	479	* @param iter1 First source string iterator.
	480	* @param iter2 Second source string iterator.
	481	* @param codePointOrder Choose between code unit order (FALSE)
	482	* and code point order (TRUE).
	483	*
	484	* @return <0 or 0 or >0 as usual for string comparisons
	485	*
	486	* @see u_strCompare
	487	*
	488	* @stable ICU 2.6
	489	*/
	490	U_STABLE int32_t U_EXPORT2
	491	u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
	492
	493	#ifndef U_COMPARE_CODE_POINT_ORDER
	494	/* see also unistr.h and unorm.h */
	495	/**
	496	* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
	497	* Compare strings in code point order instead of code unit order.
	498	* @stable ICU 2.2
	499	*/
	500	#define U_COMPARE_CODE_POINT_ORDER 0x8000
	501	#endif
	502
	503	/**
	504	* Compare two strings case-insensitively using full case folding.
	505	* This is equivalent to
	506	* u_strCompare(u_strFoldCase(s1, options),
	507	* u_strFoldCase(s2, options),
	508	* (options&U_COMPARE_CODE_POINT_ORDER)!=0).
	509	*
	510	* The comparison can be done in UTF-16 code unit order or in code point order.
	511	* They differ only when comparing supplementary code points (U+10000..U+10ffff)
	512	* to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
	513	* In code unit order, high BMP code points sort after supplementary code points
	514	* because they are stored as pairs of surrogates which are at U+d800..U+dfff.
	515	*
	516	* This function works with strings of different explicitly specified lengths
	517	* unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
	518	* NUL-terminated strings are possible with length arguments of -1.
	519	*
	520	* @param s1 First source string.
	521	* @param length1 Length of first source string, or -1 if NUL-terminated.
	522	*
	523	* @param s2 Second source string.
	524	* @param length2 Length of second source string, or -1 if NUL-terminated.
	525	*
	526	* @param options A bit set of options:
	527	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
	528	* Comparison in code unit order with default case folding.
	529	*
	530	* - U_COMPARE_CODE_POINT_ORDER
	531	* Set to choose code point order instead of code unit order
	532	* (see u_strCompare for details).
	533	*
	534	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
	535	*
	536	* @param pErrorCode Must be a valid pointer to an error code value,
	537	* which must not indicate a failure before the function call.
	538	*
	539	* @return <0 or 0 or >0 as usual for string comparisons
	540	*
	541	* @stable ICU 2.2
	542	*/
	543	U_STABLE int32_t U_EXPORT2
	544	u_strCaseCompare(const UChar *s1, int32_t length1,
	545	const UChar *s2, int32_t length2,
	546	uint32_t options,
	547	UErrorCode *pErrorCode);
	548
	549	/**
	550	* Compare two ustrings for bitwise equality.
	551	* Compares at most <code>n</code> characters.
	552	*
	553	* @param ucs1 A string to compare.
	554	* @param ucs2 A string to compare.
	555	* @param n The maximum number of characters to compare.
	556	* @return 0 if <code>ucs1</code> and <code>ucs2</code> are bitwise equal; a negative
	557	* value if <code>ucs1</code> is bitwise less than <code>ucs2</code>; a positive
	558	* value if <code>ucs1</code> is bitwise greater than <code>ucs2</code>.
	559	* @stable ICU 2.0
	560	*/
	561	U_STABLE int32_t U_EXPORT2
	562	u_strncmp(const UChar *ucs1,
	563	const UChar *ucs2,
	564	int32_t n);
	565
	566	/**
	567	* Compare two Unicode strings in code point order.
	568	* This is different in UTF-16 from u_strncmp() if supplementary characters are present.
	569	* For details, see u_strCompare().
	570	*
	571	* @param s1 A string to compare.
	572	* @param s2 A string to compare.
	573	* @param n The maximum number of characters to compare.
	574	* @return a negative/zero/positive integer corresponding to whether
	575	* the first string is less than/equal to/greater than the second one
	576	* in code point order
	577	* @stable ICU 2.0
	578	*/
	579	U_STABLE int32_t U_EXPORT2
	580	u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);
	581
	582	/**
	583	* Compare two strings case-insensitively using full case folding.
	584	* This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).
	585	*
	586	* @param s1 A string to compare.
	587	* @param s2 A string to compare.
	588	* @param options A bit set of options:
	589	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
	590	* Comparison in code unit order with default case folding.
	591	*
	592	* - U_COMPARE_CODE_POINT_ORDER
	593	* Set to choose code point order instead of code unit order
	594	* (see u_strCompare for details).
	595	*
	596	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
	597	*
	598	* @return A negative, zero, or positive integer indicating the comparison result.
	599	* @stable ICU 2.0
	600	*/
	601	U_STABLE int32_t U_EXPORT2
	602	u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
	603
	604	/**
	605	* Compare two strings case-insensitively using full case folding.
	606	* This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options),
	607	* u_strFoldCase(s2, at most n, options)).
	608	*
	609	* @param s1 A string to compare.
	610	* @param s2 A string to compare.
	611	* @param n The maximum number of characters in each string to case-fold and then compare.
	612	* @param options A bit set of options:
	613	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
	614	* Comparison in code unit order with default case folding.
	615	*
	616	* - U_COMPARE_CODE_POINT_ORDER
	617	* Set to choose code point order instead of code unit order
	618	* (see u_strCompare for details).
	619	*
	620	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
	621	*
	622	* @return A negative, zero, or positive integer indicating the comparison result.
	623	* @stable ICU 2.0
	624	*/
	625	U_STABLE int32_t U_EXPORT2
	626	u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
	627
	628	/**
	629	* Compare two strings case-insensitively using full case folding.
	630	* This is equivalent to u_strcmp(u_strFoldCase(s1, n, options),
	631	* u_strFoldCase(s2, n, options)).
	632	*
	633	* @param s1 A string to compare.
	634	* @param s2 A string to compare.
	635	* @param length The number of characters in each string to case-fold and then compare.
	636	* @param options A bit set of options:
	637	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
	638	* Comparison in code unit order with default case folding.
	639	*
	640	* - U_COMPARE_CODE_POINT_ORDER
	641	* Set to choose code point order instead of code unit order
	642	* (see u_strCompare for details).
	643	*
	644	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
	645	*
	646	* @return A negative, zero, or positive integer indicating the comparison result.
	647	* @stable ICU 2.0
	648	*/
	649	U_STABLE int32_t U_EXPORT2
	650	u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
	651
	652	/**
	653	* Copy a ustring. Adds a null terminator.
	654	*
	655	* @param dst The destination string.
	656	* @param src The source string.
	657	* @return A pointer to <code>dst</code>.
	658	* @stable ICU 2.0
	659	*/
	660	U_STABLE UChar* U_EXPORT2
	661	u_strcpy(UChar *dst,
	662	const UChar *src);
	663
	664	/**
	665	* Copy a ustring.
	666	* Copies at most <code>n</code> characters. The result will be null terminated
	667	* if the length of <code>src</code> is less than <code>n</code>.
	668	*
	669	* @param dst The destination string.
	670	* @param src The source string.
	671	* @param n The maximum number of characters to copy.
	672	* @return A pointer to <code>dst</code>.
	673	* @stable ICU 2.0
	674	*/
	675	U_STABLE UChar* U_EXPORT2
	676	u_strncpy(UChar *dst,
	677	const UChar *src,
	678	int32_t n);
	679
	680	#if !UCONFIG_NO_CONVERSION
	681
	682	/**
	683	* Copy a byte string encoded in the default codepage to a ustring.
	684	* Adds a null terminator.
	685	* Performs a host byte to UChar conversion
	686	*
	687	* @param dst The destination string.
	688	* @param src The source string.
	689	* @return A pointer to <code>dst</code>.
	690	* @stable ICU 2.0
	691	*/
	692	U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst,
	693	const char *src );
	694
	695	/**
	696	* Copy a byte string encoded in the default codepage to a ustring.
	697	* Copies at most <code>n</code> characters. The result will be null terminated
	698	* if the length of <code>src</code> is less than <code>n</code>.
	699	* Performs a host byte to UChar conversion
	700	*
	701	* @param dst The destination string.
	702	* @param src The source string.
	703	* @param n The maximum number of characters to copy.
	704	* @return A pointer to <code>dst</code>.
	705	* @stable ICU 2.0
	706	*/
	707	U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst,
	708	const char *src,
	709	int32_t n);
	710
	711	/**
	712	* Copy ustring to a byte string encoded in the default codepage.
	713	* Adds a null terminator.
	714	* Performs a UChar to host byte conversion
	715	*
	716	* @param dst The destination string.
	717	* @param src The source string.
	718	* @return A pointer to <code>dst</code>.
	719	* @stable ICU 2.0
	720	*/
	721	U_STABLE char* U_EXPORT2 u_austrcpy(char *dst,
	722	const UChar *src );
	723
	724	/**
	725	* Copy ustring to a byte string encoded in the default codepage.
	726	* Copies at most <code>n</code> characters. The result will be null terminated
	727	* if the length of <code>src</code> is less than <code>n</code>.
	728	* Performs a UChar to host byte conversion
	729	*
	730	* @param dst The destination string.
	731	* @param src The source string.
	732	* @param n The maximum number of characters to copy.
	733	* @return A pointer to <code>dst</code>.
	734	* @stable ICU 2.0
	735	*/
	736	U_STABLE char* U_EXPORT2 u_austrncpy(char *dst,
	737	const UChar *src,
	738	int32_t n );
	739
	740	#endif
	741
	742	/**
	743	* Synonym for memcpy(), but with UChars only.
	744	* @param dest The destination string
	745	* @param src The source string
	746	* @param count The number of characters to copy
	747	* @return A pointer to <code>dest</code>
	748	* @stable ICU 2.0
	749	*/
	750	U_STABLE UChar* U_EXPORT2
	751	u_memcpy(UChar *dest, const UChar *src, int32_t count);
	752
	753	/**
	754	* Synonym for memmove(), but with UChars only.
	755	* @param dest The destination string
	756	* @param src The source string
	757	* @param count The number of characters to move
	758	* @return A pointer to <code>dest</code>
	759	* @stable ICU 2.0
	760	*/
	761	U_STABLE UChar* U_EXPORT2
	762	u_memmove(UChar *dest, const UChar *src, int32_t count);
	763
	764	/**
	765	* Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>.
	766	*
	767	* @param dest The destination string.
	768	* @param c The character to initialize the string.
	769	* @param count The maximum number of characters to set.
	770	* @return A pointer to <code>dest</code>.
	771	* @stable ICU 2.0
	772	*/
	773	U_STABLE UChar* U_EXPORT2
	774	u_memset(UChar *dest, UChar c, int32_t count);
	775
	776	/**
	777	* Compare the first <code>count</code> UChars of each buffer.
	778	*
	779	* @param buf1 The first string to compare.
	780	* @param buf2 The second string to compare.
	781	* @param count The maximum number of UChars to compare.
	782	* @return When buf1 < buf2, a negative number is returned.
	783	* When buf1 == buf2, 0 is returned.
	784	* When buf1 > buf2, a positive number is returned.
	785	* @stable ICU 2.0
	786	*/
	787	U_STABLE int32_t U_EXPORT2
	788	u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);
	789
	790	/**
	791	* Compare two Unicode strings in code point order.
	792	* This is different in UTF-16 from u_memcmp() if supplementary characters are present.
	793	* For details, see u_strCompare().
	794	*
	795	* @param s1 A string to compare.
	796	* @param s2 A string to compare.
	797	* @param count The maximum number of characters to compare.
	798	* @return a negative/zero/positive integer corresponding to whether
	799	* the first string is less than/equal to/greater than the second one
	800	* in code point order
	801	* @stable ICU 2.0
	802	*/
	803	U_STABLE int32_t U_EXPORT2
	804	u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);
	805
	806	/**
	807	* Find the first occurrence of a BMP code point in a string.
	808	* A surrogate code point is found only if its match in the text is not
	809	* part of a surrogate pair.
	810	* A NUL character is found at the string terminator.
	811	*
	812	* @param s The string to search (contains <code>count</code> UChars).
	813	* @param c The BMP code point to find.
	814	* @param count The length of the string.
	815	* @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
	816	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
	817	* @stable ICU 2.0
	818	*
	819	* @see u_strchr
	820	* @see u_memchr32
	821	* @see u_strFindFirst
	822	*/
	823	U_STABLE UChar* U_EXPORT2
	824	u_memchr(const UChar *s, UChar c, int32_t count);
	825
	826	/**
	827	* Find the first occurrence of a code point in a string.
	828	* A surrogate code point is found only if its match in the text is not
	829	* part of a surrogate pair.
	830	* A NUL character is found at the string terminator.
	831	*
	832	* @param s The string to search (contains <code>count</code> UChars).
	833	* @param c The code point to find.
	834	* @param count The length of the string.
	835	* @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
	836	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
	837	* @stable ICU 2.0
	838	*
	839	* @see u_strchr32
	840	* @see u_memchr
	841	* @see u_strFindFirst
	842	*/
	843	U_STABLE UChar* U_EXPORT2
	844	u_memchr32(const UChar *s, UChar32 c, int32_t count);
	845
	846	/**
	847	* Find the last occurrence of a BMP code point in a string.
	848	* A surrogate code point is found only if its match in the text is not
	849	* part of a surrogate pair.
	850	* A NUL character is found at the string terminator.
	851	*
	852	* @param s The string to search (contains <code>count</code> UChars).
	853	* @param c The BMP code point to find.
	854	* @param count The length of the string.
	855	* @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
	856	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
	857	* @stable ICU 2.4
	858	*
	859	* @see u_strrchr
	860	* @see u_memrchr32
	861	* @see u_strFindLast
	862	*/
	863	U_STABLE UChar* U_EXPORT2
	864	u_memrchr(const UChar *s, UChar c, int32_t count);
	865
	866	/**
	867	* Find the last occurrence of a code point in a string.
	868	* A surrogate code point is found only if its match in the text is not
	869	* part of a surrogate pair.
	870	* A NUL character is found at the string terminator.
	871	*
	872	* @param s The string to search (contains <code>count</code> UChars).
	873	* @param c The code point to find.
	874	* @param count The length of the string.
	875	* @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
	876	* or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
	877	* @stable ICU 2.4
	878	*
	879	* @see u_strrchr32
	880	* @see u_memrchr
	881	* @see u_strFindLast
	882	*/
	883	U_STABLE UChar* U_EXPORT2
	884	u_memrchr32(const UChar *s, UChar32 c, int32_t count);
	885
	886	/**
	887	* Unicode String literals in C.
	888	* We need one macro to declare a variable for the string
	889	* and to statically preinitialize it if possible,
	890	* and a second macro to dynamically initialize such a string variable if necessary.
	891	*
	892	* The macros are defined for maximum performance.
	893	* They work only for strings that contain "invariant characters", i.e.,
	894	* only latin letters, digits, and some punctuation.
	895	* See utypes.h for details.
	896	*
	897	* A pair of macros for a single string must be used with the same
	898	* parameters.
	899	* The string parameter must be a C string literal.
	900	* The length of the string, not including the terminating
	901	* <code>NUL</code>, must be specified as a constant.
	902	* The U_STRING_DECL macro should be invoked exactly once for one
	903	* such string variable before it is used.
	904	*
	905	* Usage:
	906	* <pre>
	907	* U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
	908	* U_STRING_DECL(ustringVar2, "jumps 5%", 8);
	909	* static UBool didInit=FALSE;
	910	*
	911	* int32_t function() {
	912	* if(!didInit) {
	913	* U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
	914	* U_STRING_INIT(ustringVar2, "jumps 5%", 8);
	915	* didInit=TRUE;
	916	* }
	917	* return u_strcmp(ustringVar1, ustringVar2);
	918	* }
	919	* </pre>
	920	* @stable ICU 2.0
	921	*/
	922	#if defined(U_DECLARE_UTF16)
	923	# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=U_DECLARE_UTF16(cs)
	924	/*@stable ICU 2.0 /
	925	# define U_STRING_INIT(var, cs, length)
	926	#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY \|\| (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
	927	# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs
	928	/*@stable ICU 2.0 /
	929	# define U_STRING_INIT(var, cs, length)
	930	#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
	931	# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
	932	/*@stable ICU 2.0 /
	933	# define U_STRING_INIT(var, cs, length)
	934	#else
	935	# define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
	936	/*@stable ICU 2.0 /
	937	# define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, length+1)
	938	#endif
	939
	940	/**
	941	* Unescape a string of characters and write the resulting
	942	* Unicode characters to the destination buffer. The following escape
	943	* sequences are recognized:
	944	*
	945	* \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
	946	* \\Uhhhhhhhh 8 hex digits
	947	* \\xhh 1-2 hex digits
	948	* \\x{h...} 1-8 hex digits
	949	* \\ooo 1-3 octal digits; o in [0-7]
	950	* \\cX control-X; X is masked with 0x1F
	951	*
	952	* as well as the standard ANSI C escapes:
	953	*
	954	* \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
	955	* \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
	956	* \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
	957	*
	958	* Anything else following a backslash is generically escaped. For
	959	* example, "[a\\-z]" returns "[a-z]".
	960	*
	961	* If an escape sequence is ill-formed, this method returns an empty
	962	* string. An example of an ill-formed sequence is "\\u" followed by
	963	* fewer than 4 hex digits.
	964	*
	965	* The above characters are recognized in the compiler's codepage,
	966	* that is, they are coded as 'u', '\\', etc. Characters that are
	967	* not parts of escape sequences are converted using u_charsToUChars().
	968	*
	969	* This function is similar to UnicodeString::unescape() but not
	970	* identical to it. The latter takes a source UnicodeString, so it
	971	* does escape recognition but no conversion.
	972	*
	973	* @param src a zero-terminated string of invariant characters
	974	* @param dest pointer to buffer to receive converted and unescaped
	975	* text and, if there is room, a zero terminator. May be NULL for
	976	* preflighting, in which case no UChars will be written, but the
	977	* return value will still be valid. On error, an empty string is
	978	* stored here (if possible).
	979	* @param destCapacity the number of UChars that may be written at
	980	* dest. Ignored if dest == NULL.
	981	* @return the length of the unescaped string.
	982	* @see u_unescapeAt
	983	* @see UnicodeString#unescape()
	984	* @see UnicodeString#unescapeAt()
	985	* @stable ICU 2.0
	986	*/
	987	U_STABLE int32_t U_EXPORT2
	988	u_unescape(const char *src,
	989	UChar *dest, int32_t destCapacity);
	990
	991	U_CDECL_BEGIN
	992	/**
	993	* Callback function for u_unescapeAt() that returns a character of
	994	* the source text given an offset and a context pointer. The context
	995	* pointer will be whatever is passed into u_unescapeAt().
	996	*
	997	* @param offset the offset of the source-text character to return
	998	* @param context an opaque pointer passed directly into u_unescapeAt()
	999	* @return the character of the source text at
	1000	* offset
	1001	* @see u_unescapeAt
	1002	* @stable ICU 2.0
	1003	*/
	1004	typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
	1005	U_CDECL_END
	1006
	1007	/**
	1008	* Unescape a single sequence. The character at offset-1 is assumed
	1009	* (without checking) to be a backslash. This method takes a callback
	1010	* pointer to a function that returns the UChar at a given offset. By
	1011	* varying this callback, ICU functions are able to unescape char*
	1012	* strings, UnicodeString objects, and UFILE pointers.
	1013	*
	1014	* If offset is out of range, or if the escape sequence is ill-formed,
	1015	* (UChar32)0xFFFFFFFF is returned. See documentation of u_unescape()
	1016	* for a list of recognized sequences.
	1017	*
	1018	* @param charAt callback function that returns a UChar of the source
	1019	* text given an offset and a context pointer.
	1020	* @param offset pointer to the offset that will be passed to charAt.
	1021	* The offset value will be updated upon return to point after the
	1022	* last parsed character of the escape sequence. On error the offset
	1023	* is unchanged.
	1024	* @param length the number of characters in the source text. The
	1025	* last character of the source text is considered to be at offset
	1026	* length-1.
	1027	* @param context an opaque pointer passed directly into charAt.
	1028	* @return the character represented by the escape sequence at
	1029	* offset, or (UChar32)0xFFFFFFFF on error.
	1030	* @see u_unescape()
	1031	* @see UnicodeString#unescape()
	1032	* @see UnicodeString#unescapeAt()
	1033	* @stable ICU 2.0
	1034	*/
	1035	U_STABLE UChar32 U_EXPORT2
	1036	u_unescapeAt(UNESCAPE_CHAR_AT charAt,
	1037	int32_t *offset,
	1038	int32_t length,
	1039	void *context);
	1040
	1041	/**
	1042	* Uppercase the characters in a string.
	1043	* Casing is locale-dependent and context-sensitive.
	1044	* The result may be longer or shorter than the original.
	1045	* The source string and the destination buffer are allowed to overlap.
	1046	*
	1047	* @param dest A buffer for the result string. The result will be zero-terminated if
	1048	* the buffer is large enough.
	1049	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
	1050	* dest may be NULL and the function will only return the length of the result
	1051	* without writing any of the result string.
	1052	* @param src The original string
	1053	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
	1054	* @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
	1055	* @param pErrorCode Must be a valid pointer to an error code value,
	1056	* which must not indicate a failure before the function call.
	1057	* @return The length of the result string. It may be greater than destCapacity. In that case,
	1058	* only some of the result was written to the destination buffer.
	1059	* @stable ICU 2.0
	1060	*/
	1061	U_STABLE int32_t U_EXPORT2
	1062	u_strToUpper(UChar *dest, int32_t destCapacity,
	1063	const UChar *src, int32_t srcLength,
	1064	const char *locale,
	1065	UErrorCode *pErrorCode);
	1066
	1067	/**
	1068	* Lowercase the characters in a string.
	1069	* Casing is locale-dependent and context-sensitive.
	1070	* The result may be longer or shorter than the original.
	1071	* The source string and the destination buffer are allowed to overlap.
	1072	*
	1073	* @param dest A buffer for the result string. The result will be zero-terminated if
	1074	* the buffer is large enough.
	1075	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
	1076	* dest may be NULL and the function will only return the length of the result
	1077	* without writing any of the result string.
	1078	* @param src The original string
	1079	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
	1080	* @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
	1081	* @param pErrorCode Must be a valid pointer to an error code value,
	1082	* which must not indicate a failure before the function call.
	1083	* @return The length of the result string. It may be greater than destCapacity. In that case,
	1084	* only some of the result was written to the destination buffer.
	1085	* @stable ICU 2.0
	1086	*/
	1087	U_STABLE int32_t U_EXPORT2
	1088	u_strToLower(UChar *dest, int32_t destCapacity,
	1089	const UChar *src, int32_t srcLength,
	1090	const char *locale,
	1091	UErrorCode *pErrorCode);
	1092
	1093	#if !UCONFIG_NO_BREAK_ITERATION
	1094
	1095	/**
	1096	* Titlecase a string.
	1097	* Casing is locale-dependent and context-sensitive.
	1098	* Titlecasing uses a break iterator to find the first characters of words
	1099	* that are to be titlecased. It titlecases those characters and lowercases
	1100	* all others.
	1101	*
	1102	* The titlecase break iterator can be provided to customize for arbitrary
	1103	* styles, using rules and dictionaries beyond the standard iterators.
	1104	* It may be more efficient to always provide an iterator to avoid
	1105	* opening and closing one for each string.
	1106	* The standard titlecase iterator for the root locale implements the
	1107	* algorithm of Unicode TR 21.
	1108	*
	1109	* This function uses only the setText(), first() and next() methods of the
	1110	* provided break iterator.
	1111	*
	1112	* The result may be longer or shorter than the original.
	1113	* The source string and the destination buffer are allowed to overlap.
	1114	*
	1115	* @param dest A buffer for the result string. The result will be zero-terminated if
	1116	* the buffer is large enough.
	1117	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
	1118	* dest may be NULL and the function will only return the length of the result
	1119	* without writing any of the result string.
	1120	* @param src The original string
	1121	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
	1122	* @param titleIter A break iterator to find the first characters of words
	1123	* that are to be titlecased.
	1124	* If none is provided (NULL), then a standard titlecase
	1125	* break iterator is opened.
	1126	* @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
	1127	* @param pErrorCode Must be a valid pointer to an error code value,
	1128	* which must not indicate a failure before the function call.
	1129	* @return The length of the result string. It may be greater than destCapacity. In that case,
	1130	* only some of the result was written to the destination buffer.
	1131	* @stable ICU 2.1
	1132	*/
	1133	U_STABLE int32_t U_EXPORT2
	1134	u_strToTitle(UChar *dest, int32_t destCapacity,
	1135	const UChar *src, int32_t srcLength,
	1136	UBreakIterator *titleIter,
	1137	const char *locale,
	1138	UErrorCode *pErrorCode);
	1139
	1140	#endif
	1141
	1142	/**
	1143	* Case-fold the characters in a string.
	1144	* Case-folding is locale-independent and not context-sensitive,
	1145	* but there is an option for whether to include or exclude mappings for dotted I
	1146	* and dotless i that are marked with 'I' in CaseFolding.txt.
	1147	* The result may be longer or shorter than the original.
	1148	* The source string and the destination buffer are allowed to overlap.
	1149	*
	1150	* @param dest A buffer for the result string. The result will be zero-terminated if
	1151	* the buffer is large enough.
	1152	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
	1153	* dest may be NULL and the function will only return the length of the result
	1154	* without writing any of the result string.
	1155	* @param src The original string
	1156	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
	1157	* @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
	1158	* @param pErrorCode Must be a valid pointer to an error code value,
	1159	* which must not indicate a failure before the function call.
	1160	* @return The length of the result string. It may be greater than destCapacity. In that case,
	1161	* only some of the result was written to the destination buffer.
	1162	* @stable ICU 2.0
	1163	*/
	1164	U_STABLE int32_t U_EXPORT2
	1165	u_strFoldCase(UChar *dest, int32_t destCapacity,
	1166	const UChar *src, int32_t srcLength,
	1167	uint32_t options,
	1168	UErrorCode *pErrorCode);
	1169
	1170	#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
	1171	/**
	1172	* Converts a sequence of UChars to wchar_t units.
	1173	*
	1174	* @param dest A buffer for the result string. The result will be zero-terminated if
	1175	* the buffer is large enough.
	1176	* @param destCapacity The size of the buffer (number of wchar_t's). If it is 0, then
	1177	* dest may be NULL and the function will only return the length of the
	1178	* result without writing any of the result string (pre-flighting).
	1179	* @param pDestLength A pointer to receive the number of units written to the destination. If
	1180	* pDestLength!=NULL then *pDestLength is always set to the
	1181	* number of output units corresponding to the transformation of
	1182	* all the input units, even in case of a buffer overflow.
	1183	* @param src The original source string
	1184	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
	1185	* @param pErrorCode Must be a valid pointer to an error code value,
	1186	* which must not indicate a failure before the function call.
	1187	* @return The pointer to destination buffer.
	1188	* @stable ICU 2.0
	1189	*/
	1190	U_STABLE wchar_t* U_EXPORT2
	1191	u_strToWCS(wchar_t *dest,
	1192	int32_t destCapacity,
	1193	int32_t *pDestLength,
	1194	const UChar *src,
	1195	int32_t srcLength,
	1196	UErrorCode *pErrorCode);
	1197	/**
	1198	* Converts a sequence of wchar_t units to UChars.
	1199	*
	1200	* @param dest A buffer for the result string. The result will be zero-terminated if
	1201	* the buffer is large enough.
	1202	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
	1203	* dest may be NULL and the function will only return the length of the
	1204	* result without writing any of the result string (pre-flighting).
	1205	* @param pDestLength A pointer to receive the number of units written to the destination. If
	1206	* pDestLength!=NULL then *pDestLength is always set to the
	1207	* number of output units corresponding to the transformation of
	1208	* all the input units, even in case of a buffer overflow.
	1209	* @param src The original source string
	1210	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
	1211	* @param pErrorCode Must be a valid pointer to an error code value,
	1212	* which must not indicate a failure before the function call.
	1213	* @return The pointer to destination buffer.
	1214	* @stable ICU 2.0
	1215	*/
	1216	U_STABLE UChar* U_EXPORT2
	1217	u_strFromWCS(UChar *dest,
	1218	int32_t destCapacity,
	1219	int32_t *pDestLength,
	1220	const wchar_t *src,
	1221	int32_t srcLength,
	1222	UErrorCode *pErrorCode);
	1223	#endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
	1224
	1225	/**
	1226	* Converts a sequence of UChars (UTF-16) to UTF-8 bytes
	1227	*
	1228	* @param dest A buffer for the result string. The result will be zero-terminated if
	1229	* the buffer is large enough.
	1230	* @param destCapacity The size of the buffer (number of chars). If it is 0, then
	1231	* dest may be NULL and the function will only return the length of the
	1232	* result without writing any of the result string (pre-flighting).
	1233	* @param pDestLength A pointer to receive the number of units written to the destination. If
	1234	* pDestLength!=NULL then *pDestLength is always set to the
	1235	* number of output units corresponding to the transformation of
	1236	* all the input units, even in case of a buffer overflow.
	1237	* @param src The original source string
	1238	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
	1239	* @param pErrorCode Must be a valid pointer to an error code value,
	1240	* which must not indicate a failure before the function call.
	1241	* @return The pointer to destination buffer.
	1242	* @stable ICU 2.0
	1243	* @see u_strToUTF8WithSub
	1244	* @see u_strFromUTF8
	1245	*/
	1246	U_STABLE char* U_EXPORT2
	1247	u_strToUTF8(char *dest,
	1248	int32_t destCapacity,
	1249	int32_t *pDestLength,
	1250	const UChar *src,
	1251	int32_t srcLength,
	1252	UErrorCode *pErrorCode);
	1253
	1254	/**
	1255	* Converts a sequence of UTF-8 bytes to UChars (UTF-16).
	1256	*
	1257	* @param dest A buffer for the result string. The result will be zero-terminated if
	1258	* the buffer is large enough.
	1259	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
	1260	* dest may be NULL and the function will only return the length of the
	1261	* result without writing any of the result string (pre-flighting).
	1262	* @param pDestLength A pointer to receive the number of units written to the destination. If
	1263	* pDestLength!=NULL then *pDestLength is always set to the
	1264	* number of output units corresponding to the transformation of
	1265	* all the input units, even in case of a buffer overflow.
	1266	* @param src The original source string
	1267	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
	1268	* @param pErrorCode Must be a valid pointer to an error code value,
	1269	* which must not indicate a failure before the function call.
	1270	* @return The pointer to destination buffer.
	1271	* @stable ICU 2.0
	1272	* @see u_strFromUTF8WithSub
	1273	* @see u_strFromUTF8Lenient
	1274	*/
	1275	U_STABLE UChar* U_EXPORT2
	1276	u_strFromUTF8(UChar *dest,
	1277	int32_t destCapacity,
	1278	int32_t *pDestLength,
	1279	const char *src,
	1280	int32_t srcLength,
	1281	UErrorCode *pErrorCode);
	1282
	1283	/**
	1284	* Converts a sequence of UChars (UTF-16) to UTF-8 bytes.
	1285	* Same as u_strToUTF8() except for the additional subchar which is output for
	1286	* illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
	1287	* With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8().
	1288	*
	1289	* @param dest A buffer for the result string. The result will be zero-terminated if
	1290	* the buffer is large enough.
	1291	* @param destCapacity The size of the buffer (number of chars). If it is 0, then
	1292	* dest may be NULL and the function will only return the length of the
	1293	* result without writing any of the result string (pre-flighting).
	1294	* @param pDestLength A pointer to receive the number of units written to the destination. If
	1295	* pDestLength!=NULL then *pDestLength is always set to the
	1296	* number of output units corresponding to the transformation of
	1297	* all the input units, even in case of a buffer overflow.
	1298	* @param src The original source string
	1299	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
	1300	* @param subchar The substitution character to use in place of an illegal input sequence,
	1301	* or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
	1302	* A substitution character can be any valid Unicode code point (up to U+10FFFF)
	1303	* except for surrogate code points (U+D800..U+DFFF).
	1304	* The recommended value is U+FFFD "REPLACEMENT CHARACTER".
	1305	* @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
	1306	* Set to 0 if no substitutions occur or subchar<0.
	1307	* pNumSubstitutions can be NULL.
	1308	* @param pErrorCode Pointer to a standard ICU error code. Its input value must
	1309	* pass the U_SUCCESS() test, or else the function returns
	1310	* immediately. Check for U_FAILURE() on output or use with
	1311	* function chaining. (See User Guide for details.)
	1312	* @return The pointer to destination buffer.
	1313	* @see u_strToUTF8
	1314	* @see u_strFromUTF8WithSub
	1315	* @stable ICU 3.6
	1316	*/
	1317	U_STABLE char* U_EXPORT2
	1318	u_strToUTF8WithSub(char *dest,
	1319	int32_t destCapacity,
	1320	int32_t *pDestLength,
	1321	const UChar *src,
	1322	int32_t srcLength,
	1323	UChar32 subchar, int32_t *pNumSubstitutions,
	1324	UErrorCode *pErrorCode);
	1325
	1326	/**
	1327	* Converts a sequence of UTF-8 bytes to UChars (UTF-16).
	1328	* Same as u_strFromUTF8() except for the additional subchar which is output for
	1329	* illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
	1330	* With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8().
	1331	*
	1332	* @param dest A buffer for the result string. The result will be zero-terminated if
	1333	* the buffer is large enough.
	1334	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
	1335	* dest may be NULL and the function will only return the length of the
	1336	* result without writing any of the result string (pre-flighting).
	1337	* @param pDestLength A pointer to receive the number of units written to the destination. If
	1338	* pDestLength!=NULL then *pDestLength is always set to the
	1339	* number of output units corresponding to the transformation of
	1340	* all the input units, even in case of a buffer overflow.
	1341	* @param src The original source string
	1342	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
	1343	* @param subchar The substitution character to use in place of an illegal input sequence,
	1344	* or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
	1345	* A substitution character can be any valid Unicode code point (up to U+10FFFF)
	1346	* except for surrogate code points (U+D800..U+DFFF).
	1347	* The recommended value is U+FFFD "REPLACEMENT CHARACTER".
	1348	* @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
	1349	* Set to 0 if no substitutions occur or subchar<0.
	1350	* pNumSubstitutions can be NULL.
	1351	* @param pErrorCode Pointer to a standard ICU error code. Its input value must
	1352	* pass the U_SUCCESS() test, or else the function returns
	1353	* immediately. Check for U_FAILURE() on output or use with
	1354	* function chaining. (See User Guide for details.)
	1355	* @return The pointer to destination buffer.
	1356	* @see u_strFromUTF8
	1357	* @see u_strFromUTF8Lenient
	1358	* @see u_strToUTF8WithSub
	1359	* @stable ICU 3.6
	1360	*/
	1361	U_STABLE UChar* U_EXPORT2
	1362	u_strFromUTF8WithSub(UChar *dest,
	1363	int32_t destCapacity,
	1364	int32_t *pDestLength,
	1365	const char *src,
	1366	int32_t srcLength,
	1367	UChar32 subchar, int32_t *pNumSubstitutions,
	1368	UErrorCode *pErrorCode);
	1369
	1370	/**
	1371	* Converts a sequence of UTF-8 bytes to UChars (UTF-16).
	1372	* Same as u_strFromUTF8() except that this function is designed to be very fast,
	1373	* which it achieves by being lenient about malformed UTF-8 sequences.
	1374	* This function is intended for use in environments where UTF-8 text is
	1375	* expected to be well-formed.
	1376	*
	1377	* Its semantics are:
	1378	* - Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text.
	1379	* - The function will not read beyond the input string, nor write beyond
	1380	* the destCapacity.
	1381	* - Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not
	1382	* be well-formed UTF-16.
	1383	* The function will resynchronize to valid code point boundaries
	1384	* within a small number of code points after an illegal sequence.
	1385	* - Non-shortest forms are not detected and will result in "spoofing" output.
	1386	*
	1387	* For further performance improvement, if srcLength is given (>=0),
	1388	* then destCapacity must be >=srcLength.
	1389	*
	1390	* @param dest A buffer for the result string. The result will be zero-terminated if
	1391	* the buffer is large enough.
	1392	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
	1393	* dest may be NULL and the function will only return the length of the
	1394	* result without writing any of the result string (pre-flighting).
	1395	* Unlike for other ICU functions, if srcLength>=0 then
	1396	* destCapacity must be >=srcLength.
	1397	* @param pDestLength A pointer to receive the number of units written to the destination. If
	1398	* pDestLength!=NULL then *pDestLength is always set to the
	1399	* number of output units corresponding to the transformation of
	1400	* all the input units, even in case of a buffer overflow.
	1401	* Unlike for other ICU functions, if srcLength>=0 but
	1402	* destCapacity<srcLength, then *pDestLength will be set to srcLength
	1403	* (and U_BUFFER_OVERFLOW_ERROR will be set)
	1404	* regardless of the actual result length.
	1405	* @param src The original source string
	1406	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
	1407	* @param pErrorCode Pointer to a standard ICU error code. Its input value must
	1408	* pass the U_SUCCESS() test, or else the function returns
	1409	* immediately. Check for U_FAILURE() on output or use with
	1410	* function chaining. (See User Guide for details.)
	1411	* @return The pointer to destination buffer.
	1412	* @see u_strFromUTF8
	1413	* @see u_strFromUTF8WithSub
	1414	* @see u_strToUTF8WithSub
	1415	* @stable ICU 3.6
	1416	*/
	1417	U_STABLE UChar * U_EXPORT2
	1418	u_strFromUTF8Lenient(UChar *dest,
	1419	int32_t destCapacity,
	1420	int32_t *pDestLength,
	1421	const char *src,
	1422	int32_t srcLength,
	1423	UErrorCode *pErrorCode);
	1424
	1425	/**
	1426	* Converts a sequence of UChars (UTF-16) to UTF-32 units.
	1427	*
	1428	* @param dest A buffer for the result string. The result will be zero-terminated if
	1429	* the buffer is large enough.
	1430	* @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then
	1431	* dest may be NULL and the function will only return the length of the
	1432	* result without writing any of the result string (pre-flighting).
	1433	* @param pDestLength A pointer to receive the number of units written to the destination. If
	1434	* pDestLength!=NULL then *pDestLength is always set to the
	1435	* number of output units corresponding to the transformation of
	1436	* all the input units, even in case of a buffer overflow.
	1437	* @param src The original source string
	1438	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
	1439	* @param pErrorCode Must be a valid pointer to an error code value,
	1440	* which must not indicate a failure before the function call.
	1441	* @return The pointer to destination buffer.
	1442	* @stable ICU 2.0
	1443	*/
	1444	U_STABLE UChar32* U_EXPORT2
	1445	u_strToUTF32(UChar32 *dest,
	1446	int32_t destCapacity,
	1447	int32_t *pDestLength,
	1448	const UChar *src,
	1449	int32_t srcLength,
	1450	UErrorCode *pErrorCode);
	1451
	1452	/**
	1453	* Converts a sequence of UTF-32 units to UChars (UTF-16).
	1454	*
	1455	* @param dest A buffer for the result string. The result will be zero-terminated if
	1456	* the buffer is large enough.
	1457	* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
	1458	* dest may be NULL and the function will only return the length of the
	1459	* result without writing any of the result string (pre-flighting).
	1460	* @param pDestLength A pointer to receive the number of units written to the destination. If
	1461	* pDestLength!=NULL then *pDestLength is always set to the
	1462	* number of output units corresponding to the transformation of
	1463	* all the input units, even in case of a buffer overflow.
	1464	* @param src The original source string
	1465	* @param srcLength The length of the original string. If -1, then src must be zero-terminated.
	1466	* @param pErrorCode Must be a valid pointer to an error code value,
	1467	* which must not indicate a failure before the function call.
	1468	* @return The pointer to destination buffer.
	1469	* @stable ICU 2.0
	1470	*/
	1471	U_STABLE UChar* U_EXPORT2
	1472	u_strFromUTF32(UChar *dest,
	1473	int32_t destCapacity,
	1474	int32_t *pDestLength,
	1475	const UChar32 *src,
	1476	int32_t srcLength,
	1477	UErrorCode *pErrorCode);
	1478
	1479	#endif