/*
******************************************************************************
*
-* Copyright (C) 1999-2008, International Business Machines
+* Copyright (C) 1999-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
#include "unicode/utypes.h"
#include "unicode/uchar.h"
+#include "unicode/localpointer.h"
/**
*\file
*
* <h2>Bidi algorithm for ICU</h2>
*
- * This is an implementation of the Unicode Bidirectional algorithm.
+ * This is an implementation of the Unicode Bidirectional Algorithm.
* The algorithm is defined in the
- * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
- * version 13, also described in The Unicode Standard, Version 4.0 .<p>
+ * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>.<p>
*
* Note: Libraries that perform a bidirectional algorithm and
* reorder strings accordingly are sometimes called "Storage Layout Engines".
* (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>).
* @stable ICU 2.0
*/
-#define UBIDI_MAX_EXPLICIT_LEVEL 61
+#define UBIDI_MAX_EXPLICIT_LEVEL 125
/** Bit flag for level input.
* Overrides directional properties.
* @stable ICU 2.0
*/
enum UBiDiDirection {
- /** All left-to-right text. This is a 0 value. @stable ICU 2.0 */
- UBIDI_LTR,
- /** All right-to-left text. This is a 1 value. @stable ICU 2.0 */
- UBIDI_RTL,
- /** Mixed-directional text. @stable ICU 2.0 */
- UBIDI_MIXED
+ /** Left-to-right text. This is a 0 value.
+ * <ul>
+ * <li>As return value for <code>ubidi_getDirection()</code>, it means
+ * that the source string contains no right-to-left characters, or
+ * that the source string is empty and the paragraph level is even.
+ * <li> As return value for <code>ubidi_getBaseDirection()</code>, it
+ * means that the first strong character of the source string has
+ * a left-to-right direction.
+ * </ul>
+ * @stable ICU 2.0
+ */
+ UBIDI_LTR,
+ /** Right-to-left text. This is a 1 value.
+ * <ul>
+ * <li>As return value for <code>ubidi_getDirection()</code>, it means
+ * that the source string contains no left-to-right characters, or
+ * that the source string is empty and the paragraph level is odd.
+ * <li> As return value for <code>ubidi_getBaseDirection()</code>, it
+ * means that the first strong character of the source string has
+ * a right-to-left direction.
+ * </ul>
+ * @stable ICU 2.0
+ */
+ UBIDI_RTL,
+ /** Mixed-directional text.
+ * <p>As return value for <code>ubidi_getDirection()</code>, it means
+ * that the source string contains both left-to-right and
+ * right-to-left characters.
+ * @stable ICU 2.0
+ */
+ UBIDI_MIXED,
+ /** No strongly directional text.
+ * <p>As return value for <code>ubidi_getBaseDirection()</code>, it means
+ * that the source string is missing or empty, or contains neither left-to-right
+ * nor right-to-left characters.
+ * @stable ICU 4.6
+ */
+ UBIDI_NEUTRAL
};
/** @stable ICU 2.0 */
U_STABLE void U_EXPORT2
ubidi_close(UBiDi *pBiDi);
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUBiDiPointer
+ * "Smart pointer" class, closes a UBiDi via ubidi_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiPointer, UBiDi, ubidi_close);
+
+U_NAMESPACE_END
+
+#endif
+
/**
* Modify the operation of the Bidi algorithm such that it
* approximates an "inverse Bidi" algorithm. This function
*
* <li>When the reordering mode is set to
* <code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the Logical to Visual
- * Bidi algorithm used in Windows XP is used as an approximation of an
- * "inverse Bidi" algorithm.
+ * Bidi algorithm used in Windows XP is used as an approximation of an "inverse Bidi" algorithm.
* <br>
* For example, an LTR paragraph with the content "abc FED123" (where
- * upper case represents RTL characters) will be transformed to
- * "abc 123DEF.</li>
+ * upper case represents RTL characters) will be transformed to "abc 123DEF."</li>
* </ul>
*
* <p>In all the reordering modes specifying an "inverse Bidi" algorithm
U_STABLE uint32_t U_EXPORT2
ubidi_getReorderingOptions(UBiDi *pBiDi);
+/**
+ * Set the context before a call to ubidi_setPara().<p>
+ *
+ * ubidi_setPara() computes the left-right directionality for a given piece
+ * of text which is supplied as one of its arguments. Sometimes this piece
+ * of text (the "main text") should be considered in context, because text
+ * appearing before ("prologue") and/or after ("epilogue") the main text
+ * may affect the result of this computation.<p>
+ *
+ * This function specifies the prologue and/or the epilogue for the next
+ * call to ubidi_setPara(). The characters specified as prologue and
+ * epilogue should not be modified by the calling program until the call
+ * to ubidi_setPara() has returned. If successive calls to ubidi_setPara()
+ * all need specification of a context, ubidi_setContext() must be called
+ * before each call to ubidi_setPara(). In other words, a context is not
+ * "remembered" after the following successful call to ubidi_setPara().<p>
+ *
+ * If a call to ubidi_setPara() specifies UBIDI_DEFAULT_LTR or
+ * UBIDI_DEFAULT_RTL as paraLevel and is preceded by a call to
+ * ubidi_setContext() which specifies a prologue, the paragraph level will
+ * be computed taking in consideration the text in the prologue.<p>
+ *
+ * When ubidi_setPara() is called without a previous call to
+ * ubidi_setContext, the main text is handled as if preceded and followed
+ * by strong directional characters at the current paragraph level.
+ * Calling ubidi_setContext() with specification of a prologue will change
+ * this behavior by handling the main text as if preceded by the last
+ * strong character appearing in the prologue, if any.
+ * Calling ubidi_setContext() with specification of an epilogue will change
+ * the behavior of ubidi_setPara() by handling the main text as if followed
+ * by the first strong character or digit appearing in the epilogue, if any.<p>
+ *
+ * Note 1: if <code>ubidi_setContext</code> is called repeatedly without
+ * calling <code>ubidi_setPara</code>, the earlier calls have no effect,
+ * only the last call will be remembered for the next call to
+ * <code>ubidi_setPara</code>.<p>
+ *
+ * Note 2: calling <code>ubidi_setContext(pBiDi, NULL, 0, NULL, 0, &errorCode)</code>
+ * cancels any previous setting of non-empty prologue or epilogue.
+ * The next call to <code>ubidi_setPara()</code> will process no
+ * prologue or epilogue.<p>
+ *
+ * Note 3: users must be aware that even after setting the context
+ * before a call to ubidi_setPara() to perform e.g. a logical to visual
+ * transformation, the resulting string may not be identical to what it
+ * would have been if all the text, including prologue and epilogue, had
+ * been processed together.<br>
+ * Example (upper case letters represent RTL characters):<br>
+ * prologue = "<code>abc DE</code>"<br>
+ * epilogue = none<br>
+ * main text = "<code>FGH xyz</code>"<br>
+ * paraLevel = UBIDI_LTR<br>
+ * display without prologue = "<code>HGF xyz</code>"
+ * ("HGF" is adjacent to "xyz")<br>
+ * display with prologue = "<code>abc HGFED xyz</code>"
+ * ("HGF" is not adjacent to "xyz")<br>
+ *
+ * @param pBiDi is a paragraph <code>UBiDi</code> object.
+ *
+ * @param prologue is a pointer to the text which precedes the text that
+ * will be specified in a coming call to ubidi_setPara().
+ * If there is no prologue to consider, then <code>proLength</code>
+ * must be zero and this pointer can be NULL.
+ *
+ * @param proLength is the length of the prologue; if <code>proLength==-1</code>
+ * then the prologue must be zero-terminated.
+ * Otherwise proLength must be >= 0. If <code>proLength==0</code>, it means
+ * that there is no prologue to consider.
+ *
+ * @param epilogue is a pointer to the text which follows the text that
+ * will be specified in a coming call to ubidi_setPara().
+ * If there is no epilogue to consider, then <code>epiLength</code>
+ * must be zero and this pointer can be NULL.
+ *
+ * @param epiLength is the length of the epilogue; if <code>epiLength==-1</code>
+ * then the epilogue must be zero-terminated.
+ * Otherwise epiLength must be >= 0. If <code>epiLength==0</code>, it means
+ * that there is no epilogue to consider.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_setPara
+ * @stable ICU 4.8
+ */
+U_STABLE void U_EXPORT2
+ubidi_setContext(UBiDi *pBiDi,
+ const UChar *prologue, int32_t proLength,
+ const UChar *epilogue, int32_t epiLength,
+ UErrorCode *pErrorCode);
+
/**
* Perform the Unicode Bidi algorithm. It is defined in the
* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Anned #9</a>,
* must take care of the deallocation of the <code>embeddingLevels</code> array.<br><br>
* <strong>Note:</strong> the <code>embeddingLevels</code> array must be
* at least <code>length</code> long.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
*
* @param pErrorCode must be a valid pointer to an error code value.
* @stable ICU 2.0
* that indicates if the entire text
* represented by this object is unidirectional,
* and which direction, or if it is mixed-directional.
+ * Note - The value <code>UBIDI_NEUTRAL</code> is never returned from this method.
*
* @see UBiDiDirection
* @stable ICU 2.0
U_STABLE UBiDiDirection U_EXPORT2
ubidi_getDirection(const UBiDi *pBiDi);
+/**
+ * Gets the base direction of the text provided according
+ * to the Unicode Bidirectional Algorithm. The base direction
+ * is derived from the first character in the string with bidirectional
+ * character type L, R, or AL. If the first such character has type L,
+ * <code>UBIDI_LTR</code> is returned. If the first such character has
+ * type R or AL, <code>UBIDI_RTL</code> is returned. If the string does
+ * not contain any character of these types, then
+ * <code>UBIDI_NEUTRAL</code> is returned.
+ *
+ * This is a lightweight function for use when only the base direction
+ * is needed and no further bidi processing of the text is needed.
+ *
+ * @param text is a pointer to the text whose base
+ * direction is needed.
+ * Note: the text must be (at least) @c length long.
+ *
+ * @param length is the length of the text;
+ * if <code>length==-1</code> then the text
+ * must be zero-terminated.
+ *
+ * @return <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code>,
+ * <code>UBIDI_NEUTRAL</code>
+ *
+ * @see UBiDiDirection
+ * @stable ICU 4.6
+ */
+U_STABLE UBiDiDirection U_EXPORT2
+ubidi_getBaseDirection(const UChar *text, int32_t length );
+
/**
* Get the pointer to the text.
*
*
* @return the directionality of the run,
* <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>,
- * never <code>UBIDI_MIXED</code>.
+ * never <code>UBIDI_MIXED</code>,
+ * never <code>UBIDI_NEUTRAL</code>.
*
* @see ubidi_countRuns
*