icuSources/i18n/unicode/translit.h

   1 /*
   2 **********************************************************************
   3 * Copyright (C) 1999-2003, International Business Machines
   4 * Corporation and others. All Rights Reserved.
   5 **********************************************************************
   6 *   Date        Name        Description
   7 *   11/17/99    aliu        Creation.
   8 **********************************************************************
   9 */
  10 #ifndef TRANSLIT_H
  11 #define TRANSLIT_H
  12
  13 #include "unicode/utypes.h"
  14
  15 #if !UCONFIG_NO_TRANSLITERATION
  16
  17 #include "unicode/uobject.h"
  18 #include "unicode/unistr.h"
  19 #include "unicode/parseerr.h"
  20 #include "unicode/utrans.h" // UTransPosition, UTransDirection
  21
  22 U_NAMESPACE_BEGIN
  23
  24 class UnicodeFilter;
  25 class UnicodeSet;
  26 class CompoundTransliterator;
  27 class TransliteratorParser;
  28 class NormalizationTransliterator;
  29 class TransliteratorIDParser;
  30
  31 /**
  32  * <code>Transliterator</code> is an abstract class that
  33  * transliterates text from one format to another.  The most common
  34  * kind of transliterator is a script, or alphabet, transliterator.
  35  * For example, a Russian to Latin transliterator changes Russian text
  36  * written in Cyrillic characters to phonetically equivalent Latin
  37  * characters.  It does not <em>translate</em> Russian to English!
  38  * Transliteration, unlike translation, operates on characters, without
  39  * reference to the meanings of words and sentences.
  40  *
  41  * <p>Although script conversion is its most common use, a
  42  * transliterator can actually perform a more general class of tasks.
  43  * In fact, <code>Transliterator</code> defines a very general API
  44  * which specifies only that a segment of the input text is replaced
  45  * by new text.  The particulars of this conversion are determined
  46  * entirely by subclasses of <code>Transliterator</code>.
  47  *
  48  * <p><b>Transliterators are stateless</b>
  49  *
  50  * <p><code>Transliterator</code> objects are <em>stateless</em>; they
  51  * retain no information between calls to
  52  * <code>transliterate()</code>.  (However, this does <em>not</em>
  53  * mean that threads may share transliterators without synchronizing
  54  * them.  Transliterators are not immutable, so they must be
  55  * synchronized when shared between threads.)  This might seem to
  56  * limit the complexity of the transliteration operation.  In
  57  * practice, subclasses perform complex transliterations by delaying
  58  * the replacement of text until it is known that no other
  59  * replacements are possible.  In other words, although the
  60  * <code>Transliterator</code> objects are stateless, the source text
  61  * itself embodies all the needed information, and delayed operation
  62  * allows arbitrary complexity.
  63  *
  64  * <p><b>Batch transliteration</b>
  65  *
  66  * <p>The simplest way to perform transliteration is all at once, on a
  67  * string of existing text.  This is referred to as <em>batch</em>
  68  * transliteration.  For example, given a string <code>input</code>
  69  * and a transliterator <code>t</code>, the call
  70  *
  71  * <blockquote><code>String result = t.transliterate(input);
  72  * </code></blockquote>
  73  *
  74  * will transliterate it and return the result.  Other methods allow
  75  * the client to specify a substring to be transliterated and to use
  76  * {@link Replaceable} objects instead of strings, in order to
  77  * preserve out-of-band information (such as text styles).
  78  *
  79  * <p><b>Keyboard transliteration</b>
  80  *
  81  * <p>Somewhat more involved is <em>keyboard</em>, or incremental
  82  * transliteration.  This is the transliteration of text that is
  83  * arriving from some source (typically the user's keyboard) one
  84  * character at a time, or in some other piecemeal fashion.
  85  *
  86  * <p>In keyboard transliteration, a <code>Replaceable</code> buffer
  87  * stores the text.  As text is inserted, as much as possible is
  88  * transliterated on the fly.  This means a GUI that displays the
  89  * contents of the buffer may show text being modified as each new
  90  * character arrives.
  91  *
  92  * <p>Consider the simple <code>RuleBasedTransliterator</code>:
  93  *
  94  * <blockquote><code>
  95  * th&gt;{theta}<br>
  96  * t&gt;{tau}
  97  * </code></blockquote>
  98  *
  99  * When the user types 't', nothing will happen, since the
 100  * transliterator is waiting to see if the next character is 'h'.  To
 101  * remedy this, we introduce the notion of a cursor, marked by a '|'
 102  * in the output string:
 103  *
 104  * <blockquote><code>
 105  * t&gt;|{tau}<br>
 106  * {tau}h&gt;{theta}
 107  * </code></blockquote>
 108  *
 109  * Now when the user types 't', tau appears, and if the next character
 110  * is 'h', the tau changes to a theta.  This is accomplished by
 111  * maintaining a cursor position (independent of the insertion point,
 112  * and invisible in the GUI) across calls to
 113  * <code>transliterate()</code>.  Typically, the cursor will
 114  * be coincident with the insertion point, but in a case like the one
 115  * above, it will precede the insertion point.
 116  *
 117  * <p>Keyboard transliteration methods maintain a set of three indices
 118  * that are updated with each call to
 119  * <code>transliterate()</code>, including the cursor, start,
 120  * and limit.  Since these indices are changed by the method, they are
 121  * passed in an <code>int[]</code> array. The <code>START</code> index
 122  * marks the beginning of the substring that the transliterator will
 123  * look at.  It is advanced as text becomes committed (but it is not
 124  * the committed index; that's the <code>CURSOR</code>).  The
 125  * <code>CURSOR</code> index, described above, marks the point at
 126  * which the transliterator last stopped, either because it reached
 127  * the end, or because it required more characters to disambiguate
 128  * between possible inputs.  The <code>CURSOR</code> can also be
 129  * explicitly set by rules in a <code>RuleBasedTransliterator</code>.
 130  * Any characters before the <code>CURSOR</code> index are frozen;
 131  * future keyboard transliteration calls within this input sequence
 132  * will not change them.  New text is inserted at the
 133  * <code>LIMIT</code> index, which marks the end of the substring that
 134  * the transliterator looks at.
 135  *
 136  * <p>Because keyboard transliteration assumes that more characters
 137  * are to arrive, it is conservative in its operation.  It only
 138  * transliterates when it can do so unambiguously.  Otherwise it waits
 139  * for more characters to arrive.  When the client code knows that no
 140  * more characters are forthcoming, perhaps because the user has
 141  * performed some input termination operation, then it should call
 142  * <code>finishTransliteration()</code> to complete any
 143  * pending transliterations.
 144  *
 145  * <p><b>Inverses</b>
 146  *
 147  * <p>Pairs of transliterators may be inverses of one another.  For
 148  * example, if transliterator <b>A</b> transliterates characters by
 149  * incrementing their Unicode value (so "abc" -> "def"), and
 150  * transliterator <b>B</b> decrements character values, then <b>A</b>
 151  * is an inverse of <b>B</b> and vice versa.  If we compose <b>A</b>
 152  * with <b>B</b> in a compound transliterator, the result is the
 153  * identity transliterator, that is, a transliterator that does not
 154  * change its input text.
 155  *
 156  * The <code>Transliterator</code> method <code>getInverse()</code>
 157  * returns a transliterator's inverse, if one exists, or
 158  * <code>null</code> otherwise.  However, the result of
 159  * <code>getInverse()</code> usually will <em>not</em> be a true
 160  * mathematical inverse.  This is because true inverse transliterators
 161  * are difficult to formulate.  For example, consider two
 162  * transliterators: <b>AB</b>, which transliterates the character 'A'
 163  * to 'B', and <b>BA</b>, which transliterates 'B' to 'A'.  It might
 164  * seem that these are exact inverses, since
 165  *
 166  * <blockquote>"A" x <b>AB</b> -> "B"<br>
 167  * "B" x <b>BA</b> -> "A"</blockquote>
 168  *
 169  * where 'x' represents transliteration.  However,
 170  *
 171  * <blockquote>"ABCD" x <b>AB</b> -> "BBCD"<br>
 172  * "BBCD" x <b>BA</b> -> "AACD"</blockquote>
 173  *
 174  * so <b>AB</b> composed with <b>BA</b> is not the
 175  * identity. Nonetheless, <b>BA</b> may be usefully considered to be
 176  * <b>AB</b>'s inverse, and it is on this basis that
 177  * <b>AB</b><code>.getInverse()</code> could legitimately return
 178  * <b>BA</b>.
 179  *
 180  * <p><b>IDs and display names</b>
 181  *
 182  * <p>A transliterator is designated by a short identifier string or
 183  * <em>ID</em>.  IDs follow the format <em>source-destination</em>,
 184  * where <em>source</em> describes the entity being replaced, and
 185  * <em>destination</em> describes the entity replacing
 186  * <em>source</em>.  The entities may be the names of scripts,
 187  * particular sequences of characters, or whatever else it is that the
 188  * transliterator converts to or from.  For example, a transliterator
 189  * from Russian to Latin might be named "Russian-Latin".  A
 190  * transliterator from keyboard escape sequences to Latin-1 characters
 191  * might be named "KeyboardEscape-Latin1".  By convention, system
 192  * entity names are in English, with the initial letters of words
 193  * capitalized; user entity names may follow any format so long as
 194  * they do not contain dashes.
 195  *
 196  * <p>In addition to programmatic IDs, transliterator objects have
 197  * display names for presentation in user interfaces, returned by
 198  * {@link #getDisplayName()}.
 199  *
 200  * <p><b>Factory methods and registration</b>
 201  *
 202  * <p>In general, client code should use the factory method
 203  * {@link #createInstance()} to obtain an instance of a
 204  * transliterator given its ID.  Valid IDs may be enumerated using
 205  * <code>getAvailableIDs()</code>.  Since transliterators are mutable,
 206  * multiple calls to {@link #createInstance()} with the same ID will
 207  * return distinct objects.
 208  *
 209  * <p>In addition to the system transliterators registered at startup,
 210  * user transliterators may be registered by calling
 211  * <code>registerInstance()</code> at run time.  A registered instance
 212  * acts as a template; future calls to {@link #createInstance()} with the ID
 213  * of the registered object return clones of that object.  Thus any
 214  * object passed to <tt>registerInstance()</tt> must implement
 215  * <tt>clone()</tt> properly.  To register a transliterator subclass
 216  * without instantiating it (until it is needed), users may call
 217  * <code>registerClass()</code>.  In this case, the objects are
 218  * instantiated by invoking the zero-argument public constructor of
 219  * the class.
 220  *
 221  * <p><b>Subclassing</b>
 222  *
 223  * Subclasses must implement the abstract method
 224  * <code>handleTransliterate()</code>.  <p>Subclasses should override
 225  * the <code>transliterate()</code> method taking a
 226  * <code>Replaceable</code> and the <code>transliterate()</code>
 227  * method taking a <code>String</code> and <code>StringBuffer</code>
 228  * if the performance of these methods can be improved over the
 229  * performance obtained by the default implementations in this class.
 230  *
 231  * @author Alan Liu
 232  * @stable ICU 2.0
 233  */
 234 class U_I18N_API Transliterator : public UObject {
 235
 236 private:
 237
 238     /**
 239      * Programmatic name, e.g., "Latin-Arabic".
 240      */
 241     UnicodeString ID;
 242
 243     /**
 244      * This transliterator's filter.  Any character for which
 245      * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
 246      * altered by this transliterator.  If <tt>filter</tt> is
 247      * <tt>null</tt> then no filtering is applied.
 248      */
 249     UnicodeFilter* filter;
 250
 251     int32_t maximumContextLength;
 252
 253  public:
 254
 255     /**
 256      * A context integer or pointer for a factory function, passed by
 257      * value.  As a union, it holds either an integer or a pointer, not both.
 258      * @draft ICU 2.4
 259      */
 260     union Token {
 261         /**
 262          * This token, interpreted as a 32-bit integer.
 263          * @draft ICU 2.4
 264          */
 265         int32_t integer;
 266         /**
 267          * This token, interpreted as a native pointer.
 268          * @draft ICU 2.4
 269          */
 270         void*   pointer;
 271     };
 272
 273     /**
 274      * Return a token containing an integer.
 275      * @return a token containing an integer.
 276      * @draft ICU 2.4
 277      */
 278     inline static Token integerToken(int32_t);
 279
 280     /**
 281      * Return a token containing a pointer.
 282      * @return a token containing a pointer.
 283      * @draft ICU 2.4
 284      */
 285     inline static Token pointerToken(void*);
 286
 287     /**
 288      * A function that creates and returns a Transliterator.  When
 289      * invoked, it will be passed the ID string that is being
 290      * instantiated, together with the context pointer that was passed
 291      * in when the factory function was first registered.  Many
 292      * factory functions will ignore both parameters, however,
 293      * functions that are registered to more than one ID may use the
 294      * ID or the context parameter to parameterize the transliterator
 295      * they create.
 296      * @param ID      the string identifier for this transliterator
 297      * @param context a context pointer that will be stored and
 298      *                later passed to the factory function when an ID matching
 299      *                the registration ID is being instantiated with this factory.
 300      * @draft ICU 2.4
 301      */
 302     typedef Transliterator* (*Factory)(const UnicodeString& ID, Token context);
 303
 304 protected:
 305
 306     /**
 307      * Constructs a transliterator with the given ID and adopted filter.
 308      * @param ID the string identifier for this transliterator
 309      * @param adoptedFilter the filter.  Any character for which
 310      * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
 311      * altered by this transliterator.  If <tt>filter</tt> is
 312      * <tt>null</tt> then no filtering is applied.
 313      * @draft ICU 2.4
 314      */
 315     Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter);
 316
 317     /**
 318      * Copy constructor.
 319      * @draft ICU 2.4
 320      */
 321     Transliterator(const Transliterator&);
 322
 323     /**
 324      * Assignment operator.
 325      * @draft ICU 2.4
 326      */
 327     Transliterator& operator=(const Transliterator&);
 328
 329     /**
 330      * Create a transliterator from a basic ID.  This is an ID
 331      * containing only the forward direction source, target, and
 332      * variant.
 333      * @param id a basic ID of the form S-T or S-T/V.
 334      * @param canon canonical ID to assign to the object, or
 335      * NULL to leave the ID unchanged
 336      * @return a newly created Transliterator or null if the ID is
 337      * invalid.
 338      * @draft ICU 2.4
 339      */
 340     static Transliterator* createBasicInstance(const UnicodeString& id,
 341                                                const UnicodeString* canon);
 342
 343     friend class TransliteratorParser; // for parseID()
 344     friend class TransliteratorIDParser; // for createBasicInstance()
 345
 346 public:
 347
 348     /**
 349      * Destructor.
 350      * @stable ICU 2.0
 351      */
 352     virtual ~Transliterator();
 353
 354     /**
 355      * Implements Cloneable.
 356      * All subclasses are encouraged to implement this method if it is
 357      * possible and reasonable to do so.  Subclasses that are to be
 358      * registered with the system using <tt>registerInstance()</tt>
 359      * are required to implement this method.  If a subclass does not
 360      * implement clone() properly and is registered with the system
 361      * using registerInstance(), then the default clone() implementation
 362      * will return null, and calls to createInstance() will fail.
 363      *
 364      * @return a copy of the object; this base-class implementation returns 0 (null).
 365      * @see #registerInstance
 366      * @stable ICU 2.0
 367      */
 368     virtual Transliterator* clone() const { return 0; }
 369
 370     /**
 371      * Transliterates a segment of a string, with optional filtering.
 372      *
 373      * @param text the string to be transliterated
 374      * @param start the beginning index, inclusive; <code>0 <= start
 375      * <= limit</code>.
 376      * @param limit the ending index, exclusive; <code>start <= limit
 377      * <= text.length()</code>.
 378      * @return The new limit index.  The text previously occupying <code>[start,
 379      * limit)</code> has been transliterated, possibly to a string of a different
 380      * length, at <code>[start, </code><em>new-limit</em><code>)</code>, where
 381      * <em>new-limit</em> is the return value. If the input offsets are out of bounds,
 382      * the returned value is -1 and the input string remains unchanged.
 383      * @stable ICU 2.0
 384      */
 385     virtual int32_t transliterate(Replaceable& text,
 386                                   int32_t start, int32_t limit) const;
 387
 388     /**
 389      * Transliterates an entire string in place. Convenience method.
 390      * @param text the string to be transliterated
 391      * @stable ICU 2.0
 392      */
 393     virtual void transliterate(Replaceable& text) const;
 394
 395     /**
 396      * Transliterates the portion of the text buffer that can be
 397      * transliterated unambiguously after new text has been inserted,
 398      * typically as a result of a keyboard event.  The new text in
 399      * <code>insertion</code> will be inserted into <code>text</code>
 400      * at <code>index.limit</code>, advancing
 401      * <code>index.limit</code> by <code>insertion.length()</code>.
 402      * Then the transliterator will try to transliterate characters of
 403      * <code>text</code> between <code>index.cursor</code> and
 404      * <code>index.limit</code>.  Characters before
 405      * <code>index.cursor</code> will not be changed.
 406      *
 407      * <p>Upon return, values in <code>index</code> will be updated.
 408      * <code>index.start</code> will be advanced to the first
 409      * character that future calls to this method will read.
 410      * <code>index.cursor</code> and <code>index.limit</code> will
 411      * be adjusted to delimit the range of text that future calls to
 412      * this method may change.
 413      *
 414      * <p>Typical usage of this method begins with an initial call
 415      * with <code>index.start</code> and <code>index.limit</code>
 416      * set to indicate the portion of <code>text</code> to be
 417      * transliterated, and <code>index.cursor == index.start</code>.
 418      * Thereafter, <code>index</code> can be used without
 419      * modification in future calls, provided that all changes to
 420      * <code>text</code> are made via this method.
 421      *
 422      * <p>This method assumes that future calls may be made that will
 423      * insert new text into the buffer.  As a result, it only performs
 424      * unambiguous transliterations.  After the last call to this
 425      * method, there may be untransliterated text that is waiting for
 426      * more input to resolve an ambiguity.  In order to perform these
 427      * pending transliterations, clients should call {@link
 428      * #finishTransliteration()} after the last call to this
 429      * method has been made.
 430      *
 431      * @param text the buffer holding transliterated and untransliterated text
 432      * @param index an array of three integers.
 433      *
 434      * <ul><li><code>index.start</code>: the beginning index,
 435      * inclusive; <code>0 <= index.start <= index.limit</code>.
 436      *
 437      * <li><code>index.limit</code>: the ending index, exclusive;
 438      * <code>index.start <= index.limit <= text.length()</code>.
 439      * <code>insertion</code> is inserted at
 440      * <code>index.limit</code>.
 441      *
 442      * <li><code>index.cursor</code>: the next character to be
 443      * considered for transliteration; <code>index.start <=
 444      * index.cursor <= index.limit</code>.  Characters before
 445      * <code>index.cursor</code> will not be changed by future calls
 446      * to this method.</ul>
 447      *
 448      * @param insertion text to be inserted and possibly
 449      * transliterated into the translation buffer at
 450      * <code>index.limit</code>.  If <code>null</code> then no text
 451      * is inserted.
 452      * @param status    Output param to be filled in with a success or an error.
 453      * @see #handleTransliterate
 454      * @exception IllegalArgumentException if <code>index</code>
 455      * is invalid
 456      * @see UTransPosition
 457      * @stable ICU 2.0
 458      */
 459     virtual void transliterate(Replaceable& text, UTransPosition& index,
 460                                const UnicodeString& insertion,
 461                                UErrorCode& status) const;
 462
 463     /**
 464      * Transliterates the portion of the text buffer that can be
 465      * transliterated unambiguously after a new character has been
 466      * inserted, typically as a result of a keyboard event.  This is a
 467      * convenience method; see {@link
 468      * #transliterate(Replaceable, UTransPosition, UnicodeString)} for details.
 469      * @param text the buffer holding transliterated and
 470      * untransliterated text
 471      * @param index an array of three integers.  See {@link
 472      * #transliterate(Replaceable, UTransPosition, UnicodeString)}.
 473      * @param insertion text to be inserted and possibly
 474      * transliterated into the translation buffer at
 475      * <code>index.limit</code>.
 476      * @param status    Output param to be filled in with a success or an error.
 477      * @see #transliterate(Replaceable, UTransPosition, UnicodeString)
 478      * @stable ICU 2.0
 479      */
 480     virtual void transliterate(Replaceable& text, UTransPosition& index,
 481                                UChar32 insertion,
 482                                UErrorCode& status) const;
 483
 484     /**
 485      * Transliterates the portion of the text buffer that can be
 486      * transliterated unambiguously.  This is a convenience method; see
 487      * {@link #transliterate(Replaceable, UTransPosition, UnicodeString)} for
 488      * details.
 489      * @param text the buffer holding transliterated and
 490      * untransliterated text
 491      * @param index an array of three integers.  See {@link
 492      * #transliterate(Replaceable, UTransPosition, UnicodeString)}.
 493      * @param status    Output param to be filled in with a success or an error.
 494      * @see #transliterate(Replaceable, UTransPosition, UnicodeString)
 495      * @stable ICU 2.0
 496      */
 497     virtual void transliterate(Replaceable& text, UTransPosition& index,
 498                                UErrorCode& status) const;
 499
 500     /**
 501      * Finishes any pending transliterations that were waiting for
 502      * more characters.  Clients should call this method as the last
 503      * call after a sequence of one or more calls to
 504      * <code>transliterate()</code>.
 505      * @param text the buffer holding transliterated and
 506      * untransliterated text.
 507      * @param index the array of indices previously passed to {@link
 508      * #transliterate()}
 509      * @stable ICU 2.0
 510      */
 511     virtual void finishTransliteration(Replaceable& text,
 512                                        UTransPosition& index) const;
 513
 514 private:
 515
 516     /**
 517      * This internal method does incremental transliteration.  If the
 518      * 'insertion' is non-null then we append it to 'text' before
 519      * proceeding.  This method calls through to the pure virtual
 520      * framework method handleTransliterate() to do the actual
 521      * work.
 522      * @param text the buffer holding transliterated and
 523      * untransliterated text
 524      * @param index an array of three integers.  See {@link
 525      * #transliterate(Replaceable, UTransPosition, UnicodeString)}.
 526      * @param insertion text to be inserted and possibly
 527      * transliterated into the translation buffer at
 528      * <code>index.limit</code>.
 529      * @param status    Output param to be filled in with a success or an error.
 530      */
 531     void _transliterate(Replaceable& text,
 532                         UTransPosition& index,
 533                         const UnicodeString* insertion,
 534                         UErrorCode &status) const;
 535
 536 protected:
 537
 538     /**
 539      * Abstract method that concrete subclasses define to implement
 540      * their transliteration algorithm.  This method handles both
 541      * incremental and non-incremental transliteration.  Let
 542      * <code>originalStart</code> refer to the value of
 543      * <code>pos.start</code> upon entry.
 544      *
 545      * <ul>
 546      *  <li>If <code>incremental</code> is false, then this method
 547      *  should transliterate all characters between
 548      *  <code>pos.start</code> and <code>pos.limit</code>. Upon return
 549      *  <code>pos.start</code> must == <code> pos.limit</code>.</li>
 550      *
 551      *  <li>If <code>incremental</code> is true, then this method
 552      *  should transliterate all characters between
 553      *  <code>pos.start</code> and <code>pos.limit</code> that can be
 554      *  unambiguously transliterated, regardless of future insertions
 555      *  of text at <code>pos.limit</code>.  Upon return,
 556      *  <code>pos.start</code> should be in the range
 557      *  [<code>originalStart</code>, <code>pos.limit</code>).
 558      *  <code>pos.start</code> should be positioned such that
 559      *  characters [<code>originalStart</code>, <code>
 560      *  pos.start</code>) will not be changed in the future by this
 561      *  transliterator and characters [<code>pos.start</code>,
 562      *  <code>pos.limit</code>) are unchanged.</li>
 563      * </ul>
 564      *
 565      * <p>Implementations of this method should also obey the
 566      * following invariants:</p>
 567      *
 568      * <ul>
 569      *  <li> <code>pos.limit</code> and <code>pos.contextLimit</code>
 570      *  should be updated to reflect changes in length of the text
 571      *  between <code>pos.start</code> and <code>pos.limit</code>. The
 572      *  difference <code> pos.contextLimit - pos.limit</code> should
 573      *  not change.</li>
 574      *
 575      *  <li><code>pos.contextStart</code> should not change.</li>
 576      *
 577      *  <li>Upon return, neither <code>pos.start</code> nor
 578      *  <code>pos.limit</code> should be less than
 579      *  <code>originalStart</code>.</li>
 580      *
 581      *  <li>Text before <code>originalStart</code> and text after
 582      *  <code>pos.limit</code> should not change.</li>
 583      *
 584      *  <li>Text before <code>pos.contextStart</code> and text after
 585      *  <code> pos.contextLimit</code> should be ignored.</li>
 586      * </ul>
 587      *
 588      * <p>Subclasses may safely assume that all characters in
 589      * [<code>pos.start</code>, <code>pos.limit</code>) are filtered.
 590      * In other words, the filter has already been applied by the time
 591      * this method is called.  See
 592      * <code>filteredTransliterate()</code>.
 593      *
 594      * <p>This method is <b>not</b> for public consumption.  Calling
 595      * this method directly will transliterate
 596      * [<code>pos.start</code>, <code>pos.limit</code>) without
 597      * applying the filter. End user code should call <code>
 598      * transliterate()</code> instead of this method. Subclass code
 599      * should call <code>filteredTransliterate()</code> instead of
 600      * this method.<p>
 601      *
 602      * @param text the buffer holding transliterated and
 603      * untransliterated text
 604      *
 605      * @param pos the indices indicating the start, limit, context
 606      * start, and context limit of the text.
 607      *
 608      * @param incremental if true, assume more text may be inserted at
 609      * <code>pos.limit</code> and act accordingly.  Otherwise,
 610      * transliterate all text between <code>pos.start</code> and
 611      * <code>pos.limit</code> and move <code>pos.start</code> up to
 612      * <code>pos.limit</code>.
 613      *
 614      * @see #transliterate
 615      * @draft ICU 2.4
 616      */
 617     virtual void handleTransliterate(Replaceable& text,
 618                                      UTransPosition& pos,
 619                                      UBool incremental) const = 0;
 620
 621     /**
 622      * Transliterate a substring of text, as specified by index, taking filters
 623      * into account.  This method is for subclasses that need to delegate to
 624      * another transliterator, such as CompoundTransliterator.
 625      * @param text the text to be transliterated
 626      * @param index the position indices
 627      * @param incremental if TRUE, then assume more characters may be inserted
 628      * at index.limit, and postpone processing to accommodate future incoming
 629      * characters
 630      * @draft ICU 2.4
 631      */
 632     virtual void filteredTransliterate(Replaceable& text,
 633                                        UTransPosition& index,
 634                                        UBool incremental) const;
 635
 636     friend class CompoundTransliterator; // for filteredTransliterate()
 637     friend class AnyTransliterator; // for filteredTransliterate()
 638
 639 private:
 640
 641     /**
 642      * Top-level transliteration method, handling filtering, incremental and
 643      * non-incremental transliteration, and rollback.  All transliteration
 644      * public API methods eventually call this method with a rollback argument
 645      * of TRUE.  Other entities may call this method but rollback should be
 646      * FALSE.
 647      *
 648      * <p>If this transliterator has a filter, break up the input text into runs
 649      * of unfiltered characters.  Pass each run to
 650      * <subclass>.handleTransliterate().
 651      *
 652      * <p>In incremental mode, if rollback is TRUE, perform a special
 653      * incremental procedure in which several passes are made over the input
 654      * text, adding one character at a time, and committing successful
 655      * transliterations as they occur.  Unsuccessful transliterations are rolled
 656      * back and retried with additional characters to give correct results.
 657      *
 658      * @param text the text to be transliterated
 659      * @param index the position indices
 660      * @param incremental if TRUE, then assume more characters may be inserted
 661      * at index.limit, and postpone processing to accommodate future incoming
 662      * characters
 663      * @param rollback if TRUE and if incremental is TRUE, then perform special
 664      * incremental processing, as described above, and undo partial
 665      * transliterations where necessary.  If incremental is FALSE then this
 666      * parameter is ignored.
 667      */
 668     virtual void filteredTransliterate(Replaceable& text,
 669                                        UTransPosition& index,
 670                                        UBool incremental,
 671                                        UBool rollback) const;
 672
 673 public:
 674
 675     /**
 676      * Returns the length of the longest context required by this transliterator.
 677      * This is <em>preceding</em> context.  The default value supplied
 678      * by <code>Transliterator</code> is zero.  This method is not virtual:
 679      * subclasses that use preceding context should call
 680      * setMaximumContextLength() with the correct value rather than override
 681      * it.  For example, if a transliterator translates "ddd" (where d is any
 682      * digit) to "555" when preceded by "(ddd)", then the preceding context
 683      * length is 5, the length of "(ddd)".
 684      * @return The maximum number of preceding context characters this
 685      * transliterator needs to examine
 686      * @stable ICU 2.0
 687      */
 688     int32_t getMaximumContextLength(void) const;
 689
 690 protected:
 691
 692     /**
 693      * Method for subclasses to use to set the maximum preceding-context length.
 694      * @param maxContextLength the new maximum context length to be set.
 695      * @see #getMaximumContextLength
 696      * @draft ICU 2.4
 697      */
 698     void setMaximumContextLength(int32_t maxContextLength);
 699
 700 public:
 701
 702     /**
 703      * Returns a programmatic identifier for this transliterator.
 704      * If this identifier is passed to <code>createInstance()</code>, it
 705      * will return a transliterator with this ID, if one has been registered.
 706      * @return a programmatic identifier for this transliterator.
 707      * @see #registerInstance
 708      * @see #registerFactory
 709      * @see #getAvailableID
 710      * @stable ICU 2.0
 711      */
 712     virtual const UnicodeString& getID(void) const;
 713
 714     /**
 715      * Returns a name for the transliterator with the given ID that is
 716      * appropriate for display to the user in the default locale.  See the
 717      * overload of getDisplayName() that takes a Locale for details.
 718      * @param ID     the string identifier for the transliterator
 719      * @param result Output param to receive the display name
 720      * @return       A reference to 'result'.
 721      * @stable ICU 2.0
 722      */
 723     static UnicodeString& getDisplayName(const UnicodeString& ID,
 724                                          UnicodeString& result);
 725
 726     /**
 727      * Returns a name for the transliterator with the given ID that is
 728      * appropriate for display to the user in the given locale.  This name
 729      * is taken from the locale resource data in the standard manner of the
 730      * <code>java.text</code> package.
 731      *
 732      * <p>If no localized names exist in the system resource bundles,
 733      * a name is synthesized using a localized
 734      * <code>MessageFormat</code> pattern from the resource data.  The
 735      * arguments to this pattern are an integer followed by one or two
 736      * strings.  The integer is the number of strings, either 1 or 2.
 737      * The strings are formed by splitting the given ID at the first
 738      * '-'.  If there is no '-', then the entire ID forms the only
 739      * string.
 740      * @param ID       the string identifier for the transliterator
 741      * @param inLocale the Locale in which the display name should be
 742      *                 localized.
 743      * @param result   Output param to receive the display name
 744      * @return         A reference to 'result'.
 745      * @stable ICU 2.0
 746      */
 747     static UnicodeString& getDisplayName(const UnicodeString& ID,
 748                                          const Locale& inLocale,
 749                                          UnicodeString& result);
 750
 751     /**
 752      * Returns the filter used by this transliterator, or <tt>NULL</tt>
 753      * if this transliterator uses no filter.
 754      * @return a const pointer to the current filter, or <tt>NULL</tt> if none.
 755      * @see #orphanFilter
 756      * @stable ICU 2.0
 757      */
 758     const UnicodeFilter* getFilter(void) const;
 759
 760     /**
 761      * Detaches and returns the filter used by this transliterator, or returns
 762      * <tt>NULL</tt> if it uses no filter.  Ownership passes to the caller, who
 763      * must eventually delete the result.  After this call, this
 764      * transliterator's filter is set to <tt>NULL</tt>.
 765      * @return the filter formerly used by this transliterator, or <tt>NULL</tt>
 766      *         if this transliterator used no filter.
 767      * @draft ICU 2.4
 768      */
 769     UnicodeFilter* orphanFilter(void);
 770
 771     /**
 772      * Changes the filter used by this transliterator.  If the filter
 773      * is set to <tt>NULL</tt> then no filtering will occur.
 774      *
 775      * <p>Callers must take care if a transliterator is in use by
 776      * multiple threads.  The filter should not be changed by one
 777      * thread while another thread may be transliterating.
 778      * @param adoptedFilter the new filter to be adopted, or <tt>NULL</tt> for none.
 779      * @stable ICU 2.0
 780      */
 781     void adoptFilter(UnicodeFilter* adoptedFilter);
 782
 783     /**
 784      * Returns this transliterator's inverse.  See the class
 785      * documentation for details.  This implementation simply inverts
 786      * the two entities in the ID and attempts to retrieve the
 787      * resulting transliterator.  That is, if <code>getID()</code>
 788      * returns "A-B", then this method will return the result of
 789      * <code>createInstance("B-A")</code>, or <code>NULL</code> if that
 790      * call fails.
 791      *
 792      * <p>Subclasses with knowledge of their inverse may wish to
 793      * supply it; note, however, that this method is not declared virtual.
 794      *
 795      * @param status Output param to be filled in with a success or an error.
 796      * @return a transliterator that is an inverse, not necessarily
 797      * exact, of this transliterator, or <code>NULL</code> if no such
 798      * transliterator is registered.
 799      * @see #registerInstance
 800      * @stable ICU 2.0
 801      */
 802     Transliterator* createInverse(UErrorCode& status) const;
 803
 804     /**
 805      * Returns a <code>Transliterator</code> object given its ID.
 806      * The ID must be either a system transliterator ID or an ID registered
 807      * using <code>registerInstance()</code>.
 808      *
 809      * @param ID a valid ID, as enumerated by <code>getAvailableID()</code>
 810      * @param dir        either FORWARD or REVERSE.
 811      * @param parseError Struct to receive information on position
 812      *                   of error if an error is encountered
 813      * @param status     Output param to be filled in with a success or an error.
 814      * @return A <code>Transliterator</code> object with the given ID
 815      * @see #registerInstance
 816      * @see #getAvailableID
 817      * @see #getID
 818      * @stable ICU 2.0
 819      */
 820     static Transliterator* createInstance(const UnicodeString& ID,
 821                                           UTransDirection dir,
 822                                           UParseError& parseError,
 823                                           UErrorCode& status);
 824
 825     /**
 826      * Returns a <code>Transliterator</code> object given its ID.
 827      * The ID must be either a system transliterator ID or an ID registered
 828      * using <code>registerInstance()</code>.
 829      * @param ID a valid ID, as enumerated by <code>getAvailableID()</code>
 830      * @param dir        either FORWARD or REVERSE.
 831      * @param status     Output param to be filled in with a success or an error.
 832      * @return A <code>Transliterator</code> object with the given ID
 833      * @stable ICU 2.0
 834      */
 835     static Transliterator* createInstance(const UnicodeString& ID,
 836                                           UTransDirection dir,
 837                                           UErrorCode& status);
 838     /**
 839      * Returns a <code>Transliterator</code> object constructed from the
 840      * given rule string.  This will be a RuleBasedTransliterator if the
 841      * rule string contains only rules, a CompoundTransliterator if it
 842      * contains ID blocks, or a NullTransliterator if it contains ID blocks
 843      * which parse as empty for the given direction.
 844      * @param ID            the id for the transliterator.
 845      * @param rules         rules, separated by ';'
 846      * @param dir           either FORWARD or REVERSE.
 847      * @param parseError    Struct to receive information on position
 848      *                      of error if an error is encountered
 849      * @param status        Output param set to success/failure code.
 850      * @return              the transliterator constructed from the rules.
 851      * @stable ICU 2.0
 852      */
 853     static Transliterator* createFromRules(const UnicodeString& ID,
 854                                            const UnicodeString& rules,
 855                                            UTransDirection dir,
 856                                            UParseError& parseError,
 857                                            UErrorCode& status);
 858
 859     /**
 860      * Create a rule string that can be passed to createFromRules()
 861      * to recreate this transliterator.
 862      * @param result the string to receive the rules.  Previous
 863      * contents will be deleted.
 864      * @param escapeUnprintable if TRUE then convert unprintable
 865      * characters to their hex escape representations, \uxxxx or \Uxxxxxxxx.
 866      * Unprintable characters are those other than U+000A, U+0020..U+007E.
 867      * @return a UnicodeString reference, presumably 'result' (confirm in the implementation).
 868      * @stable ICU 2.0
 869      */
 870     virtual UnicodeString& toRules(UnicodeString& result,
 871                                    UBool escapeUnprintable) const;
 872
 873     /**
 874      * Returns the set of all characters that may be modified in the
 875      * input text by this Transliterator.  This incorporates this
 876      * object's current filter; if the filter is changed, the return
 877      * value of this function will change.  The base class's
 878      * handleGetSourceSet() is documented to yield the empty set; some
 879      * subclasses override {@link #handleGetSourceSet()} to return a more
 880      * precise result.  The result is approximate in any case and is
 881      * intended for use by tests, tools, or utilities.
 882      * @param result receives result set; previous contents lost
 883      * @return a reference to result
 884      * @see #getTargetSet
 885      * @see #handleGetSourceSet
 886      * @draft ICU 2.4
 887      */
 888     UnicodeSet& getSourceSet(UnicodeSet& result) const;
 889
 890     /**
 891      * Framework method that computes the set of all characters that
 892      * may be modified in the input text by this Transliterator,
 893      * ignoring the effect of this object's filter.  The base class
 894      * implementation produces the empty set.  Subclasses that wish to
 895      * implement this should override this method.
 896      *
 897      * <p>This method returns nothing; the computed set is delivered
 898      * through the <code>result</code> parameter.
 899      * @param result receives result set; previous contents lost
 900      * @see #getSourceSet
 901      * @see #getTargetSet
 902      * @draft ICU 2.4
 903      */
 904     virtual void handleGetSourceSet(UnicodeSet& result) const;
 905
 906     /**
 907      * Returns the set of all characters that may be generated as
 908      * replacement text by this transliterator.  The default
 909      * implementation returns the empty set.  Some subclasses may
 910      * override this method to return a more precise result.  The
 911      * result is approximate in any case and is intended for
 912      * use by tests, tools, or utilities requiring such
 913      * meta-information.
 914      * @param result receives result set; previous contents lost
 915      * @return a reference to result
 916      * @see #getSourceSet
 917      * @draft ICU 2.4
 918      */
 919     virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;
 920
 921 public:
 922
 923     /**
 924      * Registers a factory function that creates transliterators of
 925      * a given ID.
 926      * @param id the ID being registered
 927      * @param factory a function pointer that will be copied and
 928      * called later when the given ID is passed to createInstance()
 929      * @param context a context token (built with integerToken() or
 930      * pointerToken()) that will be stored and later passed to the
 931      * factory function when the registered ID is instantiated.
 932      * @stable ICU 2.0
 933      */
 934     static void registerFactory(const UnicodeString& id,
 935                                 Factory factory,
 936                                 Token context);
 937
 938     /**
 939      * Registers an instance <tt>obj</tt> of a subclass of
 940      * <code>Transliterator</code> with the system.  When
 941      * <tt>createInstance()</tt> is called with an ID string that is
 942      * equal to <tt>obj->getID()</tt>, then <tt>obj->clone()</tt> is
 943      * returned.
 944      *
 945      * After this call the Transliterator class owns the adoptedObj
 946      * and will delete it.
 947      *
 948      * @param adoptedObj an instance of a subclass of
 949      * <code>Transliterator</code> that defines <tt>clone()</tt>
 950      * @see #createInstance
 951      * @see #registerFactory
 952      * @see #unregister
 953      * @stable ICU 2.0
 954      */
 955     static void registerInstance(Transliterator* adoptedObj);
 956
 957 protected:
 958
 959     /**
 960      * Internal variant of registerFactory(), taking the same arguments.
 961      * @param id the ID being registered
 962      * @param factory a function pointer that will be copied and
 963      * called later when the given ID is passed to createInstance()
 964      * @param context a context token that will be stored and later passed to
 965      * the factory function when the registered ID is instantiated.
 966      * @internal
 967      */
 968     static void _registerFactory(const UnicodeString& id,
 969                                  Factory factory,
 970                                  Token context);
 971
 972     /** Internal variant of registerInstance(), taking the same argument.
 973      * @internal
 974      */
 975     static void _registerInstance(Transliterator* adoptedObj);
 976
 977     /**
 978      * Register two targets as being inverses of one another.  For
 979      * example, calling _registerSpecialInverse("NFC", "NFD", TRUE) causes
 980      * Transliterator to form the following inverse relationships:
 981      *
 982      * <pre>NFC => NFD
 983      * Any-NFC => Any-NFD
 984      * NFD => NFC
 985      * Any-NFD => Any-NFC</pre>
 986      *
 987      * (Without the special inverse registration, the inverse of NFC
 988      * would be NFC-Any.)  Note that NFD is shorthand for Any-NFD, but
 989      * that the presence or absence of "Any-" is preserved.
 990      *
 991      * <p>The relationship is symmetrical; registering (a, b) is
 992      * equivalent to registering (b, a).
 993      *
 994      * <p>The relevant IDs must still be registered separately as
 995      * factories or classes.
 996      *
 997      * <p>Only the targets are specified.  Special inverses always
 998      * have the form Any-Target1 <=> Any-Target2.  The target should
 999      * have canonical casing (the casing desired to be produced when
1000      * an inverse is formed) and should contain no whitespace or other
1001      * extraneous characters.
1002      *
1003      * @param target the target against which to register the inverse
1004      * @param inverseTarget the inverse of target, that is
1005      * Any-target.getInverse() => Any-inverseTarget
1006      * @param bidirectional if TRUE, register the reverse relation
1007      * as well, that is, Any-inverseTarget.getInverse() => Any-target
1008      * @internal
1009      */
1010     static void _registerSpecialInverse(const UnicodeString& target,
1011                                         const UnicodeString& inverseTarget,
1012                                         UBool bidirectional);
1013
1014 public:
1015
1016     /**
1017      * Unregisters a transliterator or class.  This may be either
1018      * a system transliterator or a user transliterator or class.
1019      * Any attempt to construct an unregistered transliterator based
1020      * on its ID will fail.
1021      *
1022      * <p>This function returns nothing; the previously registered object
1023      * is not handed back to the caller.
1024      * @param ID the ID of the transliterator or class
1025      * @see #registerInstance
1026      * @see #registerFactory
1027      * @stable ICU 2.0
1028      */
1029     static void unregister(const UnicodeString& ID);
1030
1031 public:
1032
1033     /**
1034      * Return the number of IDs currently registered with the system.
1035      * To retrieve the actual IDs, call getAvailableID(i) for each i
1036      * from 0 to countAvailableIDs() - 1, inclusive.
1037      * @return the number of IDs currently registered with the system.
1038      * @stable ICU 2.0
1039      */
1040     static int32_t countAvailableIDs(void);
1041
1042     /**
1043      * Return the index-th available ID.  index must be between 0
1044      * and countAvailableIDs() - 1, inclusive.  If index is out of
1045      * range, the result of getAvailableID(0) is returned.
1046      * @param index the zero-based index of the ID to return.
1047      * @return      a const reference to the index-th available ID, or,
1048      *              if index is out of range, to the result of
1049      *              getAvailableID(0).
1050      * @stable ICU 2.0
1051      */
1052     static const UnicodeString& getAvailableID(int32_t index);
1053
1054     /**
1055      * Return the number of registered source specifiers.
1056      * @return the count n; getAvailableSource() accepts indices 0 to n-1.
1057      * @stable ICU 2.0
1058      */
1059     static int32_t countAvailableSources(void);
1060
1061     /**
1062      * Return a registered source specifier.
1063      * @param index which specifier to return, from 0 to n-1, where
1064      * n = countAvailableSources()
1065      * @param result fill-in parameter to receive the source specifier.
1066      * If index is out of range, result will be empty.
1067      * @return reference to result
1068      * @stable ICU 2.0
1069      */
1070     static UnicodeString& getAvailableSource(int32_t index,
1071                                              UnicodeString& result);
1072
1073     /**
1074      * Return the number of registered target specifiers for a given
1075      * source specifier.
1076      * @param source the given source specifier.
1077      * @return the count n; getAvailableTarget() accepts indices 0 to n-1
1078      *         for this source.
1079      * @stable ICU 2.0
1080      */
1081     static int32_t countAvailableTargets(const UnicodeString& source);
1082
1083     /**
1084      * Return a registered target specifier for a given source.
1085      * @param index which specifier to return, from 0 to n-1, where
1086      * n = countAvailableTargets(source)
1087      * @param source the source specifier
1088      * @param result fill-in parameter to receive the target specifier.
1089      * If source is invalid or if index is out of range, result will
1090      * be empty.
1091      * @return reference to result
1092      * @stable ICU 2.0
1093      */
1094     static UnicodeString& getAvailableTarget(int32_t index,
1095                                              const UnicodeString& source,
1096                                              UnicodeString& result);
1097
1098     /**
1099      * Return the number of registered variant specifiers for a given source-target pair.
1100      * @param source    the source specifier.
1101      * @param target    the target specifier.
1102      * @return the count n; getAvailableVariant() accepts indices 0 to n-1 for this pair.
1103      * @stable ICU 2.0
1104      */
1105     static int32_t countAvailableVariants(const UnicodeString& source,
1106                                           const UnicodeString& target);
1107
1108     /**
1109      * Return a registered variant specifier for a given source-target
1110      * pair.
1111      * @param index which specifier to return, from 0 to n-1, where
1112      * n = countAvailableVariants(source, target)
1113      * @param source the source specifier
1114      * @param target the target specifier
1115      * @param result fill-in parameter to receive the variant
1116      * specifier.  If source is invalid or if target is invalid or if
1117      * index is out of range, result will be empty.
1118      * @return reference to result
1119      * @stable ICU 2.0
1120      */
1121     static UnicodeString& getAvailableVariant(int32_t index,
1122                                               const UnicodeString& source,
1123                                               const UnicodeString& target,
1124                                               UnicodeString& result);
1125
1126 protected:
1127
1128     /**
1129      * Non-mutexed internal method; same signature as countAvailableSources().
1130      * @internal
1131      */
1132     static int32_t _countAvailableSources(void);
1133
1134     /**
1135      * Non-mutexed internal method; same signature as getAvailableSource().
1136      * @internal
1137      */
1138     static UnicodeString& _getAvailableSource(int32_t index,
1139                                               UnicodeString& result);
1140
1141     /**
1142      * Non-mutexed internal method; same signature as countAvailableTargets().
1143      * @internal
1144      */
1145     static int32_t _countAvailableTargets(const UnicodeString& source);
1146
1147     /**
1148      * Non-mutexed internal method; same signature as getAvailableTarget().
1149      * @internal
1150      */
1151     static UnicodeString& _getAvailableTarget(int32_t index,
1152                                               const UnicodeString& source,
1153                                               UnicodeString& result);
1154
1155     /**
1156      * Non-mutexed internal method; same signature as countAvailableVariants().
1157      * @internal
1158      */
1159     static int32_t _countAvailableVariants(const UnicodeString& source,
1160                                            const UnicodeString& target);
1161
1162     /**
1163      * Non-mutexed internal method; same signature as getAvailableVariant().
1164      * @internal
1165      */
1166     static UnicodeString& _getAvailableVariant(int32_t index,
1167                                                const UnicodeString& source,
1168                                                const UnicodeString& target,
1169                                                UnicodeString& result);
1170
1171 protected:
1172
1173     /**
1174      * Set the ID of this transliterator.  Subclasses shouldn't do
1175      * this, unless the underlying script behavior has changed.
1176      * @param id the new id to be set.
1177      * @draft ICU 2.4
1178      */
1179     void setID(const UnicodeString& id);
1180
1181 public:
1182
1183     /**
1184      * Return the class ID for this class.  This is useful only for
1185      * comparing to a return value from getDynamicClassID().  For example:
1186      * <pre>
1187      * .      Base* polymorphic_pointer = createPolymorphicObject();
1188      * .      if (polymorphic_pointer->getDynamicClassID() ==
1189      * .          Derived::getStaticClassID()) ...
1190      * </pre>
1191      * @return The class ID for all objects of this class (the address of fgClassID).
1192      * @stable ICU 2.0
1193      */
1194     static inline UClassID getStaticClassID(void);
1195
1196     /**
1197      * Returns a unique class ID <b>polymorphically</b>.  This method
1198      * is to implement a simple version of RTTI, since not all C++
1199      * compilers support genuine RTTI.  Polymorphic operator==() and
1200      * clone() methods call this method.
1201      *
1202      * <p>Concrete subclasses of Transliterator that wish clients to
1203      * be able to identify them should implement getDynamicClassID()
1204      * and also a static method and data member:
1205      *
1206      * <pre>
1207      * static UClassID getStaticClassID() { return (UClassID)&fgClassID; }
1208      * static char fgClassID;
1209      * </pre>
1210      *
1211      * Note: this method is declared pure virtual here, so a concrete
1212      * subclass must implement it; see Transliterator::getStaticClassID().
1213      *
1214      * @return The class ID for this object. All objects of a given
1215      * class have the same class ID.  Objects of other classes have
1216      * different class IDs.
1217      * @stable ICU 2.0
1218      */
1219     virtual UClassID getDynamicClassID(void) const = 0;
1220
1221 private:
1222
1223     /**
1224      * Class identifier for subclasses of Transliterator that do not define
1225      * their class (anonymous subclasses); getStaticClassID() returns its address.
1226      */
1227     static const char fgClassID;
1228
1229 private:
1230     static UBool initializeRegistry(void); // NOTE(review): presumably sets up the transliterator registry; meaning of the UBool result is not visible here
1231
1232 };
1233
1234 inline UClassID Transliterator::getStaticClassID(void) { // class ID is the address of fgClassID
1235     return (UClassID)&fgClassID;
1236 }
1237
1238 inline int32_t
1239 Transliterator::getMaximumContextLength(void) const // plain accessor for the stored length
1240 { return this->maximumContextLength; }
1241
1242 inline void
1243 Transliterator::setID(const UnicodeString& id) // assigns id to the ID member
1244 { this->ID = id; }
1245
1246 inline Transliterator::Token Transliterator::integerToken(int32_t i) { // wraps i in a Token
1247     Token wrapped;
1248     wrapped.integer = i; // only the integer member is assigned
1249     return wrapped;
1250 }
1251
1252 inline Transliterator::Token Transliterator::pointerToken(void* p) { // wraps p in a Token
1253     Token wrapped;
1254     wrapped.pointer = p; // only the pointer member is assigned
1255     return wrapped;
1256 }
1257
1258 U_NAMESPACE_END
1259
1260 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
1261
1262 #endif